From 40b367d95fb3d60fc1edb9ba8f6ef52272e48936 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Thu, 7 Nov 2013 11:35:29 +0100 Subject: irqchip: irq-dove: Add PMU interrupt controller. Dove has a Power Management Unit with its own interrupt controller. This is chained on the main interrupt controller. Add a driver, making use of generic chip where possible. Signed-off-by: Andrew Lunn Tested-by: Sebastian Hesselbarth cc: devicetree@vger.kernel.org cc: pawel.moll@arm.com cc: mark.rutland@arm.com cc: swarren@wwwdotorg.org cc: ian.campbell@citrix.com Signed-off-by: Jason Cooper diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,dove-pmu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,dove-pmu-intc.txt new file mode 100644 index 0000000..1feb582 --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,dove-pmu-intc.txt @@ -0,0 +1,17 @@ +Marvell Dove Power Management Unit interrupt controller + +Required properties: +- compatible: shall be "marvell,dove-pmu-intc" +- reg: base address of PMU interrupt registers starting with CAUSE register +- interrupts: PMU interrupt of the main interrupt controller +- interrupt-controller: identifies the node as an interrupt controller +- #interrupt-cells: number of cells to encode an interrupt source, shall be 1 + +Example: + pmu_intc: pmu-interrupt-ctrl@d0050 { + compatible = "marvell,dove-pmu-intc"; + interrupt-controller; + #interrupt-cells = <1>; + reg = <0xd0050 0x8>; + interrupts = <33>; + }; diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index c60b901..f743006 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_IRQCHIP) += irqchip.o obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o +obj-$(CONFIG_ARCH_DOVE) += irq-dove.o obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o obj-$(CONFIG_ARCH_MMP) += irq-mmp.o obj-$(CONFIG_ARCH_MVEBU) += irq-armada-370-xp.o diff --git a/drivers/irqchip/irq-dove.c b/drivers/irqchip/irq-dove.c new file mode 100644 index 0000000..788acd8 --- /dev/null +++ b/drivers/irqchip/irq-dove.c @@ -0,0 +1,126 @@ +/* + * Marvell Dove SoCs PMU IRQ chip driver. + * + * Andrew Lunn + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "irqchip.h" + +#define DOVE_PMU_IRQ_CAUSE 0x00 +#define DOVE_PMU_IRQ_MASK 0x04 + +static void dove_pmu_irq_handler(unsigned int irq, struct irq_desc *desc) +{ + struct irq_domain *d = irq_get_handler_data(irq); + struct irq_chip_generic *gc = irq_get_domain_generic_chip(d, 0); + u32 stat = readl_relaxed(gc->reg_base + DOVE_PMU_IRQ_CAUSE) & + gc->mask_cache; + + while (stat) { + u32 hwirq = ffs(stat) - 1; + + generic_handle_irq(irq_find_mapping(d, gc->irq_base + hwirq)); + stat &= ~(1 << hwirq); + } +} + +static void pmu_irq_ack(struct irq_data *d) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); + struct irq_chip_type *ct = irq_data_get_chip_type(d); + u32 mask = ~d->mask; + + /* + * The PMU mask register is not RW0C: it is RW. This means that + * the bits take whatever value is written to them; if you write + * a '1', you will set the interrupt. + * + * Unfortunately this means there is NO race free way to clear + * these interrupts. + * + * So, let's structure the code so that the window is as small as + * possible. + */ + irq_gc_lock(gc); + mask &= irq_reg_readl(gc->reg_base + ct->regs.ack); + irq_reg_writel(mask, gc->reg_base + ct->regs.ack); + irq_gc_unlock(gc); +} + +static int __init dove_pmu_irq_init(struct device_node *np, + struct device_node *parent) +{ + unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; + struct resource r; + struct irq_domain *domain; + struct irq_chip_generic *gc; + int ret, irq, nrirqs = 7; + + domain = irq_domain_add_linear(np, nrirqs, + &irq_generic_chip_ops, NULL); + if (!domain) { + pr_err("%s: unable to add irq domain\n", np->name); + return -ENOMEM; + } + + ret = irq_alloc_domain_generic_chips(domain, nrirqs, 1, np->name, + handle_level_irq, clr, 0, IRQ_GC_INIT_MASK_CACHE); + if (ret) { + pr_err("%s: unable to alloc irq domain gc\n", np->name); + return ret; + } + + ret = of_address_to_resource(np, 0, &r); + if (ret) { + pr_err("%s: unable to get resource\n", np->name); + return ret; + } + + if (!request_mem_region(r.start, resource_size(&r), np->name)) { + pr_err("%s: unable to request mem region\n", np->name); + return -ENOMEM; + } + + /* Map the parent interrupt for the chained handler */ + irq = irq_of_parse_and_map(np, 0); + if (irq <= 0) { + pr_err("%s: unable to parse irq\n", np->name); + return -EINVAL; + } + + gc = irq_get_domain_generic_chip(domain, 0); + gc->reg_base = ioremap(r.start, resource_size(&r)); + if (!gc->reg_base) { + pr_err("%s: unable to map resource\n", np->name); + return -ENOMEM; + } + + gc->chip_types[0].regs.ack = DOVE_PMU_IRQ_CAUSE; + gc->chip_types[0].regs.mask = DOVE_PMU_IRQ_MASK; + gc->chip_types[0].chip.irq_ack = pmu_irq_ack; + gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; + + /* mask and clear all interrupts */ + writel(0, gc->reg_base + DOVE_PMU_IRQ_MASK); + writel(0, gc->reg_base + DOVE_PMU_IRQ_CAUSE); + + irq_set_handler_data(irq, domain); + irq_set_chained_handler(irq, dove_pmu_irq_handler); + + return 0; +} +IRQCHIP_DECLARE(dove_pmu_intc, + "marvell,dove-pmu-intc", dove_pmu_irq_init); -- cgit v0.10.2 From 9848e49a9a9aab075d79c5d2856abb879495f97f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Thu, 16 Jan 2014 03:38:58 +0400 Subject: xtensa: fix warning '"CONFIG_OF" is not defined' The warning only shows up when building MMUv3 configuration with OF support disabled. Signed-off-by: Max Filippov diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h index 2a042d4..7494420 100644 --- a/arch/xtensa/include/asm/io.h +++ b/arch/xtensa/include/asm/io.h @@ -25,7 +25,7 @@ #ifdef CONFIG_MMU -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && CONFIG_OF +#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) extern unsigned long xtensa_kio_paddr; static inline unsigned long xtensa_get_kio_paddr(void) diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index 5791b45..f74ddfb 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -25,7 +25,7 @@ #define XCHAL_KIO_DEFAULT_PADDR 0xf0000000 #define XCHAL_KIO_SIZE 0x10000000 -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && CONFIG_OF +#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) #define XCHAL_KIO_PADDR xtensa_get_kio_paddr() #else #define XCHAL_KIO_PADDR XCHAL_KIO_DEFAULT_PADDR diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index 36ec171..861203e 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -39,7 +39,7 @@ void init_mmu(void) set_itlbcfg_register(0); set_dtlbcfg_register(0); #endif -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && CONFIG_OF +#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) /* * Update the IO area mapping in case xtensa_kio_paddr has changed */ -- cgit v0.10.2 From 45ec8860be2f681b2e3e521a3fb4554ec29fbaac Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 19 Jan 2014 20:00:48 +0400 Subject: xtensa: export ccount_freq Now that ccount_freq is used in udelay and ndelay it needs to be exported in order to be available to modules. Signed-off-by: Max Filippov diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 08b769d..2a1823d 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -30,6 +30,7 @@ #include unsigned long ccount_freq; /* ccount Hz */ +EXPORT_SYMBOL(ccount_freq); static cycle_t ccount_read(struct clocksource *cs) { -- cgit v0.10.2 From 4a2378a943f09907fb1ae35c15de917f60289c14 Mon Sep 17 00:00:00 2001 From: Greg Hackmann Date: Wed, 8 Jan 2014 10:57:03 -0800 Subject: timerfd: support CLOCK_BOOTTIME clock Add CLOCK_BOOTTIME support to timerfd Signed-off-by: Greg Hackmann Signed-off-by: John Stultz diff --git a/fs/timerfd.c b/fs/timerfd.c index 9293121..0013142 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -317,6 +317,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME && clockid != CLOCK_REALTIME_ALARM && + clockid != CLOCK_BOOTTIME && clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; -- cgit v0.10.2 From e2fd1374c705abe4661df3fb6fadb3879c7c1846 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 22 Jan 2014 08:04:43 +0400 Subject: xtensa: introduce spill_registers_kernel macro Most in-kernel users want registers spilled on the kernel stack and don't require PS.EXCM to be set. That means that they don't need fixup routine and could reuse regular window overflow mechanism for that, which makes spill routine very simple. Cc: stable@vger.kernel.org Suggested-by: Chris Zankel Signed-off-by: Max Filippov diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 8c194f6..677bfcf 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -23,25 +23,37 @@ void secondary_trap_init(void); static inline void spill_registers(void) { - +#if XCHAL_NUM_AREGS > 16 __asm__ __volatile__ ( - "movi a14, "__stringify((1 << PS_EXCM_BIT) | LOCKLEVEL)"\n\t" - "mov a12, a0\n\t" - "rsr a13, sar\n\t" - "xsr a14, ps\n\t" - "movi a0, _spill_registers\n\t" - "rsync\n\t" - "callx0 a0\n\t" - "mov a0, a12\n\t" - "wsr a13, sar\n\t" - "wsr a14, ps\n\t" - : : -#if defined(CONFIG_FRAME_POINTER) - : "a2", "a3", "a4", "a11", "a12", "a13", "a14", "a15", + " call12 1f\n" + " _j 2f\n" + " retw\n" + " .align 4\n" + "1:\n" + " _entry a1, 48\n" + " addi a12, a0, 3\n" +#if XCHAL_NUM_AREGS > 32 + " .rept (" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n" + " _entry a1, 48\n" + " mov a12, a0\n" + " .endr\n" +#endif + " _entry a1, 48\n" +#if XCHAL_NUM_AREGS % 12 == 0 + " mov a8, a8\n" +#elif XCHAL_NUM_AREGS % 12 == 4 + " mov a12, a12\n" +#elif XCHAL_NUM_AREGS % 12 == 8 + " mov a4, a4\n" +#endif + " retw\n" + "2:\n" + : : : "a12", "a13", "memory"); #else - : "a2", "a3", "a4", "a7", "a11", "a12", "a13", "a14", "a15", + __asm__ __volatile__ ( + " mov a12, a12\n" + : : : "memory"); #endif - "memory"); } #endif /* _XTENSA_TRAPS_H */ diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 21dbe6b..0489918e 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1794,6 +1794,43 @@ ENTRY(system_call) ENDPROC(system_call) +/* + * Spill live registers on the kernel stack macro. + * + * Entry condition: ps.woe is set, ps.excm is cleared + * Exit condition: windowstart has single bit set + * May clobber: a12, a13 + */ + .macro spill_registers_kernel + +#if XCHAL_NUM_AREGS > 16 + call12 1f + _j 2f + retw + .align 4 +1: + _entry a1, 48 + addi a12, a0, 3 +#if XCHAL_NUM_AREGS > 32 + .rept (XCHAL_NUM_AREGS - 32) / 12 + _entry a1, 48 + mov a12, a0 + .endr +#endif + _entry a1, 48 +#if XCHAL_NUM_AREGS % 12 == 0 + mov a8, a8 +#elif XCHAL_NUM_AREGS % 12 == 4 + mov a12, a12 +#elif XCHAL_NUM_AREGS % 12 == 8 + mov a4, a4 +#endif + retw +2: +#else + mov a12, a12 +#endif + .endm /* * Task switch. @@ -1806,21 +1843,20 @@ ENTRY(_switch_to) entry a1, 16 - mov a12, a2 # preserve 'prev' (a2) - mov a13, a3 # and 'next' (a3) + mov a10, a2 # preserve 'prev' (a2) + mov a11, a3 # and 'next' (a3) l32i a4, a2, TASK_THREAD_INFO l32i a5, a3, TASK_THREAD_INFO - save_xtregs_user a4 a6 a8 a9 a10 a11 THREAD_XTREGS_USER + save_xtregs_user a4 a6 a8 a9 a12 a13 THREAD_XTREGS_USER - s32i a0, a12, THREAD_RA # save return address - s32i a1, a12, THREAD_SP # save stack pointer + s32i a0, a10, THREAD_RA # save return address + s32i a1, a10, THREAD_SP # save stack pointer /* Disable ints while we manipulate the stack pointer. */ - movi a14, (1 << PS_EXCM_BIT) | LOCKLEVEL - xsr a14, ps + rsil a14, LOCKLEVEL rsr a3, excsave1 rsync s32i a3, a3, EXC_TABLE_FIXUP /* enter critical section */ @@ -1835,7 +1871,7 @@ ENTRY(_switch_to) /* Flush register file. */ - call0 _spill_registers # destroys a3, a4, and SAR + spill_registers_kernel /* Set kernel stack (and leave critical section) * Note: It's save to set it here. The stack will not be overwritten @@ -1851,13 +1887,13 @@ ENTRY(_switch_to) /* restore context of the task 'next' */ - l32i a0, a13, THREAD_RA # restore return address - l32i a1, a13, THREAD_SP # restore stack pointer + l32i a0, a11, THREAD_RA # restore return address + l32i a1, a11, THREAD_SP # restore stack pointer - load_xtregs_user a5 a6 a8 a9 a10 a11 THREAD_XTREGS_USER + load_xtregs_user a5 a6 a8 a9 a12 a13 THREAD_XTREGS_USER wsr a14, ps - mov a2, a12 # return 'prev' + mov a2, a10 # return 'prev' rsync retw -- cgit v0.10.2 From 3251f1e27a5a17f0efd436cfd1e7b9896cfab0a0 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 30 Oct 2013 16:18:25 +0400 Subject: xtensa: save current register frame in fast_syscall_spill_registers_fixup We need it saved because it contains a3 where we track which register windows we still need to spill, and fixup handler may call C exception handlers. Also fix comments. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 0489918e..b61e251 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1117,6 +1117,13 @@ ENDPROC(fast_syscall_spill_registers) * We basically restore WINDOWBASE and WINDOWSTART to the condition when * we entered the spill routine and jump to the user exception handler. * + * Note that we only need to restore the bits in windowstart that have not + * been spilled yet by the _spill_register routine. Luckily, a3 contains a + * rotated windowstart with only those bits set for frames that haven't been + * spilled yet. Because a3 is rotated such that bit 0 represents the register + * frame for the current windowbase - 1, we need to rotate a3 left by the + * value of the current windowbase + 1 and move it to windowstart. + * * a0: value of depc, original value in depc * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE * a3: exctable, original value in excsave1 @@ -1131,10 +1138,15 @@ ENTRY(fast_syscall_spill_registers_fixup) /* We need to make sure the current registers (a0-a3) are preserved. * To do this, we simply set the bit for the current window frame * in WS, so that the exception handlers save them to the task stack. + * + * Note: we use a3 to set the windowbase, so we take a special care + * of it, saving it in the original _spill_registers frame across + * the exception handler call. */ xsr a3, excsave1 # get spill-mask slli a3, a3, 1 # shift left by one + addi a3, a3, 1 # set the bit for the current window frame slli a2, a3, 32-WSBITS src a2, a3, a2 # a2 = xxwww1yyxxxwww1yy...... -- cgit v0.10.2 From aff7385b5a16bca6b8d9243f01a9ea5a5b411e1d Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Tue, 21 Jan 2014 15:35:53 -0800 Subject: locking/mutexes/mcs: Correct barrier usage This patch corrects the way memory barriers are used in the MCS lock with smp_load_acquire and smp_store_release fucnctions. The previous barriers could leak critical sections if mcs lock is used by itself. It is not a problem when mcs lock is embedded in mutex but will be an issue when the mcs_lock is used elsewhere. The patch removes the incorrect barriers and put in correct barriers with the pair of functions smp_load_acquire and smp_store_release. Suggested-by: Michel Lespinasse Reviewed-by: Paul E. McKenney Signed-off-by: Waiman Long Signed-off-by: Jason Low Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1390347353.3138.62.camel@schen9-DESK Signed-off-by: Ingo Molnar diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 4dd6e4c..fbbd2ed 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -136,9 +136,12 @@ void mspin_lock(struct mspin_node **lock, struct mspin_node *node) return; } ACCESS_ONCE(prev->next) = node; - smp_wmb(); - /* Wait until the lock holder passes the lock down */ - while (!ACCESS_ONCE(node->locked)) + /* + * Wait until the lock holder passes the lock down. + * Using smp_load_acquire() provides a memory barrier that + * ensures subsequent operations happen after the lock is acquired. + */ + while (!(smp_load_acquire(&node->locked))) arch_mutex_cpu_relax(); } @@ -156,8 +159,13 @@ static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) while (!(next = ACCESS_ONCE(node->next))) arch_mutex_cpu_relax(); } - ACCESS_ONCE(next->locked) = 1; - smp_wmb(); + /* + * Pass lock to next waiter. + * smp_store_release() provides a memory barrier to ensure + * all operations in the critical section has been completed + * before unlocking. + */ + smp_store_release(&next->locked, 1); } /* -- cgit v0.10.2 From e72246748ff006ab928bc774e276e6ef5542f9c5 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Tue, 21 Jan 2014 15:36:00 -0800 Subject: locking/mutexes/mcs: Restructure the MCS lock defines and locking code into its own file We will need the MCS lock code for doing optimistic spinning for rwsem and queued rwlock. Extracting the MCS code from mutex.c and put into its own file allow us to reuse this code easily. We also inline mcs_spin_lock and mcs_spin_unlock functions for better efficiency. Note that using the smp_load_acquire/smp_store_release pair used in mcs_lock and mcs_unlock is not sufficient to form a full memory barrier across cpus for many architectures (except x86). For applications that absolutely need a full barrier across multiple cpus with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be used after mcs_lock. Reviewed-by: Paul E. McKenney Signed-off-by: Tim Chen Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1390347360.3138.63.camel@schen9-DESK Signed-off-by: Ingo Molnar diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h new file mode 100644 index 0000000..9578ef8 --- /dev/null +++ b/include/linux/mcs_spinlock.h @@ -0,0 +1,77 @@ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS lock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bouncings that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ +}; + +/* + * Note: the smp_load_acquire/smp_store_release pair is not + * sufficient to form a full memory barrier across + * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. + * For applications that need a full barrier across multiple cpus + * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be + * used after mcs_lock. + */ +static inline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* Lock acquired */ + node->locked = 1; + return; + } + ACCESS_ONCE(prev->next) = node; + /* + * Wait until the lock holder passes the lock down. + * Using smp_load_acquire() provides a memory barrier that + * ensures subsequent operations happen after the lock is acquired. + */ + while (!(smp_load_acquire(&node->locked))) + arch_mutex_cpu_relax(); +} + +static inline +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (cmpxchg(lock, node, NULL) == node) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + /* + * Pass lock to next waiter. + * smp_store_release() provides a memory barrier to ensure + * all operations in the critical section has been completed + * before unlocking. + */ + smp_store_release(&next->locked, 1); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index d318193..c482e1d 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,6 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ +struct mcs_spinlock; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -55,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - void *spin_mlock; /* Spinner MCS lock */ + struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; @@ -179,4 +180,4 @@ extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); # define arch_mutex_cpu_relax() cpu_relax() #endif -#endif +#endif /* __LINUX_MUTEX_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index fbbd2ed..45fe1b5 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, @@ -52,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->spin_mlock = NULL; + lock->mcs_lock = NULL; #endif debug_mutex_init(lock, name, key); @@ -111,62 +112,7 @@ EXPORT_SYMBOL(mutex_lock); * more or less simultaneously, the spinners need to acquire a MCS lock * first before spinning on the owner field. * - * We don't inline mspin_lock() so that perf can correctly account for the - * time spent in this lock function. */ -struct mspin_node { - struct mspin_node *next ; - int locked; /* 1 if lock acquired */ -}; -#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) - -static noinline -void mspin_lock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* Lock acquired */ - node->locked = 1; - return; - } - ACCESS_ONCE(prev->next) = node; - /* - * Wait until the lock holder passes the lock down. - * Using smp_load_acquire() provides a memory barrier that - * ensures subsequent operations happen after the lock is acquired. - */ - while (!(smp_load_acquire(&node->locked))) - arch_mutex_cpu_relax(); -} - -static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) -{ - struct mspin_node *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (cmpxchg(lock, node, NULL) == node) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - /* - * Pass lock to next waiter. - * smp_store_release() provides a memory barrier to ensure - * all operations in the critical section has been completed - * before unlocking. - */ - smp_store_release(&next->locked, 1); -} /* * Mutex spinning code migrated from kernel/sched/core.c @@ -456,7 +402,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, for (;;) { struct task_struct *owner; - struct mspin_node node; + struct mcs_spinlock node; if (use_ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; @@ -478,10 +424,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * If there's an owner, wait for it to either * release the lock or go to sleep. */ - mspin_lock(MLOCK(lock), &node); + mcs_spin_lock(&lock->mcs_lock, &node); owner = ACCESS_ONCE(lock->owner); if (owner && !mutex_spin_on_owner(lock, owner)) { - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); goto slowpath; } @@ -496,11 +442,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } mutex_set_owner(lock); - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); preempt_enable(); return 0; } - mspin_unlock(MLOCK(lock), &node); + mcs_spin_unlock(&lock->mcs_lock, &node); /* * When there's no owner, we might have preempted between the -- cgit v0.10.2 From 5faeb8adb956a5ad6579c4e309e8689943ad8294 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 21 Jan 2014 15:36:05 -0800 Subject: locking/mcs: Micro-optimize the MCS code, add extra comments Remove unnecessary operation to assign locked status to 1 if lock is acquired without contention. Lock status will not be checked by lock holder again once it is acquired and any lock contenders will not be looking at the lock holder's lock status. Make the cmpxchg(lock, node, NULL) == node check in mcs_spin_unlock() likely() as it is likely that a race did not occur most of the time. Also add in more comments describing how the local node is used in MCS locks. Reviewed-by: Paul E. McKenney Reviewed-by: Tim Chen Signed-off-by: Jason Low Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1390347365.3138.64.camel@schen9-DESK Signed-off-by: Ingo Molnar diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 9578ef8..143fa42 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -25,6 +25,17 @@ struct mcs_spinlock { * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be * used after mcs_lock. */ + +/* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->locked + * in mcs_spin_unlock(). + * + * We don't inline mcs_spin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ static inline void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { @@ -36,8 +47,14 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) prev = xchg(lock, node); if (likely(prev == NULL)) { - /* Lock acquired */ - node->locked = 1; + /* + * Lock acquired, don't need to set node->locked to 1. Threads + * only spin on its own node->locked value for lock acquisition. + * However, since this thread can immediately acquire the lock + * and does not proceed to spin on its own node->locked, this + * value won't be used. If a debug mode is needed to + * audit lock status, then set node->locked value here. + */ return; } ACCESS_ONCE(prev->next) = node; @@ -50,6 +67,10 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mutex_cpu_relax(); } +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. + */ static inline void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) { @@ -59,7 +80,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) /* * Release the lock by setting it to NULL */ - if (cmpxchg(lock, node, NULL) == node) + if (likely(cmpxchg(lock, node, NULL) == node)) return; /* Wait until the next pointer is set */ while (!(next = ACCESS_ONCE(node->next))) -- cgit v0.10.2 From e207552e64ea053a33e856828ad7915484911d06 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 21 Jan 2014 15:36:10 -0800 Subject: locking/mcs: Allow architectures to hook in to contended paths When contended, architectures may be able to reduce the polling overhead in ways which aren't expressible using a simple relax() primitive. This patch allows architectures to hook into the mcs_{lock,unlock} functions for the contended cases only. Reviewed-by: Paul E. McKenney Signed-off-by: Will Deacon Signed-off-by: Tim Chen Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1390347370.3138.65.camel@schen9-DESK Signed-off-by: Ingo Molnar diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index 143fa42..e9a4d74 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -17,6 +17,28 @@ struct mcs_spinlock { int locked; /* 1 if lock acquired */ }; +#ifndef arch_mcs_spin_lock_contended +/* + * Using smp_load_acquire() provides a memory barrier that ensures + * subsequent operations happen after the lock is acquired. + */ +#define arch_mcs_spin_lock_contended(l) \ +do { \ + while (!(smp_load_acquire(l))) \ + arch_mutex_cpu_relax(); \ +} while (0) +#endif + +#ifndef arch_mcs_spin_unlock_contended +/* + * smp_store_release() provides a memory barrier to ensure all + * operations in the critical section has been completed before + * unlocking. + */ +#define arch_mcs_spin_unlock_contended(l) \ + smp_store_release((l), 1) +#endif + /* * Note: the smp_load_acquire/smp_store_release pair is not * sufficient to form a full memory barrier across @@ -58,13 +80,9 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) return; } ACCESS_ONCE(prev->next) = node; - /* - * Wait until the lock holder passes the lock down. - * Using smp_load_acquire() provides a memory barrier that - * ensures subsequent operations happen after the lock is acquired. - */ - while (!(smp_load_acquire(&node->locked))) - arch_mutex_cpu_relax(); + + /* Wait until the lock holder passes the lock down. */ + arch_mcs_spin_lock_contended(&node->locked); } /* @@ -86,13 +104,9 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) while (!(next = ACCESS_ONCE(node->next))) arch_mutex_cpu_relax(); } - /* - * Pass lock to next waiter. - * smp_store_release() provides a memory barrier to ensure - * all operations in the critical section has been completed - * before unlocking. - */ - smp_store_release(&next->locked, 1); + + /* Pass lock to next waiter. */ + arch_mcs_spin_unlock_contended(&next->locked); } #endif /* __LINUX_MCS_SPINLOCK_H */ -- cgit v0.10.2 From 52bf84aa206cd2c2516dfa3e03b578edf8a3242f Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:40 -0500 Subject: sched/numa, mm: Remove p->numa_migrate_deferred Excessive migration of pages can hurt the performance of workloads that span multiple NUMA nodes. However, it turns out that the p->numa_migrate_deferred knob is a really big hammer, which does reduce migration rates, but does not actually help performance. Now that the second stage of the automatic numa balancing code has stabilized, it is time to replace the simplistic migration deferral code with something smarter. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-2-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 6d48640..760f6e6a 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -386,8 +386,7 @@ feature should be disabled. Otherwise, if the system overhead from the feature is too high then the rate the kernel samples for NUMA hinting faults may be controlled by the numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, -numa_balancing_scan_size_mb, numa_balancing_settle_count sysctls and -numa_balancing_migrate_deferred. +numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls. ============================================================== @@ -428,13 +427,6 @@ rate for each task. numa_balancing_scan_size_mb is how many megabytes worth of pages are scanned for a given scan. -numa_balancing_migrate_deferred is how many page migrations get skipped -unconditionally, after a page migration is skipped because a page is shared -with other tasks. This reduces page migration overhead, and determines -how much stronger the "move task near its memory" policy scheduler becomes, -versus the "move memory near its task" memory management policy, for workloads -with shared memory. - ============================================================== osrelease, ostype & version: diff --git a/include/linux/sched.h b/include/linux/sched.h index ffccdad..d572d5b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1457,7 +1457,6 @@ struct task_struct { unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; - int numa_migrate_deferred; unsigned long numa_migrate_retry; u64 node_stamp; /* migration stamp */ struct callback_head numa_work; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index efe6457..7cdde91 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -819,14 +819,6 @@ unsigned int sysctl_numa_balancing_scan_size = 256; /* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */ unsigned int sysctl_numa_balancing_scan_delay = 1000; -/* - * After skipping a page migration on a shared page, skip N more numa page - * migrations unconditionally. This reduces the number of NUMA migrations - * in shared memory workloads, and has the effect of pulling tasks towards - * where their memory lives, over pulling the memory towards the task. - */ -unsigned int sysctl_numa_balancing_migrate_deferred = 16; - static unsigned int task_nr_scan_windows(struct task_struct *p) { unsigned long rss = 0; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c8da99f..b41d61d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -384,13 +384,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "numa_balancing_migrate_deferred", - .data = &sysctl_numa_balancing_migrate_deferred, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0cd2c4d..68d5c7f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2304,35 +2304,6 @@ static void sp_free(struct sp_node *n) kmem_cache_free(sn_cache, n); } -#ifdef CONFIG_NUMA_BALANCING -static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) -{ - /* Never defer a private fault */ - if (cpupid_match_pid(p, last_cpupid)) - return false; - - if (p->numa_migrate_deferred) { - p->numa_migrate_deferred--; - return true; - } - return false; -} - -static inline void defer_numa_migrate(struct task_struct *p) -{ - p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred; -} -#else -static inline bool numa_migrate_deferred(struct task_struct *p, int last_cpupid) -{ - return false; -} - -static inline void defer_numa_migrate(struct task_struct *p) -{ -} -#endif /* CONFIG_NUMA_BALANCING */ - /** * mpol_misplaced - check whether current page node is valid in policy * @@ -2435,24 +2406,8 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long */ last_cpupid = page_cpupid_xchg_last(page, this_cpupid); if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) { - - /* See sysctl_numa_balancing_migrate_deferred comment */ - if (!cpupid_match_pid(current, last_cpupid)) - defer_numa_migrate(current); - goto out; } - - /* - * The quadratic filter above reduces extraneous migration - * of shared pages somewhat. This code reduces it even more, - * reducing the overhead of page migrations of shared pages. - * This makes workloads with shared pages rely more on - * "move task near its memory", and less on "move memory - * towards its task", which is exactly what we want. - */ - if (numa_migrate_deferred(current, last_cpupid)) - goto out; } if (curnid != polnid) -- cgit v0.10.2 From ff1df896aef8e0ec1556a5c44f424bd45bfa2cbe Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:41 -0500 Subject: sched/numa: Rename p->numa_faults to numa_faults_memory In order to get a more consistent naming scheme, making it clear which fault statistics track memory locality, and which track CPU locality, rename the memory fault statistics. Suggested-by: Mel Gorman Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-3-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index d572d5b..144d509 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1469,15 +1469,15 @@ struct task_struct { * Scheduling placement decisions are made based on the these counts. * The values remain static for the duration of a PTE scan */ - unsigned long *numa_faults; + unsigned long *numa_faults_memory; unsigned long total_numa_faults; /* * numa_faults_buffer records faults per node during the current - * scan window. When the scan completes, the counts in numa_faults - * decay and these values are copied. + * scan window. When the scan completes, the counts in + * numa_faults_memory decay and these values are copied. */ - unsigned long *numa_faults_buffer; + unsigned long *numa_faults_buffer_memory; /* * numa_faults_locality tracks if faults recorded during the last diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 81343d6..bc708c5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1744,8 +1744,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; p->numa_scan_period = sysctl_numa_balancing_scan_delay; p->numa_work.next = &p->numa_work; - p->numa_faults = NULL; - p->numa_faults_buffer = NULL; + p->numa_faults_memory = NULL; + p->numa_faults_buffer_memory = NULL; INIT_LIST_HEAD(&p->numa_entry); p->numa_group = NULL; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index dd52e7f..31b908d 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -533,15 +533,15 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m) unsigned long nr_faults = -1; int cpu_current, home_node; - if (p->numa_faults) - nr_faults = p->numa_faults[2*node + i]; + if (p->numa_faults_memory) + nr_faults = p->numa_faults_memory[2*node + i]; cpu_current = !i ? (task_node(p) == node) : (pol && node_isset(node, pol->v.nodes)); home_node = (p->numa_preferred_nid == node); - SEQ_printf(m, "numa_faults, %d, %d, %d, %d, %ld\n", + SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n", i, node, cpu_current, home_node, nr_faults); } } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7cdde91..3e616d7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -901,11 +901,11 @@ static inline int task_faults_idx(int nid, int priv) static inline unsigned long task_faults(struct task_struct *p, int nid) { - if (!p->numa_faults) + if (!p->numa_faults_memory) return 0; - return p->numa_faults[task_faults_idx(nid, 0)] + - p->numa_faults[task_faults_idx(nid, 1)]; + return p->numa_faults_memory[task_faults_idx(nid, 0)] + + p->numa_faults_memory[task_faults_idx(nid, 1)]; } static inline unsigned long group_faults(struct task_struct *p, int nid) @@ -927,7 +927,7 @@ static inline unsigned long task_weight(struct task_struct *p, int nid) { unsigned long total_faults; - if (!p->numa_faults) + if (!p->numa_faults_memory) return 0; total_faults = p->total_numa_faults; @@ -1255,7 +1255,7 @@ static int task_numa_migrate(struct task_struct *p) static void numa_migrate_preferred(struct task_struct *p) { /* This task has no NUMA fault statistics yet */ - if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults)) + if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults_memory)) return; /* Periodically retry migrating the task to the preferred node */ @@ -1371,16 +1371,16 @@ static void task_numa_placement(struct task_struct *p) long diff; i = task_faults_idx(nid, priv); - diff = -p->numa_faults[i]; + diff = -p->numa_faults_memory[i]; /* Decay existing window, copy faults since last scan */ - p->numa_faults[i] >>= 1; - p->numa_faults[i] += p->numa_faults_buffer[i]; - fault_types[priv] += p->numa_faults_buffer[i]; - p->numa_faults_buffer[i] = 0; + p->numa_faults_memory[i] >>= 1; + p->numa_faults_memory[i] += p->numa_faults_buffer_memory[i]; + fault_types[priv] += p->numa_faults_buffer_memory[i]; + p->numa_faults_buffer_memory[i] = 0; - faults += p->numa_faults[i]; - diff += p->numa_faults[i]; + faults += p->numa_faults_memory[i]; + diff += p->numa_faults_memory[i]; p->total_numa_faults += diff; if (p->numa_group) { /* safe because we can only change our own group */ @@ -1465,7 +1465,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, grp->gid = p->pid; for (i = 0; i < 2*nr_node_ids; i++) - grp->faults[i] = p->numa_faults[i]; + grp->faults[i] = p->numa_faults_memory[i]; grp->total_faults = p->total_numa_faults; @@ -1523,8 +1523,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, double_lock(&my_grp->lock, &grp->lock); for (i = 0; i < 2*nr_node_ids; i++) { - my_grp->faults[i] -= p->numa_faults[i]; - grp->faults[i] += p->numa_faults[i]; + my_grp->faults[i] -= p->numa_faults_memory[i]; + grp->faults[i] += p->numa_faults_memory[i]; } my_grp->total_faults -= p->total_numa_faults; grp->total_faults += p->total_numa_faults; @@ -1550,12 +1550,12 @@ void task_numa_free(struct task_struct *p) { struct numa_group *grp = p->numa_group; int i; - void *numa_faults = p->numa_faults; + void *numa_faults = p->numa_faults_memory; if (grp) { spin_lock(&grp->lock); for (i = 0; i < 2*nr_node_ids; i++) - grp->faults[i] -= p->numa_faults[i]; + grp->faults[i] -= p->numa_faults_memory[i]; grp->total_faults -= p->total_numa_faults; list_del(&p->numa_entry); @@ -1565,8 +1565,8 @@ void task_numa_free(struct task_struct *p) put_numa_group(grp); } - p->numa_faults = NULL; - p->numa_faults_buffer = NULL; + p->numa_faults_memory = NULL; + p->numa_faults_buffer_memory = NULL; kfree(numa_faults); } @@ -1591,16 +1591,16 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) return; /* Allocate buffer to track faults on a per-node basis */ - if (unlikely(!p->numa_faults)) { - int size = sizeof(*p->numa_faults) * 2 * nr_node_ids; + if (unlikely(!p->numa_faults_memory)) { + int size = sizeof(*p->numa_faults_memory) * 2 * nr_node_ids; /* numa_faults and numa_faults_buffer share the allocation */ - p->numa_faults = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN); - if (!p->numa_faults) + p->numa_faults_memory = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN); + if (!p->numa_faults_memory) return; - BUG_ON(p->numa_faults_buffer); - p->numa_faults_buffer = p->numa_faults + (2 * nr_node_ids); + BUG_ON(p->numa_faults_buffer_memory); + p->numa_faults_buffer_memory = p->numa_faults_memory + (2 * nr_node_ids); p->total_numa_faults = 0; memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); } @@ -1629,7 +1629,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) if (migrated) p->numa_pages_migrated += pages; - p->numa_faults_buffer[task_faults_idx(node, priv)] += pages; + p->numa_faults_buffer_memory[task_faults_idx(node, priv)] += pages; p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages; } @@ -4771,7 +4771,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) { int src_nid, dst_nid; - if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults || + if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults_memory || !(env->sd->flags & SD_NUMA)) { return false; } @@ -4802,7 +4802,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) return false; - if (!p->numa_faults || !(env->sd->flags & SD_NUMA)) + if (!p->numa_faults_memory || !(env->sd->flags & SD_NUMA)) return false; src_nid = cpu_to_node(env->src_cpu); -- cgit v0.10.2 From 50ec8a401fed6d246ab65e6011d61ac91c34af70 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:42 -0500 Subject: sched/numa: Track from which nodes NUMA faults are triggered Track which nodes NUMA faults are triggered from, in other words the CPUs on which the NUMA faults happened. This uses a similar mechanism to what is used to track the memory involved in numa faults. The next patches use this to build up a bitmap of which nodes a workload is actively running on. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-4-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 144d509..5fb0cfb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1480,6 +1480,13 @@ struct task_struct { unsigned long *numa_faults_buffer_memory; /* + * Track the nodes the process was running on when a NUMA hinting + * fault was incurred. + */ + unsigned long *numa_faults_cpu; + unsigned long *numa_faults_buffer_cpu; + + /* * numa_faults_locality tracks if faults recorded during the last * scan window were remote/local. The task scan period is adapted * based on the locality of the faults with different weights @@ -1582,8 +1589,6 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p); - -extern unsigned int sysctl_numa_balancing_migrate_deferred; #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3e616d7..4841aaf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -886,6 +886,7 @@ struct numa_group { struct rcu_head rcu; unsigned long total_faults; + unsigned long *faults_cpu; unsigned long faults[0]; }; @@ -1368,10 +1369,11 @@ static void task_numa_placement(struct task_struct *p) int priv, i; for (priv = 0; priv < 2; priv++) { - long diff; + long diff, f_diff; i = task_faults_idx(nid, priv); diff = -p->numa_faults_memory[i]; + f_diff = -p->numa_faults_cpu[i]; /* Decay existing window, copy faults since last scan */ p->numa_faults_memory[i] >>= 1; @@ -1379,12 +1381,18 @@ static void task_numa_placement(struct task_struct *p) fault_types[priv] += p->numa_faults_buffer_memory[i]; p->numa_faults_buffer_memory[i] = 0; + p->numa_faults_cpu[i] >>= 1; + p->numa_faults_cpu[i] += p->numa_faults_buffer_cpu[i]; + p->numa_faults_buffer_cpu[i] = 0; + faults += p->numa_faults_memory[i]; diff += p->numa_faults_memory[i]; + f_diff += p->numa_faults_cpu[i]; p->total_numa_faults += diff; if (p->numa_group) { /* safe because we can only change our own group */ p->numa_group->faults[i] += diff; + p->numa_group->faults_cpu[i] += f_diff; p->numa_group->total_faults += diff; group_faults += p->numa_group->faults[i]; } @@ -1453,7 +1461,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, if (unlikely(!p->numa_group)) { unsigned int size = sizeof(struct numa_group) + - 2*nr_node_ids*sizeof(unsigned long); + 4*nr_node_ids*sizeof(unsigned long); grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); if (!grp) @@ -1463,8 +1471,10 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, spin_lock_init(&grp->lock); INIT_LIST_HEAD(&grp->task_list); grp->gid = p->pid; + /* Second half of the array tracks nids where faults happen */ + grp->faults_cpu = grp->faults + 2 * nr_node_ids; - for (i = 0; i < 2*nr_node_ids; i++) + for (i = 0; i < 4*nr_node_ids; i++) grp->faults[i] = p->numa_faults_memory[i]; grp->total_faults = p->total_numa_faults; @@ -1522,7 +1532,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, double_lock(&my_grp->lock, &grp->lock); - for (i = 0; i < 2*nr_node_ids; i++) { + for (i = 0; i < 4*nr_node_ids; i++) { my_grp->faults[i] -= p->numa_faults_memory[i]; grp->faults[i] += p->numa_faults_memory[i]; } @@ -1554,7 +1564,7 @@ void task_numa_free(struct task_struct *p) if (grp) { spin_lock(&grp->lock); - for (i = 0; i < 2*nr_node_ids; i++) + for (i = 0; i < 4*nr_node_ids; i++) grp->faults[i] -= p->numa_faults_memory[i]; grp->total_faults -= p->total_numa_faults; @@ -1567,6 +1577,8 @@ void task_numa_free(struct task_struct *p) p->numa_faults_memory = NULL; p->numa_faults_buffer_memory = NULL; + p->numa_faults_cpu= NULL; + p->numa_faults_buffer_cpu = NULL; kfree(numa_faults); } @@ -1577,6 +1589,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) { struct task_struct *p = current; bool migrated = flags & TNF_MIGRATED; + int this_node = task_node(current); int priv; if (!numabalancing_enabled) @@ -1592,7 +1605,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) /* Allocate buffer to track faults on a per-node basis */ if (unlikely(!p->numa_faults_memory)) { - int size = sizeof(*p->numa_faults_memory) * 2 * nr_node_ids; + int size = sizeof(*p->numa_faults_memory) * 4 * nr_node_ids; /* numa_faults and numa_faults_buffer share the allocation */ p->numa_faults_memory = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN); @@ -1600,7 +1613,9 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) return; BUG_ON(p->numa_faults_buffer_memory); - p->numa_faults_buffer_memory = p->numa_faults_memory + (2 * nr_node_ids); + p->numa_faults_cpu = p->numa_faults_memory + (2 * nr_node_ids); + p->numa_faults_buffer_memory = p->numa_faults_memory + (4 * nr_node_ids); + p->numa_faults_buffer_cpu = p->numa_faults_memory + (6 * nr_node_ids); p->total_numa_faults = 0; memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); } @@ -1630,6 +1645,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) p->numa_pages_migrated += pages; p->numa_faults_buffer_memory[task_faults_idx(node, priv)] += pages; + p->numa_faults_buffer_cpu[task_faults_idx(this_node, priv)] += pages; p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages; } -- cgit v0.10.2 From 20e07dea286a90f096a779706861472d296397c6 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:43 -0500 Subject: sched/numa: Build per numa_group active node mask from numa_faults_cpu statistics The numa_faults_cpu statistics are used to maintain an active_nodes nodemask per numa_group. This allows us to be smarter about when to do numa migrations. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-5-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4841aaf..1ee921f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -885,6 +885,7 @@ struct numa_group { struct list_head task_list; struct rcu_head rcu; + nodemask_t active_nodes; unsigned long total_faults; unsigned long *faults_cpu; unsigned long faults[0]; @@ -918,6 +919,12 @@ static inline unsigned long group_faults(struct task_struct *p, int nid) p->numa_group->faults[task_faults_idx(nid, 1)]; } +static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) +{ + return group->faults_cpu[task_faults_idx(nid, 0)] + + group->faults_cpu[task_faults_idx(nid, 1)]; +} + /* * These return the fraction of accesses done by a particular task, or * task group, on a particular numa node. The group weight is given a @@ -1271,6 +1278,38 @@ static void numa_migrate_preferred(struct task_struct *p) } /* + * Find the nodes on which the workload is actively running. We do this by + * tracking the nodes from which NUMA hinting faults are triggered. This can + * be different from the set of nodes where the workload's memory is currently + * located. + * + * The bitmask is used to make smarter decisions on when to do NUMA page + * migrations, To prevent flip-flopping, and excessive page migrations, nodes + * are added when they cause over 6/16 of the maximum number of faults, but + * only removed when they drop below 3/16. + */ +static void update_numa_active_node_mask(struct numa_group *numa_group) +{ + unsigned long faults, max_faults = 0; + int nid; + + for_each_online_node(nid) { + faults = group_faults_cpu(numa_group, nid); + if (faults > max_faults) + max_faults = faults; + } + + for_each_online_node(nid) { + faults = group_faults_cpu(numa_group, nid); + if (!node_isset(nid, numa_group->active_nodes)) { + if (faults > max_faults * 6 / 16) + node_set(nid, numa_group->active_nodes); + } else if (faults < max_faults * 3 / 16) + node_clear(nid, numa_group->active_nodes); + } +} + +/* * When adapting the scan rate, the period is divided into NUMA_PERIOD_SLOTS * increments. The more local the fault statistics are, the higher the scan * period will be for the next scan window. If local/remote ratio is below @@ -1412,6 +1451,7 @@ static void task_numa_placement(struct task_struct *p) update_task_scan_period(p, fault_types[0], fault_types[1]); if (p->numa_group) { + update_numa_active_node_mask(p->numa_group); /* * If the preferred task and group nids are different, * iterate over the nodes again to find the best place. @@ -1474,6 +1514,8 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, /* Second half of the array tracks nids where faults happen */ grp->faults_cpu = grp->faults + 2 * nr_node_ids; + node_set(task_node(current), grp->active_nodes); + for (i = 0; i < 4*nr_node_ids; i++) grp->faults[i] = p->numa_faults_memory[i]; -- cgit v0.10.2 From 10f39042711ba21773763f267b4943a2c66c8bef Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:44 -0500 Subject: sched/numa, mm: Use active_nodes nodemask to limit numa migrations Use the active_nodes nodemask to make smarter decisions on NUMA migrations. In order to maximize performance of workloads that do not fit in one NUMA node, we want to satisfy the following criteria: 1) keep private memory local to each thread 2) avoid excessive NUMA migration of pages 3) distribute shared memory across the active nodes, to maximize memory bandwidth available to the workload This patch accomplishes that by implementing the following policy for NUMA migrations: 1) always migrate on a private fault 2) never migrate to a node that is not in the set of active nodes for the numa_group 3) always migrate from a node outside of the set of active nodes, to a node that is in that set 4) within the set of active nodes in the numa_group, only migrate from a node with more NUMA page faults, to a node with fewer NUMA page faults, with a 25% margin to avoid ping-ponging This results in most pages of a workload ending up on the actively used nodes, with reduced ping-ponging of pages between those nodes. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-6-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 5fb0cfb..5ab3b89 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1589,6 +1589,8 @@ extern void task_numa_fault(int last_node, int node, int pages, int flags); extern pid_t task_numa_group_id(struct task_struct *p); extern void set_numabalancing_state(bool enabled); extern void task_numa_free(struct task_struct *p); +extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page, + int src_nid, int dst_cpu); #else static inline void task_numa_fault(int last_node, int node, int pages, int flags) @@ -1604,6 +1606,11 @@ static inline void set_numabalancing_state(bool enabled) static inline void task_numa_free(struct task_struct *p) { } +static inline bool should_numa_migrate_memory(struct task_struct *p, + struct page *page, int src_nid, int dst_cpu) +{ + return true; +} #endif static inline struct pid *task_pid(struct task_struct *task) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1ee921f..eeabb33 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -954,6 +954,69 @@ static inline unsigned long group_weight(struct task_struct *p, int nid) return 1000 * group_faults(p, nid) / p->numa_group->total_faults; } +bool should_numa_migrate_memory(struct task_struct *p, struct page * page, + int src_nid, int dst_cpu) +{ + struct numa_group *ng = p->numa_group; + int dst_nid = cpu_to_node(dst_cpu); + int last_cpupid, this_cpupid; + + this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid); + + /* + * Multi-stage node selection is used in conjunction with a periodic + * migration fault to build a temporal task<->page relation. By using + * a two-stage filter we remove short/unlikely relations. + * + * Using P(p) ~ n_p / n_t as per frequentist probability, we can equate + * a task's usage of a particular page (n_p) per total usage of this + * page (n_t) (in a given time-span) to a probability. + * + * Our periodic faults will sample this probability and getting the + * same result twice in a row, given these samples are fully + * independent, is then given by P(n)^2, provided our sample period + * is sufficiently short compared to the usage pattern. + * + * This quadric squishes small probabilities, making it less likely we + * act on an unlikely task<->page relation. + */ + last_cpupid = page_cpupid_xchg_last(page, this_cpupid); + if (!cpupid_pid_unset(last_cpupid) && + cpupid_to_nid(last_cpupid) != dst_nid) + return false; + + /* Always allow migrate on private faults */ + if (cpupid_match_pid(p, last_cpupid)) + return true; + + /* A shared fault, but p->numa_group has not been set up yet. */ + if (!ng) + return true; + + /* + * Do not migrate if the destination is not a node that + * is actively used by this numa group. + */ + if (!node_isset(dst_nid, ng->active_nodes)) + return false; + + /* + * Source is a node that is not actively used by this + * numa group, while the destination is. Migrate. + */ + if (!node_isset(src_nid, ng->active_nodes)) + return true; + + /* + * Both source and destination are nodes in active + * use by this numa group. Maximize memory bandwidth + * by migrating from more heavily used groups, to less + * heavily used ones, spreading the load around. + * Use a 1/4 hysteresis to avoid spurious page movement. + */ + return group_faults(p, dst_nid) < (group_faults(p, src_nid) * 3 / 4); +} + static unsigned long weighted_cpuload(const int cpu); static unsigned long source_load(int cpu, int type); static unsigned long target_load(int cpu, int type); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 68d5c7f..784c11e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2377,37 +2377,10 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long /* Migrate the page towards the node whose CPU is referencing it */ if (pol->flags & MPOL_F_MORON) { - int last_cpupid; - int this_cpupid; - polnid = thisnid; - this_cpupid = cpu_pid_to_cpupid(thiscpu, current->pid); - /* - * Multi-stage node selection is used in conjunction - * with a periodic migration fault to build a temporal - * task<->page relation. By using a two-stage filter we - * remove short/unlikely relations. - * - * Using P(p) ~ n_p / n_t as per frequentist - * probability, we can equate a task's usage of a - * particular page (n_p) per total usage of this - * page (n_t) (in a given time-span) to a probability. - * - * Our periodic faults will sample this probability and - * getting the same result twice in a row, given these - * samples are fully independent, is then given by - * P(n)^2, provided our sample period is sufficiently - * short compared to the usage pattern. - * - * This quadric squishes small probabilities, making - * it less likely we act on an unlikely task<->page - * relation. - */ - last_cpupid = page_cpupid_xchg_last(page, this_cpupid); - if (!cpupid_pid_unset(last_cpupid) && cpupid_to_nid(last_cpupid) != thisnid) { + if (!should_numa_migrate_memory(current, page, curnid, thiscpu)) goto out; - } } if (curnid != polnid) -- cgit v0.10.2 From 7e2703e6099609adc93679c4d45cd6247f565971 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:45 -0500 Subject: sched/numa: Normalize faults_cpu stats and weigh by CPU use Tracing the code that decides the active nodes has made it abundantly clear that the naive implementation of the faults_from code has issues. Specifically, the garbage collector in some workloads will access orders of magnitudes more memory than the threads that do all the active work. This resulted in the node with the garbage collector being marked the only active node in the group. This issue is avoided if we weigh the statistics by CPU use of each task in the numa group, instead of by how many faults each thread has occurred. To achieve this, we normalize the number of faults to the fraction of faults that occurred on each node, and then multiply that fraction by the fraction of CPU time the task has used since the last time task_numa_placement was invoked. This way the nodes in the active node mask will be the ones where the tasks from the numa group are most actively running, and the influence of eg. the garbage collector and other do-little threads is properly minimized. On a 4 node system, using CPU use statistics calculated over a longer interval results in about 1% fewer page migrations with two 32-warehouse specjbb runs on a 4 node system, and about 5% fewer page migrations, as well as 1% better throughput, with two 8-warehouse specjbb runs, as compared with the shorter term statistics kept by the scheduler. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-7-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 5ab3b89..ef92953 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1459,6 +1459,8 @@ struct task_struct { int numa_preferred_nid; unsigned long numa_migrate_retry; u64 node_stamp; /* migration stamp */ + u64 last_task_numa_placement; + u64 last_sum_exec_runtime; struct callback_head numa_work; struct list_head numa_entry; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bc708c5..a561c9e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1746,6 +1746,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->numa_work.next = &p->numa_work; p->numa_faults_memory = NULL; p->numa_faults_buffer_memory = NULL; + p->last_task_numa_placement = 0; + p->last_sum_exec_runtime = 0; INIT_LIST_HEAD(&p->numa_entry); p->numa_group = NULL; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index eeabb33..8fc3a82 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -887,6 +887,11 @@ struct numa_group { struct rcu_head rcu; nodemask_t active_nodes; unsigned long total_faults; + /* + * Faults_cpu is used to decide whether memory should move + * towards the CPU. As a consequence, these stats are weighted + * more by CPU use than by memory faults. + */ unsigned long *faults_cpu; unsigned long faults[0]; }; @@ -1446,11 +1451,41 @@ static void update_task_scan_period(struct task_struct *p, memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); } +/* + * Get the fraction of time the task has been running since the last + * NUMA placement cycle. The scheduler keeps similar statistics, but + * decays those on a 32ms period, which is orders of magnitude off + * from the dozens-of-seconds NUMA balancing period. Use the scheduler + * stats only if the task is so new there are no NUMA statistics yet. + */ +static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) +{ + u64 runtime, delta, now; + /* Use the start of this time slice to avoid calculations. */ + now = p->se.exec_start; + runtime = p->se.sum_exec_runtime; + + if (p->last_task_numa_placement) { + delta = runtime - p->last_sum_exec_runtime; + *period = now - p->last_task_numa_placement; + } else { + delta = p->se.avg.runnable_avg_sum; + *period = p->se.avg.runnable_avg_period; + } + + p->last_sum_exec_runtime = runtime; + p->last_task_numa_placement = now; + + return delta; +} + static void task_numa_placement(struct task_struct *p) { int seq, nid, max_nid = -1, max_group_nid = -1; unsigned long max_faults = 0, max_group_faults = 0; unsigned long fault_types[2] = { 0, 0 }; + unsigned long total_faults; + u64 runtime, period; spinlock_t *group_lock = NULL; seq = ACCESS_ONCE(p->mm->numa_scan_seq); @@ -1459,6 +1494,10 @@ static void task_numa_placement(struct task_struct *p) p->numa_scan_seq = seq; p->numa_scan_period_max = task_scan_max(p); + total_faults = p->numa_faults_locality[0] + + p->numa_faults_locality[1]; + runtime = numa_get_avg_runtime(p, &period); + /* If the task is part of a group prevent parallel updates to group stats */ if (p->numa_group) { group_lock = &p->numa_group->lock; @@ -1471,7 +1510,7 @@ static void task_numa_placement(struct task_struct *p) int priv, i; for (priv = 0; priv < 2; priv++) { - long diff, f_diff; + long diff, f_diff, f_weight; i = task_faults_idx(nid, priv); diff = -p->numa_faults_memory[i]; @@ -1483,8 +1522,18 @@ static void task_numa_placement(struct task_struct *p) fault_types[priv] += p->numa_faults_buffer_memory[i]; p->numa_faults_buffer_memory[i] = 0; + /* + * Normalize the faults_from, so all tasks in a group + * count according to CPU use, instead of by the raw + * number of faults. Tasks with little runtime have + * little over-all impact on throughput, and thus their + * faults are less important. + */ + f_weight = div64_u64(runtime << 16, period + 1); + f_weight = (f_weight * p->numa_faults_buffer_cpu[i]) / + (total_faults + 1); p->numa_faults_cpu[i] >>= 1; - p->numa_faults_cpu[i] += p->numa_faults_buffer_cpu[i]; + p->numa_faults_cpu[i] += f_weight; p->numa_faults_buffer_cpu[i] = 0; faults += p->numa_faults_memory[i]; -- cgit v0.10.2 From 35664fd41e1c8cc4f0b89f6a51db5af39ba50640 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:46 -0500 Subject: sched/numa: Do statistics calculation using local variables only The current code in task_numa_placement calculates the difference between the old and the new value, but also temporarily stores half of the old value in the per-process variables. The NUMA balancing code looks at those per-process variables, and having other tasks temporarily see halved statistics could lead to unwanted numa migrations. This can be avoided by doing all the math in local variables. This change also simplifies the code a little. Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-8-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8fc3a82..4c44990 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1513,12 +1513,9 @@ static void task_numa_placement(struct task_struct *p) long diff, f_diff, f_weight; i = task_faults_idx(nid, priv); - diff = -p->numa_faults_memory[i]; - f_diff = -p->numa_faults_cpu[i]; /* Decay existing window, copy faults since last scan */ - p->numa_faults_memory[i] >>= 1; - p->numa_faults_memory[i] += p->numa_faults_buffer_memory[i]; + diff = p->numa_faults_buffer_memory[i] - p->numa_faults_memory[i] / 2; fault_types[priv] += p->numa_faults_buffer_memory[i]; p->numa_faults_buffer_memory[i] = 0; @@ -1532,13 +1529,12 @@ static void task_numa_placement(struct task_struct *p) f_weight = div64_u64(runtime << 16, period + 1); f_weight = (f_weight * p->numa_faults_buffer_cpu[i]) / (total_faults + 1); - p->numa_faults_cpu[i] >>= 1; - p->numa_faults_cpu[i] += f_weight; + f_diff = f_weight - p->numa_faults_cpu[i] / 2; p->numa_faults_buffer_cpu[i] = 0; + p->numa_faults_memory[i] += diff; + p->numa_faults_cpu[i] += f_diff; faults += p->numa_faults_memory[i]; - diff += p->numa_faults_memory[i]; - f_diff += p->numa_faults_cpu[i]; p->total_numa_faults += diff; if (p->numa_group) { /* safe because we can only change our own group */ -- cgit v0.10.2 From 58b46da336a9312b2e21bb576d1c2c484dbf6257 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:47 -0500 Subject: sched/numa: Rename variables in task_numa_fault() We track both the node of the memory after a NUMA fault, and the node of the CPU on which the fault happened. Rename the local variables in task_numa_fault to make things more explicit. Suggested-by: Mel Gorman Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-9-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4c44990..d5832c3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1735,11 +1735,11 @@ void task_numa_free(struct task_struct *p) /* * Got a PROT_NONE fault for a page on @node. */ -void task_numa_fault(int last_cpupid, int node, int pages, int flags) +void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) { struct task_struct *p = current; bool migrated = flags & TNF_MIGRATED; - int this_node = task_node(current); + int cpu_node = task_node(current); int priv; if (!numabalancing_enabled) @@ -1794,8 +1794,8 @@ void task_numa_fault(int last_cpupid, int node, int pages, int flags) if (migrated) p->numa_pages_migrated += pages; - p->numa_faults_buffer_memory[task_faults_idx(node, priv)] += pages; - p->numa_faults_buffer_cpu[task_faults_idx(this_node, priv)] += pages; + p->numa_faults_buffer_memory[task_faults_idx(mem_node, priv)] += pages; + p->numa_faults_buffer_cpu[task_faults_idx(cpu_node, priv)] += pages; p->numa_faults_locality[!!(flags & TNF_FAULT_LOCAL)] += pages; } -- cgit v0.10.2 From be1e4e760d940c14d119bffef5eb007dfdf29046 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Mon, 27 Jan 2014 17:03:48 -0500 Subject: sched/numa: Turn some magic numbers into #defines Cleanup suggested by Mel Gorman. Now the code contains some more hints on what statistics go where. Suggested-by: Mel Gorman Signed-off-by: Rik van Riel Acked-by: Mel Gorman Signed-off-by: Peter Zijlstra Cc: Chegu Vinod Link: http://lkml.kernel.org/r/1390860228-21539-10-git-send-email-riel@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d5832c3..1f41b12 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -896,6 +896,15 @@ struct numa_group { unsigned long faults[0]; }; +/* Shared or private faults. */ +#define NR_NUMA_HINT_FAULT_TYPES 2 + +/* Memory and CPU locality */ +#define NR_NUMA_HINT_FAULT_STATS (NR_NUMA_HINT_FAULT_TYPES * 2) + +/* Averaged statistics, and temporary buffers. */ +#define NR_NUMA_HINT_FAULT_BUCKETS (NR_NUMA_HINT_FAULT_STATS * 2) + pid_t task_numa_group_id(struct task_struct *p) { return p->numa_group ? p->numa_group->gid : 0; @@ -903,7 +912,7 @@ pid_t task_numa_group_id(struct task_struct *p) static inline int task_faults_idx(int nid, int priv) { - return 2 * nid + priv; + return NR_NUMA_HINT_FAULT_TYPES * nid + priv; } static inline unsigned long task_faults(struct task_struct *p, int nid) @@ -1509,7 +1518,7 @@ static void task_numa_placement(struct task_struct *p) unsigned long faults = 0, group_faults = 0; int priv, i; - for (priv = 0; priv < 2; priv++) { + for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) { long diff, f_diff, f_weight; i = task_faults_idx(nid, priv); @@ -1620,11 +1629,12 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, INIT_LIST_HEAD(&grp->task_list); grp->gid = p->pid; /* Second half of the array tracks nids where faults happen */ - grp->faults_cpu = grp->faults + 2 * nr_node_ids; + grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES * + nr_node_ids; node_set(task_node(current), grp->active_nodes); - for (i = 0; i < 4*nr_node_ids; i++) + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) grp->faults[i] = p->numa_faults_memory[i]; grp->total_faults = p->total_numa_faults; @@ -1682,7 +1692,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, double_lock(&my_grp->lock, &grp->lock); - for (i = 0; i < 4*nr_node_ids; i++) { + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { my_grp->faults[i] -= p->numa_faults_memory[i]; grp->faults[i] += p->numa_faults_memory[i]; } @@ -1714,7 +1724,7 @@ void task_numa_free(struct task_struct *p) if (grp) { spin_lock(&grp->lock); - for (i = 0; i < 4*nr_node_ids; i++) + for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) grp->faults[i] -= p->numa_faults_memory[i]; grp->total_faults -= p->total_numa_faults; @@ -1755,14 +1765,20 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) /* Allocate buffer to track faults on a per-node basis */ if (unlikely(!p->numa_faults_memory)) { - int size = sizeof(*p->numa_faults_memory) * 4 * nr_node_ids; + int size = sizeof(*p->numa_faults_memory) * + NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids; - /* numa_faults and numa_faults_buffer share the allocation */ - p->numa_faults_memory = kzalloc(size * 2, GFP_KERNEL|__GFP_NOWARN); + p->numa_faults_memory = kzalloc(size, GFP_KERNEL|__GFP_NOWARN); if (!p->numa_faults_memory) return; BUG_ON(p->numa_faults_buffer_memory); + /* + * The averaged statistics, shared & private, memory & cpu, + * occupy the first half of the array. The second half of the + * array is for current counters, which are averaged into the + * first set by task_numa_placement. + */ p->numa_faults_cpu = p->numa_faults_memory + (2 * nr_node_ids); p->numa_faults_buffer_memory = p->numa_faults_memory + (4 * nr_node_ids); p->numa_faults_buffer_cpu = p->numa_faults_memory + (6 * nr_node_ids); -- cgit v0.10.2 From 6b5a1f74e50170e64104135490dc32b657483594 Mon Sep 17 00:00:00 2001 From: Chris Zankel Date: Tue, 28 Jan 2014 22:09:51 -0800 Subject: xtensa: fix fast_syscall_spill_registers The original implementation could clobber registers under certain conditions. The Xtensa processor architecture uses windowed registers and the original implementation was using a4 as a temporary register, which under certain conditions could be register a0 of the oldest window frame, and didn't always restore the content correctly. By moving the _spill_registers routine inside the fast system call, it frees up one more register (the return address is not required anymore) for the spill routine. Signed-off-by: Chris Zankel diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index b61e251..ef7f499 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1081,34 +1081,202 @@ ENTRY(fast_syscall_spill_registers) rsr a0, sar s32i a3, a2, PT_AREG3 - s32i a4, a2, PT_AREG4 - s32i a0, a2, PT_AREG5 # store SAR to PT_AREG5 + s32i a0, a2, PT_SAR - /* The spill routine might clobber a7, a11, and a15. */ + /* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */ + s32i a4, a2, PT_AREG4 s32i a7, a2, PT_AREG7 + s32i a8, a2, PT_AREG8 s32i a11, a2, PT_AREG11 + s32i a12, a2, PT_AREG12 s32i a15, a2, PT_AREG15 - call0 _spill_registers # destroys a3, a4, and SAR + /* + * Rotate ws so that the current windowbase is at bit 0. + * Assume ws = xxxwww1yy (www1 current window frame). + * Rotate ws right so that a4 = yyxxxwww1. + */ + + rsr a0, windowbase + rsr a3, windowstart # a3 = xxxwww1yy + ssr a0 # holds WB + slli a0, a3, WSBITS + or a3, a3, a0 # a3 = xxxwww1yyxxxwww1yy + srl a3, a3 # a3 = 00xxxwww1yyxxxwww1 + + /* We are done if there are no more than the current register frame. */ + + extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww + movi a0, (1 << (WSBITS-1)) + _beqz a3, .Lnospill # only one active frame? jump + + /* We want 1 at the top, so that we return to the current windowbase */ + + or a3, a3, a0 # 1yyxxxwww + + /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */ + + wsr a3, windowstart # save shifted windowstart + neg a0, a3 + and a3, a0, a3 # first bit set from right: 000010000 + + ffs_ws a0, a3 # a0: shifts to skip empty frames + movi a3, WSBITS + sub a0, a3, a0 # WSBITS-a0:number of 0-bits from right + ssr a0 # save in SAR for later. + + rsr a3, windowbase + add a3, a3, a0 + wsr a3, windowbase + rsync + + rsr a3, windowstart + srl a3, a3 # shift windowstart + + /* WB is now just one frame below the oldest frame in the register + window. WS is shifted so the oldest frame is in bit 0, thus, WB + and WS differ by one 4-register frame. */ + + /* Save frames. Depending what call was used (call4, call8, call12), + * we have to save 4,8. or 12 registers. + */ + + +.Lloop: _bbsi.l a3, 1, .Lc4 + _bbci.l a3, 2, .Lc12 + +.Lc8: s32e a4, a13, -16 + l32e a4, a5, -12 + s32e a8, a4, -32 + s32e a5, a13, -12 + s32e a6, a13, -8 + s32e a7, a13, -4 + s32e a9, a4, -28 + s32e a10, a4, -24 + s32e a11, a4, -20 + srli a11, a3, 2 # shift windowbase by 2 + rotw 2 + _bnei a3, 1, .Lloop + j .Lexit + +.Lc4: s32e a4, a9, -16 + s32e a5, a9, -12 + s32e a6, a9, -8 + s32e a7, a9, -4 + + srli a7, a3, 1 + rotw 1 + _bnei a3, 1, .Lloop + j .Lexit + +.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 2 shouldn't be zero! + + /* 12-register frame (call12) */ + + l32e a0, a5, -12 + s32e a8, a0, -48 + mov a8, a0 + + s32e a9, a8, -44 + s32e a10, a8, -40 + s32e a11, a8, -36 + s32e a12, a8, -32 + s32e a13, a8, -28 + s32e a14, a8, -24 + s32e a15, a8, -20 + srli a15, a3, 3 + + /* The stack pointer for a4..a7 is out of reach, so we rotate the + * window, grab the stackpointer, and rotate back. + * Alternatively, we could also use the following approach, but that + * makes the fixup routine much more complicated: + * rotw 1 + * s32e a0, a13, -16 + * ... + * rotw 2 + */ + + rotw 1 + mov a4, a13 + rotw -1 + + s32e a4, a8, -16 + s32e a5, a8, -12 + s32e a6, a8, -8 + s32e a7, a8, -4 + + rotw 3 + + _beqi a3, 1, .Lexit + j .Lloop + +.Lexit: + + /* Done. Do the final rotation and set WS */ + + rotw 1 + rsr a3, windowbase + ssl a3 + movi a3, 1 + sll a3, a3 + wsr a3, windowstart +.Lnospill: /* Advance PC, restore registers and SAR, and return from exception. */ - l32i a3, a2, PT_AREG5 - l32i a4, a2, PT_AREG4 + l32i a3, a2, PT_SAR l32i a0, a2, PT_AREG0 wsr a3, sar l32i a3, a2, PT_AREG3 /* Restore clobbered registers. */ + l32i a4, a2, PT_AREG4 l32i a7, a2, PT_AREG7 + l32i a8, a2, PT_AREG8 l32i a11, a2, PT_AREG11 + l32i a12, a2, PT_AREG12 l32i a15, a2, PT_AREG15 movi a2, 0 rfe +.Linvalid_mask: + + /* We get here because of an unrecoverable error in the window + * registers, so set up a dummy frame and kill the user application. + * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. + */ + + movi a0, 1 + movi a1, 0 + + wsr a0, windowstart + wsr a1, windowbase + rsync + + movi a0, 0 + + rsr a3, excsave1 + l32i a1, a3, EXC_TABLE_KSTK + + movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL + wsr a4, ps + rsync + + movi a6, SIGSEGV + movi a4, do_exit + callx4 a4 + + /* shouldn't return, so panic */ + + wsr a0, excsave1 + movi a0, unrecoverable_exception + callx0 a0 # should not return +1: j 1b + + ENDPROC(fast_syscall_spill_registers) /* Fixup handler. @@ -1232,209 +1400,6 @@ ENTRY(fast_syscall_spill_registers_fixup_return) ENDPROC(fast_syscall_spill_registers_fixup_return) -/* - * spill all registers. - * - * This is not a real function. The following conditions must be met: - * - * - must be called with call0. - * - uses a3, a4 and SAR. - * - the last 'valid' register of each frame are clobbered. - * - the caller must have registered a fixup handler - * (or be inside a critical section) - * - PS_EXCM must be set (PS_WOE cleared?) - */ - -ENTRY(_spill_registers) - - /* - * Rotate ws so that the current windowbase is at bit 0. - * Assume ws = xxxwww1yy (www1 current window frame). - * Rotate ws right so that a4 = yyxxxwww1. - */ - - rsr a4, windowbase - rsr a3, windowstart # a3 = xxxwww1yy - ssr a4 # holds WB - slli a4, a3, WSBITS - or a3, a3, a4 # a3 = xxxwww1yyxxxwww1yy - srl a3, a3 # a3 = 00xxxwww1yyxxxwww1 - - /* We are done if there are no more than the current register frame. */ - - extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww - movi a4, (1 << (WSBITS-1)) - _beqz a3, .Lnospill # only one active frame? jump - - /* We want 1 at the top, so that we return to the current windowbase */ - - or a3, a3, a4 # 1yyxxxwww - - /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */ - - wsr a3, windowstart # save shifted windowstart - neg a4, a3 - and a3, a4, a3 # first bit set from right: 000010000 - - ffs_ws a4, a3 # a4: shifts to skip empty frames - movi a3, WSBITS - sub a4, a3, a4 # WSBITS-a4:number of 0-bits from right - ssr a4 # save in SAR for later. - - rsr a3, windowbase - add a3, a3, a4 - wsr a3, windowbase - rsync - - rsr a3, windowstart - srl a3, a3 # shift windowstart - - /* WB is now just one frame below the oldest frame in the register - window. WS is shifted so the oldest frame is in bit 0, thus, WB - and WS differ by one 4-register frame. */ - - /* Save frames. Depending what call was used (call4, call8, call12), - * we have to save 4,8. or 12 registers. - */ - - _bbsi.l a3, 1, .Lc4 - _bbsi.l a3, 2, .Lc8 - - /* Special case: we have a call12-frame starting at a4. */ - - _bbci.l a3, 3, .Lc12 # bit 3 shouldn't be zero! (Jump to Lc12 first) - - s32e a4, a1, -16 # a1 is valid with an empty spill area - l32e a4, a5, -12 - s32e a8, a4, -48 - mov a8, a4 - l32e a4, a1, -16 - j .Lc12c - -.Lnospill: - ret - -.Lloop: _bbsi.l a3, 1, .Lc4 - _bbci.l a3, 2, .Lc12 - -.Lc8: s32e a4, a13, -16 - l32e a4, a5, -12 - s32e a8, a4, -32 - s32e a5, a13, -12 - s32e a6, a13, -8 - s32e a7, a13, -4 - s32e a9, a4, -28 - s32e a10, a4, -24 - s32e a11, a4, -20 - - srli a11, a3, 2 # shift windowbase by 2 - rotw 2 - _bnei a3, 1, .Lloop - -.Lexit: /* Done. Do the final rotation, set WS, and return. */ - - rotw 1 - rsr a3, windowbase - ssl a3 - movi a3, 1 - sll a3, a3 - wsr a3, windowstart - ret - -.Lc4: s32e a4, a9, -16 - s32e a5, a9, -12 - s32e a6, a9, -8 - s32e a7, a9, -4 - - srli a7, a3, 1 - rotw 1 - _bnei a3, 1, .Lloop - j .Lexit - -.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 2 shouldn't be zero! - - /* 12-register frame (call12) */ - - l32e a2, a5, -12 - s32e a8, a2, -48 - mov a8, a2 - -.Lc12c: s32e a9, a8, -44 - s32e a10, a8, -40 - s32e a11, a8, -36 - s32e a12, a8, -32 - s32e a13, a8, -28 - s32e a14, a8, -24 - s32e a15, a8, -20 - srli a15, a3, 3 - - /* The stack pointer for a4..a7 is out of reach, so we rotate the - * window, grab the stackpointer, and rotate back. - * Alternatively, we could also use the following approach, but that - * makes the fixup routine much more complicated: - * rotw 1 - * s32e a0, a13, -16 - * ... - * rotw 2 - */ - - rotw 1 - mov a5, a13 - rotw -1 - - s32e a4, a9, -16 - s32e a5, a9, -12 - s32e a6, a9, -8 - s32e a7, a9, -4 - - rotw 3 - - _beqi a3, 1, .Lexit - j .Lloop - -.Linvalid_mask: - - /* We get here because of an unrecoverable error in the window - * registers. If we are in user space, we kill the application, - * however, this condition is unrecoverable in kernel space. - */ - - rsr a0, ps - _bbci.l a0, PS_UM_BIT, 1f - - /* User space: Setup a dummy frame and kill application. - * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. - */ - - movi a0, 1 - movi a1, 0 - - wsr a0, windowstart - wsr a1, windowbase - rsync - - movi a0, 0 - - rsr a3, excsave1 - l32i a1, a3, EXC_TABLE_KSTK - - movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL - wsr a4, ps - rsync - - movi a6, SIGSEGV - movi a4, do_exit - callx4 a4 - -1: /* Kernel space: PANIC! */ - - wsr a0, excsave1 - movi a0, unrecoverable_exception - callx0 a0 # should not return -1: j 1b - -ENDPROC(_spill_registers) - #ifdef CONFIG_MMU /* * We should never get here. Bail out! -- cgit v0.10.2 From c0e50d41126e4786d9cf1105bdf783e55c99f915 Mon Sep 17 00:00:00 2001 From: Chris Zankel Date: Tue, 28 Jan 2014 22:09:51 -0800 Subject: xtensa: fix fast_syscall_spill_registers The original implementation could clobber registers under certain conditions. The Xtensa processor architecture uses windowed registers and the original implementation was using a4 as a temporary register, which under certain conditions could be register a0 of the oldest window frame, and didn't always restore the content correctly. By moving the _spill_registers routine inside the fast system call, it frees up one more register (the return address is not required anymore) for the spill routine. Signed-off-by: Chris Zankel diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index cb8fd44..f9e1ec3 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -235,7 +235,7 @@ ENTRY(_DoubleExceptionVector) /* Check for overflow/underflow exception, jump if overflow. */ - _bbci.l a0, 6, _DoubleExceptionVector_WindowOverflow + bbci.l a0, 6, _DoubleExceptionVector_WindowOverflow /* * Restart window underflow exception. diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 74a60c7..80b33ed 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -122,9 +122,7 @@ EXPORT_SYMBOL(insw); EXPORT_SYMBOL(insl); extern long common_exception_return; -extern long _spill_registers; EXPORT_SYMBOL(common_exception_return); -EXPORT_SYMBOL(_spill_registers); #ifdef CONFIG_FUNCTION_TRACER EXPORT_SYMBOL(_mcount); -- cgit v0.10.2 From 70e0ac5f3683f48a8174a6f231a0f3097217c189 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 27 Jan 2014 09:00:57 +0000 Subject: hung_task/Documentation: Fix hung_task_warnings description Improve the documentation on hung_task_warnings. Signed-off-by: Aaron Tomlin Link: http://lkml.kernel.org/n/tip-xepjnxzummfDlg9lvhh7Rlzc@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 4205f3c..621c56c 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -319,10 +319,11 @@ This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. ============================================================== -hung_task_warning: +hung_task_warnings: The maximum number of warnings to report. During a check interval -When this value is reached, no more the warnings will be reported. +if a hung task is detected, this value is decreased by 1. +When this value reaches 0, no more warnings will be reported. This file shows up if CONFIG_DETECT_HUNG_TASK is enabled. -1: report an infinite number of warnings. -- cgit v0.10.2 From 5547fec74a566e1f5e00a937b9a367f7c6a94a8b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 29 Jan 2014 16:17:57 +0300 Subject: UBI: fix some use after free bugs Move the kmem_cache_free() calls down a couple lines. Signed-off-by: Dan Carpenter Signed-off-by: Artem Bityutskiy diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index ead8613..c5dad65 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -463,8 +463,8 @@ static int scan_pool(struct ubi_device *ubi, struct ubi_attach_info *ai, } } if (found_orphan) { - kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); } new_aeb = kmem_cache_alloc(ai->aeb_slab_cache, @@ -846,16 +846,16 @@ fail_bad: ret = UBI_BAD_FASTMAP; fail: list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &used, u.list) { - kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); } list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &eba_orphans, u.list) { - kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); } list_for_each_entry_safe(tmp_aeb, _tmp_aeb, &free, u.list) { - kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); list_del(&tmp_aeb->u.list); + kmem_cache_free(ai->aeb_slab_cache, tmp_aeb); } return ret; -- cgit v0.10.2 From b399fe355b30d0102e7690c99e6f764ddfd32ec3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 5 Feb 2014 01:55:11 +0100 Subject: x86: Disable generation of traditional x87 instructions We recently had the case where wrongly used floating-constant 'E' caused the generation of traditional x87 instructions in kernel code and wreaking all kinds of havoc. Disable the generation of those too. This will save people a lot of time when trying to debug such issues by erroring out of the build instead of let them manifest themselves in very spectacular and happy-crappy ways at runtime. We're using -mno-fp-ret-in-387 in addition to -mno-80387 (which is == -msoft-float) because, as the gcc manpage says: On machines where a function returns floating-point results in the 80387 register stack, some floating-point opcodes may be emitted even if -msoft-float is used. so we want to turn off *all* non-integer instructions involving any architectural FPU state, unless it is absolutely necessary (and those cases need special handling anyway). Cc: Jiri Kosina Cc: Michael Matz Cc: Michal Hocko Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1391561711-3023-1-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin diff --git a/arch/x86/Makefile b/arch/x86/Makefile index eeda43a..a414b14 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -82,8 +82,8 @@ else KBUILD_AFLAGS += -m64 KBUILD_CFLAGS += -m64 - # Don't autogenerate MMX or SSE instructions - KBUILD_CFLAGS += -mno-mmx -mno-sse + # Don't autogenerate traditional x87, MMX or SSE instructions + KBUILD_CFLAGS += -mno-mmx -mno-sse -mno-80387 -mno-fp-ret-in-387 # Use -mpreferred-stack-boundary=3 if supported. KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3) -- cgit v0.10.2 From 07ad6836fa21b8ae1715d5f82a0d28c4140e1e73 Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Sat, 25 Jan 2014 19:19:07 +0100 Subject: clk: mvebu: armada-370: maintain clock init order Init order of CLK_OF_DECLARE'd drivers depends on compile order. Unfortunately, clk_of_init does not allow drivers to return errors, e.g. -EPROBE_DEFER if parent clocks have not been registered, yet. To avoid init order woes for MVEBU clock drivers, we take care of proper init order ourselves. This patch joins core-clk and gating-clk init to maintain proper init order. Signed-off-by: Sebastian Hesselbarth Tested-by: Ezequiel Garcia Signed-off-by: Jason Cooper diff --git a/drivers/clk/mvebu/armada-370.c b/drivers/clk/mvebu/armada-370.c index 81a202d..bef198a 100644 --- a/drivers/clk/mvebu/armada-370.c +++ b/drivers/clk/mvebu/armada-370.c @@ -141,13 +141,6 @@ static const struct coreclk_soc_desc a370_coreclks = { .num_ratios = ARRAY_SIZE(a370_coreclk_ratios), }; -static void __init a370_coreclk_init(struct device_node *np) -{ - mvebu_coreclk_setup(np, &a370_coreclks); -} -CLK_OF_DECLARE(a370_core_clk, "marvell,armada-370-core-clock", - a370_coreclk_init); - /* * Clock Gating Control */ @@ -168,9 +161,15 @@ static const struct clk_gating_soc_desc a370_gating_desc[] __initconst = { { } }; -static void __init a370_clk_gating_init(struct device_node *np) +static void __init a370_clk_init(struct device_node *np) { - mvebu_clk_gating_setup(np, a370_gating_desc); + struct device_node *cgnp = + of_find_compatible_node(NULL, NULL, "marvell,armada-370-gating-clock"); + + mvebu_coreclk_setup(np, &a370_coreclks); + + if (cgnp) + mvebu_clk_gating_setup(cgnp, a370_gating_desc); } -CLK_OF_DECLARE(a370_clk_gating, "marvell,armada-370-gating-clock", - a370_clk_gating_init); +CLK_OF_DECLARE(a370_clk, "marvell,armada-370-core-clock", a370_clk_init); + -- cgit v0.10.2 From 0a11a6ae94373f37e738f7dc8f51d60a78d78a58 Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Sat, 25 Jan 2014 19:19:08 +0100 Subject: clk: mvebu: armada-xp: maintain clock init order Init order of CLK_OF_DECLARE'd drivers depends on compile order. Unfortunately, clk_of_init does not allow drivers to return errors, e.g. -EPROBE_DEFER if parent clocks have not been registered, yet. To avoid init order woes for MVEBU clock drivers, we take care of proper init order ourselves. This patch joins core-clk and gating-clk init to maintain proper init order. Signed-off-by: Sebastian Hesselbarth Signed-off-by: Jason Cooper diff --git a/drivers/clk/mvebu/armada-xp.c b/drivers/clk/mvebu/armada-xp.c index 9922c44..b309431 100644 --- a/drivers/clk/mvebu/armada-xp.c +++ b/drivers/clk/mvebu/armada-xp.c @@ -158,13 +158,6 @@ static const struct coreclk_soc_desc axp_coreclks = { .num_ratios = ARRAY_SIZE(axp_coreclk_ratios), }; -static void __init axp_coreclk_init(struct device_node *np) -{ - mvebu_coreclk_setup(np, &axp_coreclks); -} -CLK_OF_DECLARE(axp_core_clk, "marvell,armada-xp-core-clock", - axp_coreclk_init); - /* * Clock Gating Control */ @@ -202,9 +195,14 @@ static const struct clk_gating_soc_desc axp_gating_desc[] __initconst = { { } }; -static void __init axp_clk_gating_init(struct device_node *np) +static void __init axp_clk_init(struct device_node *np) { - mvebu_clk_gating_setup(np, axp_gating_desc); + struct device_node *cgnp = + of_find_compatible_node(NULL, NULL, "marvell,armada-xp-gating-clock"); + + mvebu_coreclk_setup(np, &axp_coreclks); + + if (cgnp) + mvebu_clk_gating_setup(cgnp, axp_gating_desc); } -CLK_OF_DECLARE(axp_clk_gating, "marvell,armada-xp-gating-clock", - axp_clk_gating_init); +CLK_OF_DECLARE(axp_clk, "marvell,armada-xp-core-clock", axp_clk_init); -- cgit v0.10.2 From 8f7fc5450b64210b08cb3daabb3473dbad197e54 Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Sat, 25 Jan 2014 19:19:09 +0100 Subject: clk: mvebu: dove: maintain clock init order Init order of CLK_OF_DECLARE'd drivers depends on compile order. Unfortunately, clk_of_init does not allow drivers to return errors, e.g. -EPROBE_DEFER if parent clocks have not been registered, yet. To avoid init order woes for MVEBU clock drivers, we take care of proper init order ourselves. This patch joins core-clk and gating-clk init to maintain proper init order. Signed-off-by: Sebastian Hesselbarth Tested-by: Ezequiel Garcia Signed-off-by: Jason Cooper diff --git a/drivers/clk/mvebu/dove.c b/drivers/clk/mvebu/dove.c index 38aee1e..b8c2424 100644 --- a/drivers/clk/mvebu/dove.c +++ b/drivers/clk/mvebu/dove.c @@ -154,12 +154,6 @@ static const struct coreclk_soc_desc dove_coreclks = { .num_ratios = ARRAY_SIZE(dove_coreclk_ratios), }; -static void __init dove_coreclk_init(struct device_node *np) -{ - mvebu_coreclk_setup(np, &dove_coreclks); -} -CLK_OF_DECLARE(dove_core_clk, "marvell,dove-core-clock", dove_coreclk_init); - /* * Clock Gating Control */ @@ -186,9 +180,14 @@ static const struct clk_gating_soc_desc dove_gating_desc[] __initconst = { { } }; -static void __init dove_clk_gating_init(struct device_node *np) +static void __init dove_clk_init(struct device_node *np) { - mvebu_clk_gating_setup(np, dove_gating_desc); + struct device_node *cgnp = + of_find_compatible_node(NULL, NULL, "marvell,dove-gating-clock"); + + mvebu_coreclk_setup(np, &dove_coreclks); + + if (cgnp) + mvebu_clk_gating_setup(cgnp, dove_gating_desc); } -CLK_OF_DECLARE(dove_clk_gating, "marvell,dove-gating-clock", - dove_clk_gating_init); +CLK_OF_DECLARE(dove_clk, "marvell,dove-core-clock", dove_clk_init); -- cgit v0.10.2 From 58d516ae95cbf7eed44f85a01cdfad41f17b4197 Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Sat, 25 Jan 2014 19:19:10 +0100 Subject: clk: mvebu: kirkwood: maintain clock init order Init order of CLK_OF_DECLARE'd drivers depends on compile order. Unfortunately, clk_of_init does not allow drivers to return errors, e.g. -EPROBE_DEFER if parent clocks have not been registered, yet. To avoid init order woes for MVEBU clock drivers, we take care of proper init order ourselves. This patch joins core-clk and gating-clk init to maintain proper init order. Signed-off-by: Sebastian Hesselbarth Tested-by: Ezequiel Garcia Signed-off-by: Jason Cooper diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c index 2636a55..ddb666a 100644 --- a/drivers/clk/mvebu/kirkwood.c +++ b/drivers/clk/mvebu/kirkwood.c @@ -193,13 +193,6 @@ static const struct coreclk_soc_desc kirkwood_coreclks = { .num_ratios = ARRAY_SIZE(kirkwood_coreclk_ratios), }; -static void __init kirkwood_coreclk_init(struct device_node *np) -{ - mvebu_coreclk_setup(np, &kirkwood_coreclks); -} -CLK_OF_DECLARE(kirkwood_core_clk, "marvell,kirkwood-core-clock", - kirkwood_coreclk_init); - static const struct coreclk_soc_desc mv88f6180_coreclks = { .get_tclk_freq = kirkwood_get_tclk_freq, .get_cpu_freq = mv88f6180_get_cpu_freq, @@ -208,13 +201,6 @@ static const struct coreclk_soc_desc mv88f6180_coreclks = { .num_ratios = ARRAY_SIZE(kirkwood_coreclk_ratios), }; -static void __init mv88f6180_coreclk_init(struct device_node *np) -{ - mvebu_coreclk_setup(np, &mv88f6180_coreclks); -} -CLK_OF_DECLARE(mv88f6180_core_clk, "marvell,mv88f6180-core-clock", - mv88f6180_coreclk_init); - /* * Clock Gating Control */ @@ -239,9 +225,21 @@ static const struct clk_gating_soc_desc kirkwood_gating_desc[] __initconst = { { } }; -static void __init kirkwood_clk_gating_init(struct device_node *np) +static void __init kirkwood_clk_init(struct device_node *np) { - mvebu_clk_gating_setup(np, kirkwood_gating_desc); + struct device_node *cgnp = + of_find_compatible_node(NULL, NULL, "marvell,kirkwood-gating-clock"); + + + if (of_device_is_compatible(np, "marvell,mv88f6180-core-clock")) + mvebu_coreclk_setup(np, &mv88f6180_coreclks); + else + mvebu_coreclk_setup(np, &kirkwood_coreclks); + + if (cgnp) + mvebu_clk_gating_setup(cgnp, kirkwood_gating_desc); } -CLK_OF_DECLARE(kirkwood_clk_gating, "marvell,kirkwood-gating-clock", - kirkwood_clk_gating_init); +CLK_OF_DECLARE(kirkwood_clk, "marvell,kirkwood-core-clock", + kirkwood_clk_init); +CLK_OF_DECLARE(mv88f6180_clk, "marvell,mv88f6180-core-clock", + kirkwood_clk_init); -- cgit v0.10.2 From e8b175946c16d7001b22620f52d78ab497efc9d0 Mon Sep 17 00:00:00 2001 From: Shaibal Dutta Date: Fri, 31 Jan 2014 11:18:24 -0800 Subject: timekeeping: Move clock sync work to power efficient workqueue For better use of CPU idle time, allow the scheduler to select the CPU on which the CMOS clock sync work would be scheduled. This improves idle residency time and conserver power. This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected. Signed-off-by: Shaibal Dutta [zoran.markovic@linaro.org: Added commit message. Aligned code.] Signed-off-by: Zoran Markovic Cc: John Stultz Link: http://lkml.kernel.org/r/1391195904-12497-1-git-send-email-zoran.markovic@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index af8d1d4..419a52c 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -514,12 +514,13 @@ static void sync_cmos_clock(struct work_struct *work) next.tv_sec++; next.tv_nsec -= NSEC_PER_SEC; } - schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next)); + queue_delayed_work(system_power_efficient_wq, + &sync_cmos_work, timespec_to_jiffies(&next)); } void ntp_notify_cmos_timer(void) { - schedule_delayed_work(&sync_cmos_work, 0); + queue_delayed_work(system_power_efficient_wq, &sync_cmos_work, 0); } #else -- cgit v0.10.2 From 627ee7947e2e83ba565c31c5c9373d6e364b1ecd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 3 Feb 2014 14:34:31 -0800 Subject: clockevents: Serialize calls to clockevents_update_freq() in the core We can identify the broadcast device in the core and serialize all callers including interrupts on a different CPU against the update. Also, disabling interrupts is moved into the core allowing callers to leave interrutps enabled when calling clockevents_update_freq(). Signed-off-by: Soren Brinkmann Cc: linux-arm-kernel@lists.infradead.org Cc: Soeren Brinkmann Cc: Daniel Lezcano Cc: Michal Simek Link: http://lkml.kernel.org/r/1391466877-28908-2-git-send-email-soren.brinkmann@xilinx.com Signed-off-by: Thomas Gleixner diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 086ad60..641d910 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -439,6 +439,16 @@ void clockevents_config_and_register(struct clock_event_device *dev, } EXPORT_SYMBOL_GPL(clockevents_config_and_register); +int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) +{ + clockevents_config(dev, freq); + + if (dev->mode != CLOCK_EVT_MODE_ONESHOT) + return 0; + + return clockevents_program_event(dev, dev->next_event, false); +} + /** * clockevents_update_freq - Update frequency and reprogram a clock event device. * @dev: device to modify @@ -446,17 +456,22 @@ EXPORT_SYMBOL_GPL(clockevents_config_and_register); * * Reconfigure and reprogram a clock event device in oneshot * mode. Must be called on the cpu for which the device delivers per - * cpu timer events with interrupts disabled! Returns 0 on success, - * -ETIME when the event is in the past. + * cpu timer events. If called for the broadcast device the core takes + * care of serialization. + * + * Returns 0 on success, -ETIME when the event is in the past. */ int clockevents_update_freq(struct clock_event_device *dev, u32 freq) { - clockevents_config(dev, freq); - - if (dev->mode != CLOCK_EVT_MODE_ONESHOT) - return 0; + unsigned long flags; + int ret; - return clockevents_program_event(dev, dev->next_event, false); + local_irq_save(flags); + ret = tick_broadcast_update_freq(dev, freq); + if (ret == -ENODEV) + ret = __clockevents_update_freq(dev, freq); + local_irq_restore(flags); + return ret; } /* diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 43780ab..003e6c3 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -120,6 +120,19 @@ int tick_is_broadcast_device(struct clock_event_device *dev) return (dev && tick_broadcast_device.evtdev == dev); } +int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) +{ + int ret = -ENODEV; + + if (tick_is_broadcast_device(dev)) { + raw_spin_lock(&tick_broadcast_lock); + ret = __clockevents_update_freq(dev, freq); + raw_spin_unlock(&tick_broadcast_lock); + } + return ret; +} + + static void err_broadcast(const struct cpumask *mask) { pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n"); @@ -272,12 +285,8 @@ static void tick_do_broadcast(struct cpumask *mask) */ static void tick_do_periodic_broadcast(void) { - raw_spin_lock(&tick_broadcast_lock); - cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask); tick_do_broadcast(tmpmask); - - raw_spin_unlock(&tick_broadcast_lock); } /* @@ -287,13 +296,15 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) { ktime_t next; + raw_spin_lock(&tick_broadcast_lock); + tick_do_periodic_broadcast(); /* * The device is in periodic mode. No reprogramming necessary: */ if (dev->mode == CLOCK_EVT_MODE_PERIODIC) - return; + goto unlock; /* * Setup the next period for devices, which do not have @@ -306,9 +317,11 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) next = ktime_add(next, tick_period); if (!clockevents_program_event(dev, next, false)) - return; + goto unlock; tick_do_periodic_broadcast(); } +unlock: + raw_spin_unlock(&tick_broadcast_lock); } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 8329669..26f1c0b 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -111,6 +111,7 @@ extern int tick_resume_broadcast(void); extern void tick_broadcast_init(void); extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast); +int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq); #else /* !BROADCAST */ @@ -133,6 +134,8 @@ static inline void tick_shutdown_broadcast(unsigned int *cpup) { } static inline void tick_suspend_broadcast(void) { } static inline int tick_resume_broadcast(void) { return 0; } static inline void tick_broadcast_init(void) { } +static inline int tick_broadcast_update_freq(struct clock_event_device *dev, + u32 freq) { return -ENODEV; } /* * Set the periodic handler in non broadcast mode @@ -154,5 +157,6 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) #endif +int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -- cgit v0.10.2 From fe79a9ba11962a603fb6af68fcb476e64031e46c Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Mon, 3 Feb 2014 14:34:32 -0800 Subject: clockevents: Adjust timer interval when frequency changes clockevent devices in periodic mode are not updated when the frequency of the device changes. Issue a dev->set_mode() callback which forces the device to reevaluate the timer settings. Signed-off-by: Soren Brinkmann Cc: linux-arm-kernel@lists.infradead.org Cc: Daniel Lezcano Cc: Michal Simek Link: http://lkml.kernel.org/r/1391466877-28908-3-git-send-email-soren.brinkmann@xilinx.com Signed-off-by: Thomas Gleixner diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 641d910..f85e5fd 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -443,10 +443,13 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq) { clockevents_config(dev, freq); - if (dev->mode != CLOCK_EVT_MODE_ONESHOT) - return 0; + if (dev->mode == CLOCK_EVT_MODE_ONESHOT) + return clockevents_program_event(dev, dev->next_event, false); + + if (dev->mode == CLOCK_EVT_MODE_PERIODIC) + dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev); - return clockevents_program_event(dev, dev->next_event, false); + return 0; } /** -- cgit v0.10.2 From da7e6f45c34d39186b72328bacc4dd86bff60e0a Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Fri, 7 Feb 2014 13:36:06 +0530 Subject: time: Change the return type of clockevents_notify() to integer The broadcast framework can potentially be made use of by archs which do not have an external clock device as well. Then, it is required that one of the CPUs need to handle the broadcasting of wakeup IPIs to the CPUs in deep idle. As a result its local timers should remain functional all the time. For such a CPU, the BROADCAST_ENTER notification has to fail indicating that its clock device cannot be shutdown. To make way for this support, change the return type of tick_broadcast_oneshot_control() and hence clockevents_notify() to indicate such scenarios. Signed-off-by: Preeti U Murthy Cc: deepthi@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: fweisbec@gmail.com Cc: paulus@samba.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: svaidy@linux.vnet.ibm.com Cc: peterz@infradead.org Cc: benh@kernel.crashing.org Cc: rafael.j.wysocki@intel.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140207080606.17187.78306.stgit@preeti.in.ibm.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 493aa02..e0c5a6c 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -186,9 +186,9 @@ static inline int tick_check_broadcast_expired(void) { return 0; } #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS -extern void clockevents_notify(unsigned long reason, void *arg); +extern int clockevents_notify(unsigned long reason, void *arg); #else -static inline void clockevents_notify(unsigned long reason, void *arg) {} +static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } #endif #else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */ @@ -196,7 +196,7 @@ static inline void clockevents_notify(unsigned long reason, void *arg) {} static inline void clockevents_suspend(void) {} static inline void clockevents_resume(void) {} -static inline void clockevents_notify(unsigned long reason, void *arg) {} +static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } #endif diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index f85e5fd..ad362c2 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -542,12 +542,13 @@ void clockevents_resume(void) #ifdef CONFIG_GENERIC_CLOCKEVENTS /** * clockevents_notify - notification about relevant events + * Returns 0 on success, any other value on error */ -void clockevents_notify(unsigned long reason, void *arg) +int clockevents_notify(unsigned long reason, void *arg) { struct clock_event_device *dev, *tmp; unsigned long flags; - int cpu; + int cpu, ret = 0; raw_spin_lock_irqsave(&clockevents_lock, flags); @@ -560,7 +561,7 @@ void clockevents_notify(unsigned long reason, void *arg) case CLOCK_EVT_NOTIFY_BROADCAST_ENTER: case CLOCK_EVT_NOTIFY_BROADCAST_EXIT: - tick_broadcast_oneshot_control(reason); + ret = tick_broadcast_oneshot_control(reason); break; case CLOCK_EVT_NOTIFY_CPU_DYING: @@ -603,6 +604,7 @@ void clockevents_notify(unsigned long reason, void *arg) break; } raw_spin_unlock_irqrestore(&clockevents_lock, flags); + return ret; } EXPORT_SYMBOL_GPL(clockevents_notify); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 003e6c3..84c8fd9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -646,14 +646,15 @@ again: /* * Powerstate information: The system enters/leaves a state, where * affected devices might stop + * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. */ -void tick_broadcast_oneshot_control(unsigned long reason) +int tick_broadcast_oneshot_control(unsigned long reason) { struct clock_event_device *bc, *dev; struct tick_device *td; unsigned long flags; ktime_t now; - int cpu; + int cpu, ret = 0; /* * Periodic mode does not care about the enter/exit of power @@ -759,6 +760,7 @@ void tick_broadcast_oneshot_control(unsigned long reason) } out: raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); + return ret; } /* diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 26f1c0b..0756c62 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -46,7 +46,7 @@ extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *)); extern void tick_resume_oneshot(void); # ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc); -extern void tick_broadcast_oneshot_control(unsigned long reason); +extern int tick_broadcast_oneshot_control(unsigned long reason); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup); extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc); @@ -58,7 +58,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } -static inline void tick_broadcast_oneshot_control(unsigned long reason) { } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } @@ -87,7 +87,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } -static inline void tick_broadcast_oneshot_control(unsigned long reason) { } +static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; } static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { } static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc) { -- cgit v0.10.2 From ba8f20c2eb4158a443e9d6a909aee5010efa0c69 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Fri, 7 Feb 2014 13:36:52 +0530 Subject: cpuidle: Handle clockevents_notify(BROADCAST_ENTER) failure Some archs set the CPUIDLE_FLAG_TIMER_STOP flag for idle states in which the local timers stop. The cpuidle_idle_call() currently handles such idle states by calling into the broadcast framework so as to wakeup CPUs at their next wakeup event. With the hrtimer mode of broadcast, the BROADCAST_ENTER call into the broadcast frameowork can fail for archs that do not have an external clock device to handle wakeups and the CPU in question has thus to be made the stand by CPU. This patch handles such cases by failing the call into cpuidle so that the arch can take some default action. The arch will certainly not enter a similar idle state because a failed cpuidle call will also implicitly indicate that the broadcast framework has not registered this CPU to be woken up. Hence we are safe if we fail the cpuidle call. In the process move the functions that trace idle statistics just before and after the entry and exit into idle states respectively. In other scenarios where the call to cpuidle fails, we end up not tracing idle entry and exit since a decision on an idle state could not be taken. Similarly when the call to broadcast framework fails, we skip tracing idle statistics because we are in no further position to take a decision on an alternative idle state to enter into. Signed-off-by: Preeti U Murthy Cc: deepthi@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: fweisbec@gmail.com Cc: paulus@samba.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: svaidy@linux.vnet.ibm.com Cc: peterz@infradead.org Cc: benh@kernel.crashing.org Acked-by: Rafael J. Wysocki Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140207080652.17187.66344.stgit@preeti.in.ibm.com Signed-off-by: Thomas Gleixner diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a55e68f..09d05ab 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -140,12 +140,14 @@ int cpuidle_idle_call(void) return 0; } - trace_cpu_idle_rcuidle(next_state, dev->cpu); - broadcast = !!(drv->states[next_state].flags & CPUIDLE_FLAG_TIMER_STOP); - if (broadcast) - clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu); + if (broadcast && + clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu)) + return -EBUSY; + + + trace_cpu_idle_rcuidle(next_state, dev->cpu); if (cpuidle_state_is_coupled(dev, drv, next_state)) entered_state = cpuidle_enter_state_coupled(dev, drv, @@ -153,11 +155,11 @@ int cpuidle_idle_call(void) else entered_state = cpuidle_enter_state(dev, drv, next_state); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); + if (broadcast) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu); - /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) cpuidle_curr_governor->reflect(dev, entered_state); -- cgit v0.10.2 From 5d1638acb9f62fa7eb0c07cb85318bbe1f13b227 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Fri, 7 Feb 2014 13:36:32 +0530 Subject: tick: Introduce hrtimer based broadcast On some architectures, in certain CPU deep idle states the local timers stop. An external clock device is used to wakeup these CPUs. The kernel support for the wakeup of these CPUs is provided by the tick broadcast framework by using the external clock device as the wakeup source. However not all implementations of architectures provide such an external clock device. This patch includes support in the broadcast framework to handle the wakeup of the CPUs in deep idle states on such systems by queuing a hrtimer on one of the CPUs, which is meant to handle the wakeup of CPUs in deep idle states. This patchset introduces a pseudo clock device which can be registered by the archs as tick_broadcast_device in the absence of a real external clock device. Once registered, the broadcast framework will work as is for these architectures as long as the archs take care of the BROADCAST_ENTER notification failing for one of the CPUs. This CPU is made the stand by CPU to handle wakeup of the CPUs in deep idle and it *must not enter deep idle states*. The CPU with the earliest wakeup is chosen to be this CPU. Hence this way the stand by CPU dynamically moves around and so does the hrtimer which is queued to trigger at the next earliest wakeup time. This is consistent with the case where an external clock device is present. The smp affinity of this clock device is set to the CPU with the earliest wakeup. This patchset handles the hotplug of the stand by CPU as well by moving the hrtimer on to the CPU handling the CPU_DEAD notification. Originally-from: Thomas Gleixner Signed-off-by: Preeti U Murthy Cc: deepthi@linux.vnet.ibm.com Cc: paulmck@linux.vnet.ibm.com Cc: fweisbec@gmail.com Cc: paulus@samba.org Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: svaidy@linux.vnet.ibm.com Cc: peterz@infradead.org Cc: benh@kernel.crashing.org Cc: rafael.j.wysocki@intel.com Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140207080632.17187.80532.stgit@preeti.in.ibm.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index e0c5a6c..dbe9e14 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -62,6 +62,11 @@ enum clock_event_mode { #define CLOCK_EVT_FEAT_DYNIRQ 0x000020 #define CLOCK_EVT_FEAT_PERCPU 0x000040 +/* + * Clockevent device is based on a hrtimer for broadcast + */ +#define CLOCK_EVT_FEAT_HRTIMER 0x000080 + /** * struct clock_event_device - clock event device descriptor * @event_handler: Assigned by the framework to be called by the low @@ -83,6 +88,7 @@ enum clock_event_mode { * @name: ptr to clock event name * @rating: variable to rate clock event devices * @irq: IRQ number (only for non CPU local devices) + * @bound_on: Bound on CPU * @cpumask: cpumask to indicate for which CPUs this device works * @list: list head for the management code * @owner: module reference @@ -113,6 +119,7 @@ struct clock_event_device { const char *name; int rating; int irq; + int bound_on; const struct cpumask *cpumask; struct list_head list; struct module *owner; @@ -180,9 +187,11 @@ extern int tick_receive_broadcast(void); #endif #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) +extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); #else static inline int tick_check_broadcast_expired(void) { return 0; } +static void tick_setup_hrtimer_broadcast(void) {}; #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 9250130..06151ef 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -3,7 +3,7 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o +obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o tick-broadcast-hrtimer.o obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c new file mode 100644 index 0000000..9242527 --- /dev/null +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -0,0 +1,106 @@ +/* + * linux/kernel/time/tick-broadcast-hrtimer.c + * This file emulates a local clock event device + * via a pseudo clock device. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tick-internal.h" + +static struct hrtimer bctimer; + +static void bc_set_mode(enum clock_event_mode mode, + struct clock_event_device *bc) +{ + switch (mode) { + case CLOCK_EVT_MODE_SHUTDOWN: + /* + * Note, we cannot cancel the timer here as we might + * run into the following live lock scenario: + * + * cpu 0 cpu1 + * lock(broadcast_lock); + * hrtimer_interrupt() + * bc_handler() + * tick_handle_oneshot_broadcast(); + * lock(broadcast_lock); + * hrtimer_cancel() + * wait_for_callback() + */ + hrtimer_try_to_cancel(&bctimer); + break; + default: + break; + } +} + +/* + * This is called from the guts of the broadcast code when the cpu + * which is about to enter idle has the earliest broadcast timer event. + */ +static int bc_set_next(ktime_t expires, struct clock_event_device *bc) +{ + /* + * We try to cancel the timer first. If the callback is on + * flight on some other cpu then we let it handle it. If we + * were able to cancel the timer nothing can rearm it as we + * own broadcast_lock. + * + * However we can also be called from the event handler of + * ce_broadcast_hrtimer itself when it expires. We cannot + * restart the timer because we are in the callback, but we + * can set the expiry time and let the callback return + * HRTIMER_RESTART. + */ + if (hrtimer_try_to_cancel(&bctimer) >= 0) { + hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED); + /* Bind the "device" to the cpu */ + bc->bound_on = smp_processor_id(); + } else if (bc->bound_on == smp_processor_id()) { + hrtimer_set_expires(&bctimer, expires); + } + return 0; +} + +static struct clock_event_device ce_broadcast_hrtimer = { + .set_mode = bc_set_mode, + .set_next_ktime = bc_set_next, + .features = CLOCK_EVT_FEAT_ONESHOT | + CLOCK_EVT_FEAT_KTIME | + CLOCK_EVT_FEAT_HRTIMER, + .rating = 0, + .bound_on = -1, + .min_delta_ns = 1, + .max_delta_ns = KTIME_MAX, + .min_delta_ticks = 1, + .max_delta_ticks = KTIME_MAX, + .mult = 1, + .shift = 0, + .cpumask = cpu_all_mask, +}; + +static enum hrtimer_restart bc_handler(struct hrtimer *t) +{ + ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer); + + if (ce_broadcast_hrtimer.next_event.tv64 == KTIME_MAX) + return HRTIMER_NORESTART; + + return HRTIMER_RESTART; +} + +void tick_setup_hrtimer_broadcast(void) +{ + hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + bctimer.function = bc_handler; + clockevents_register_device(&ce_broadcast_hrtimer); +} diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 84c8fd9..63c7b2d 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -643,6 +643,42 @@ again: raw_spin_unlock(&tick_broadcast_lock); } +static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu) +{ + if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER)) + return 0; + if (bc->next_event.tv64 == KTIME_MAX) + return 0; + return bc->bound_on == cpu ? -EBUSY : 0; +} + +static void broadcast_shutdown_local(struct clock_event_device *bc, + struct clock_event_device *dev) +{ + /* + * For hrtimer based broadcasting we cannot shutdown the cpu + * local device if our own event is the first one to expire or + * if we own the broadcast timer. + */ + if (bc->features & CLOCK_EVT_FEAT_HRTIMER) { + if (broadcast_needs_cpu(bc, smp_processor_id())) + return; + if (dev->next_event.tv64 < bc->next_event.tv64) + return; + } + clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); +} + +static void broadcast_move_bc(int deadcpu) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + if (!bc || !broadcast_needs_cpu(bc, deadcpu)) + return; + /* This moves the broadcast assignment to this cpu */ + clockevents_program_event(bc, bc->next_event, 1); +} + /* * Powerstate information: The system enters/leaves a state, where * affected devices might stop @@ -661,7 +697,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) * states */ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - return; + return 0; /* * We are called with preemtion disabled from the depth of the @@ -672,7 +708,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) dev = td->evtdev; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return; + return 0; bc = tick_broadcast_device.evtdev; @@ -680,7 +716,7 @@ int tick_broadcast_oneshot_control(unsigned long reason) if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) { if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); - clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN); + broadcast_shutdown_local(bc, dev); /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and @@ -693,6 +729,16 @@ int tick_broadcast_oneshot_control(unsigned long reason) dev->next_event.tv64 < bc->next_event.tv64) tick_broadcast_set_event(bc, cpu, dev->next_event, 1); } + /* + * If the current CPU owns the hrtimer broadcast + * mechanism, it cannot go deep idle and we remove the + * CPU from the broadcast mask. We don't have to go + * through the EXIT path as the local timer is not + * shutdown. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) + cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT); @@ -866,6 +912,8 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup) cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); cpumask_clear_cpu(cpu, tick_broadcast_force_mask); + broadcast_move_bc(cpu); + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } -- cgit v0.10.2 From f1689bb7abec8e2e670d8ad11eaa86d54bad8cfd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 16:00:46 +0100 Subject: time: Fixup fallout from recent clockevent/tick changes Make the stub function static inline instead of static and move the clockevents related function into the proper ifdeffed section. Reported-by: Fengguang Wu Signed-off-by: Thomas Gleixner Cc: Soren Brinkmann Cc: Preeti U Murthy diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index dbe9e14..20a7183 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -191,7 +191,7 @@ extern void tick_setup_hrtimer_broadcast(void); extern int tick_check_broadcast_expired(void); #else static inline int tick_check_broadcast_expired(void) { return 0; } -static void tick_setup_hrtimer_broadcast(void) {}; +static inline void tick_setup_hrtimer_broadcast(void) {}; #endif #ifdef CONFIG_GENERIC_CLOCKEVENTS diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 0756c62..7ab92b1 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -155,8 +155,9 @@ static inline int tick_device_is_functional(struct clock_event_device *dev) return !(dev->features & CLOCK_EVT_FEAT_DUMMY); } +int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); + #endif -int __clockevents_update_freq(struct clock_event_device *dev, u32 freq); extern void do_timer(unsigned long ticks); extern void update_wall_time(void); -- cgit v0.10.2 From 2f41898704d3cff796ea0adaea272808707d758e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Sat, 1 Feb 2014 16:46:16 +0100 Subject: ARM: sun7i: dt: Fix interrupt trigger types The Allwinner A20 uses the ARM GIC as its internal interrupts controller. The GIC can work on several interrupt triggers, and the A20 was actually setting it up to use a rising edge as a trigger, while it was actually a level high trigger, leading to some interrupts that would be completely ignored if the edge was missed. Fix this for the remaining DT nodes that slipped through. Signed-off-by: Maxime Ripard Cc: stable@vger.kernel.org diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 119f066..2374f5a 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -454,7 +454,7 @@ rtc: rtc@01c20d00 { compatible = "allwinner,sun7i-a20-rtc"; reg = <0x01c20d00 0x20>; - interrupts = <0 24 1>; + interrupts = <0 24 4>; }; sid: eeprom@01c23800 { @@ -596,10 +596,10 @@ hstimer@01c60000 { compatible = "allwinner,sun7i-a20-hstimer"; reg = <0x01c60000 0x1000>; - interrupts = <0 81 1>, - <0 82 1>, - <0 83 1>, - <0 84 1>; + interrupts = <0 81 4>, + <0 82 4>, + <0 83 4>, + <0 84 4>; clocks = <&ahb_gates 28>; }; -- cgit v0.10.2 From 40dd8f3b900cac1d925605a9d3199368c4af0a40 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Sun, 2 Feb 2014 14:52:40 +0100 Subject: ARM: sunxi: dt: Change the touchscreen compatibles Switch the device tree touchscreen compatibles to have a common pattern accross all Allwinner SoCs. Since the touchscreen driver has not been merged yet, it has no side effect. Signed-off-by: Maxime Ripard diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 040bb0e..e3ff64c 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -426,7 +426,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-ts"; + compatible = "allwinner,sun4i-a10-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; }; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index ea16054..8e57a28 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -383,7 +383,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-ts"; + compatible = "allwinner,sun4i-a10-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; }; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 320335a..c463fd7 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -346,7 +346,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-ts"; + compatible = "allwinner,sun4i-a10-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 2374f5a..b79a626 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -463,7 +463,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-ts"; + compatible = "allwinner,sun4i-a10-ts"; reg = <0x01c25000 0x100>; interrupts = <0 29 4>; }; -- cgit v0.10.2 From 980f88e414418bf65569a3b62b08b07e6fc2f4c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 6 Feb 2014 17:28:41 +0100 Subject: sched/wait: Suppress Sparse 'variable shadowing' warning This warning seems to show up a lot now, since ___wait_event() is (indirectly) used inside wait_event_timeout(), which also has a variable called __ret. Rename the one in ___wait_event() to ___ret (another leading underscore) to suppress the warning. Signed-off-by: Johannes Berg Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Andrew Morton Link: http://lkml.kernel.org/r/1391704121.12789.20.camel@jlt4.sipsolutions.net Signed-off-by: Ingo Molnar diff --git a/include/linux/wait.h b/include/linux/wait.h index 559044c..c55ea5c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -195,7 +195,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); ({ \ __label__ __out; \ wait_queue_t __wait; \ - long __ret = ret; \ + long ___ret = ret; \ \ INIT_LIST_HEAD(&__wait.task_list); \ if (exclusive) \ @@ -210,7 +210,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); break; \ \ if (___wait_is_interruptible(state) && __int) { \ - __ret = __int; \ + ___ret = __int; \ if (exclusive) { \ abort_exclusive_wait(&wq, &__wait, \ state, NULL); \ @@ -222,7 +222,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); cmd; \ } \ finish_wait(&wq, &__wait); \ -__out: __ret; \ +__out: ___ret; \ }) #define __wait_event(wq, condition) \ -- cgit v0.10.2 From 6a02ad66b2c44155d529f430d4fa5c6c66321077 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Feb 2014 18:11:08 +0100 Subject: perf/x86: Push the duration-logging printk() to IRQ context Calling printk() from NMI context is bad (TM), so move it to IRQ context. This also avoids the problem where the printk() time is measured by the generic NMI duration goo and triggers a second warning. Signed-off-by: Peter Zijlstra Cc: Don Zickus Cc: Dave Hansen Link: http://lkml.kernel.org/n/tip-75dv35xf6dhhmeb7nq6fua31@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6601702..add13c8 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -30,6 +30,8 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) work->func = func; } +#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } + void irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); diff --git a/kernel/events/core.c b/kernel/events/core.c index 56003c6..2067cbb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -231,11 +231,29 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, #define NR_ACCUMULATED_SAMPLES 128 static DEFINE_PER_CPU(u64, running_sample_length); -void perf_sample_event_took(u64 sample_len_ns) +static void perf_duration_warn(struct irq_work *w) { + u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns); u64 avg_local_sample_len; u64 local_samples_len; + + local_samples_len = __get_cpu_var(running_sample_length); + avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; + + printk_ratelimited(KERN_WARNING + "perf interrupt took too long (%lld > %lld), lowering " + "kernel.perf_event_max_sample_rate to %d\n", + avg_local_sample_len, allowed_ns, + sysctl_perf_event_sample_rate); +} + +static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn); + +void perf_sample_event_took(u64 sample_len_ns) +{ u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns); + u64 avg_local_sample_len; + u64 local_samples_len; if (allowed_ns == 0) return; @@ -263,13 +281,9 @@ void perf_sample_event_took(u64 sample_len_ns) sysctl_perf_event_sample_rate = max_samples_per_tick * HZ; perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; - printk_ratelimited(KERN_WARNING - "perf samples too long (%lld > %lld), lowering " - "kernel.perf_event_max_sample_rate to %d\n", - avg_local_sample_len, allowed_ns, - sysctl_perf_event_sample_rate); - update_perf_cpu_limits(); + + irq_work_queue(&perf_duration_work); } static atomic64_t perf_event_id; -- cgit v0.10.2 From e90c78535283dd0ded3bf2e484890d14dba2d527 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Feb 2014 18:02:09 +0100 Subject: x86/nmi: Push duration printk() to irq context Calling printk() from NMI context is bad (TM), so move it to IRQ context. In doing so we slightly change (probably wreck) the debugfs nmi_longest_ns thingy, in that it doesn't update to reflect the longest, nor does writing to it reset the count. Signed-off-by: Peter Zijlstra Cc: Don Zickus Cc: Dave Hansen Link: http://lkml.kernel.org/n/tip-rdw0au56a5ymis1u8p48c12d@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 86f9301..5f2fc44 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -1,6 +1,7 @@ #ifndef _ASM_X86_NMI_H #define _ASM_X86_NMI_H +#include #include #include #include @@ -38,6 +39,8 @@ typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); struct nmiaction { struct list_head list; nmi_handler_t handler; + u64 max_duration; + struct irq_work irq_work; unsigned long flags; const char *name; }; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 6fcb49c..b4872b9 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -87,6 +87,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); #define nmi_to_desc(type) (&nmi_desc[type]) static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC; + static int __init nmi_warning_debugfs(void) { debugfs_create_u64("nmi_longest_ns", 0644, @@ -95,6 +96,20 @@ static int __init nmi_warning_debugfs(void) } fs_initcall(nmi_warning_debugfs); +static void nmi_max_handler(struct irq_work *w) +{ + struct nmiaction *a = container_of(w, struct nmiaction, irq_work); + int remainder_ns, decimal_msecs; + u64 whole_msecs = ACCESS_ONCE(a->max_duration); + + remainder_ns = do_div(whole_msecs, (1000 * 1000)); + decimal_msecs = remainder_ns / 1000; + + printk_ratelimited(KERN_INFO + "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n", + a->handler, whole_msecs, decimal_msecs); +} + static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) { struct nmi_desc *desc = nmi_to_desc(type); @@ -110,26 +125,20 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 * to handle those situations. */ list_for_each_entry_rcu(a, &desc->head, list) { - u64 before, delta, whole_msecs; - int remainder_ns, decimal_msecs, thishandled; + int thishandled; + u64 delta; - before = sched_clock(); + delta = sched_clock(); thishandled = a->handler(type, regs); handled += thishandled; - delta = sched_clock() - before; + delta = sched_clock() - delta; trace_nmi_handler(a->handler, (int)delta, thishandled); - if (delta < nmi_longest_ns) + if (delta < nmi_longest_ns || delta < a->max_duration) continue; - nmi_longest_ns = delta; - whole_msecs = delta; - remainder_ns = do_div(whole_msecs, (1000 * 1000)); - decimal_msecs = remainder_ns / 1000; - printk_ratelimited(KERN_INFO - "INFO: NMI handler (%ps) took too long to run: " - "%lld.%03d msecs\n", a->handler, whole_msecs, - decimal_msecs); + a->max_duration = delta; + irq_work_queue(&a->irq_work); } rcu_read_unlock(); @@ -146,6 +155,8 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) if (!action->handler) return -EINVAL; + init_irq_work(&action->irq_work, nmi_max_handler); + spin_lock_irqsave(&desc->lock, flags); /* -- cgit v0.10.2 From 13beacee817d27a40ffc6f065ea0042685611dd5 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 29 Jan 2014 14:37:50 -0500 Subject: perf/x86/p4: Fix counter corruption when using lots of perf groups On a P4 box stressing perf with: ./perf record -o perf.data ./perf stat -v ./perf bench all it was noticed that a slew of unknown NMIs would pop out rather quickly. Painfully debugging this ancient platform, led me to notice cross cpu counter corruption. The P4 machine is special in that it has 18 counters, half are used for cpu0 and the other half is for cpu1 (or all 18 if hyperthreading is disabled). But the splitting of the counters has to be actively managed by the software. In this particular bug, one of the cpu0 specific counters was being used by cpu1 and caused all sorts of random unknown nmis. I am not entirely sure on the corruption path, but what happens is: o perf schedules a group with p4_pmu_schedule_events() o inside p4_pmu_schedule_events(), it notices an hwc pointer is being reused but for a different cpu, so it 'swaps' the config bits and returns the updated 'assign' array with a _new_ index. o perf schedules another group with p4_pmu_schedule_events() o inside p4_pmu_schedule_events(), it notices an hwc pointer is being reused (the same one as above) but for the _same_ cpu [BUG!!], so it updates the 'assign' array to use the _old_ (wrong cpu) index because the _new_ index is in an earlier part of the 'assign' array (and hasn't been committed yet). o perf commits the transaction using the wrong index and corrupts the other cpu The [BUG!!] is because the 'hwc->config' is updated but not the 'hwc->idx'. So the check for 'p4_should_swap_ts()' is correct the first time around but incorrect the second time around (because hwc->config was updated in between). I think the spirit of perf was to not modify anything until all the transactions had a chance to 'test' if they would succeed, and if so, commit atomically. However, P4 breaks this spirit by touching the hwc->config element. So my fix is to continue the un-perf like breakage, by assigning hwc->idx to -1 on swap to tell follow up group scheduling to find a new index. Of course if the transaction fails rolling this back will be difficult, but that is not different than how the current code works. :-) And I wasn't sure how much effort to cleanup the code I should do for a platform that is almost 10 years old by now. Hence the lazy fix. Signed-off-by: Don Zickus Acked-by: Cyrill Gorcunov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391024270-19469-1-git-send-email-dzickus@redhat.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 3486e66..f44c34d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1257,7 +1257,24 @@ again: pass++; goto again; } - + /* + * Perf does test runs to see if a whole group can be assigned + * together succesfully. There can be multiple rounds of this. + * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config + * bits, such that the next round of group assignments will + * cause the above p4_should_swap_ts to pass instead of fail. + * This leads to counters exclusive to thread0 being used by + * thread1. + * + * Solve this with a cheap hack, reset the idx back to -1 to + * force a new lookup (p4_next_cntr) to get the right counter + * for the right thread. + * + * This probably doesn't comply with the general spirit of how + * perf wants to work, but P4 is special. :-( + */ + if (p4_should_swap_ts(hwc->config, cpu)) + hwc->idx = -1; p4_pmu_swap_config_ts(hwc, cpu); if (assign) assign[i] = cntr_idx; -- cgit v0.10.2 From 90ed5b0fa5eb96e1cbb34aebf6a9ed96ee1587ec Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Sun, 9 Feb 2014 13:20:18 +0100 Subject: perf/x86/p4: Block PMIs on init to prevent a stream of unkown NMIs A bunch of unknown NMIs have popped up on a Pentium4 recently when booting into a kdump kernel. This was exposed because the watchdog timer went from 60 seconds down to 10 seconds (increasing the ability to reproduce this problem). What is happening is on boot up of the second kernel (the kdump one), the previous nmi_watchdogs were enabled on thread 0 and thread 1. The second kernel only initializes one cpu but the perf counter on thread 1 still counts. Normally in a kdump scenario, the other cpus are blocking in an NMI loop, but more importantly their local apics have the performance counters disabled (iow LVTPC is masked). So any counters that fire are masked and never get through to the second kernel. However, on a P4 the local apic is shared by both threads and thread1's PMI (despite being configured to only interrupt thread1) will generate an NMI on thread0. Because thread0 knows nothing about this NMI, it is seen as an unknown NMI. This would be fine because it is a kdump kernel, strange things happen what is the big deal about a single unknown NMI. Unfortunately, the P4 comes with another quirk: clearing the overflow bit to prevent a stream of NMIs. This is the problem. The kdump kernel can not execute because of the endless NMIs that happen. To solve this, I instrumented the p4 perf init code, to walk all the counters and zero them out (just like a normal reset would). Now when the counters go off, they do not generate anything and no unknown NMIs are seen. I tested this on a P4 we have in our lab. After two or three crashes, I could normally reproduce the problem. Now after 10 crashes, everything continues to boot correctly. Signed-off-by: Don Zickus Cc: Dave Young Cc: Vivek Goyal Cc: Cyrill Gorcunov Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120154115.GZ25953@redhat.com [ Fixed a stylistic detail. ] Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index f44c34d..5d466b7 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1339,6 +1339,7 @@ static __initconst const struct x86_pmu p4_pmu = { __init int p4_pmu_init(void) { unsigned int low, high; + int i, reg; /* If we get stripped -- indexing fails */ BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); @@ -1357,5 +1358,19 @@ __init int p4_pmu_init(void) x86_pmu = p4_pmu; + /* + * Even though the counters are configured to interrupt a particular + * logical processor when an overflow happens, testing has shown that + * on kdump kernels (which uses a single cpu), thread1's counter + * continues to run and will report an NMI on thread0. Due to the + * overflow bug, this leads to a stream of unknown NMIs. + * + * Solve this by zero'ing out the registers to mimic a reset. + */ + for (i = 0; i < x86_pmu.num_counters; i++) { + reg = x86_pmu_config_addr(i); + wrmsrl_safe(reg, 0ULL); + } + return 0; } -- cgit v0.10.2 From 390f3258cb2d031f1c17aa32e771ebd336e89073 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 28 Jan 2014 11:26:14 +0400 Subject: sched/deadline: Skip in switched_to_dl() if task is current When p is current and it's not of dl class, then there are no other dl taks in the rq. If we had had pushable tasks in some other rq, they would have been pushed earlier. So, skip "p == rq->curr" case. Signed-off-by: Kirill Tkhai Acked-by: Juri Lelli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140128072421.32315.25300.stgit@tkhai Signed-off-by: Ingo Molnar diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 0dd5e09..b5700bc 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1560,7 +1560,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) if (unlikely(p->dl.dl_throttled)) return; - if (p->on_rq || rq->curr != p) { + if (p->on_rq && rq->curr != p) { #ifdef CONFIG_SMP if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) /* Only reschedule if pushing failed */ -- cgit v0.10.2 From 5c228079ce8a9bb043a423069a6674dfb9268037 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 27 Jan 2014 17:15:37 -0500 Subject: sched: Move the priority specific bits into a new header file Some bits about priority are defined in linux/sched/rt.h, but some of them are not only for rt scheduler, such as MAX_PRIO. This patch move them all into a new header file, linux/sched/prio.h. Signed-off-by: Dongsheng Yang Cc: clark.williams@gmail.com Cc: rostedt@goodmis.org Cc: raistlin@linux.it Cc: juri.lelli@gmail.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/f7549508a1588da2c613d601748ca9de30fa5dcf.1390859827.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index ed86779..d97d0a8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -3,6 +3,8 @@ #include +#include + struct sched_param { int sched_priority; diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h new file mode 100644 index 0000000..9382ba8 --- /dev/null +++ b/include/linux/sched/prio.h @@ -0,0 +1,23 @@ +#ifndef _SCHED_PRIO_H +#define _SCHED_PRIO_H + +/* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH + * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority + * values are inverted: lower p->prio value means higher priority. + * + * The MAX_USER_RT_PRIO value allows the actual maximum + * RT priority to be separate from the value exported to + * user-space. This allows kernel threads to set their + * priority to a value higher than any user task. Note: + * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + */ + +#define MAX_USER_RT_PRIO 100 +#define MAX_RT_PRIO MAX_USER_RT_PRIO + +#define MAX_PRIO (MAX_RT_PRIO + 40) +#define DEFAULT_PRIO (MAX_RT_PRIO + 20) + +#endif /* _SCHED_PRIO_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 34e4ebe..f7453d4 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -1,24 +1,7 @@ #ifndef _SCHED_RT_H #define _SCHED_RT_H -/* - * Priority of a process goes from 0..MAX_PRIO-1, valid RT - * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH - * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority - * values are inverted: lower p->prio value means higher priority. - * - * The MAX_USER_RT_PRIO value allows the actual maximum - * RT priority to be separate from the value exported to - * user-space. This allows kernel threads to set their - * priority to a value higher than any user task. Note: - * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. - */ - -#define MAX_USER_RT_PRIO 100 -#define MAX_RT_PRIO MAX_USER_RT_PRIO - -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) +#include static inline int rt_prio(int prio) { -- cgit v0.10.2 From 6b6350f155afdfdf888e18c7bf26950a6d10b0c2 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 27 Jan 2014 17:15:38 -0500 Subject: sched: Expose some macros related to priority Some macros in kernel/sched/sched.h about priority are private to kernel/sched. But they are useful to other parts of the core kernel. This patch moves these macros from kernel/sched/sched.h to include/linux/sched/prio.h so that they are available to other subsystems. Signed-off-by: Dongsheng Yang Cc: raistlin@linux.it Cc: juri.lelli@gmail.com Cc: clark.williams@gmail.com Cc: rostedt@goodmis.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/2b022810905b52d13238466807f4b2a691577180.1390859827.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 9382ba8..13216f1 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -20,4 +20,22 @@ #define MAX_PRIO (MAX_RT_PRIO + 40) #define DEFAULT_PRIO (MAX_RT_PRIO + 20) +/* + * Convert user-nice values [ -20 ... 0 ... 19 ] + * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], + * and back. + */ +#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) +#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) +#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) + +/* + * 'User priority' is the nice value converted to something we + * can work with better when scaling various scheduler parameters, + * it's a [ 0 ... 39 ] range. + */ +#define USER_PRIO(p) ((p)-MAX_RT_PRIO) +#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) +#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) + #endif /* _SCHED_PRIO_H */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c2119fd..b44720d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -24,24 +24,6 @@ extern long calc_load_fold_active(struct rq *this_rq); extern void update_cpu_load_active(struct rq *this_rq); /* - * Convert user-nice values [ -20 ... 0 ... 19 ] - * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], - * and back. - */ -#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) -#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) -#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) - -/* - * 'User priority' is the nice value converted to something we - * can work with better when scaling various scheduler parameters, - * it's a [ 0 ... 39 ] range. - */ -#define USER_PRIO(p) ((p)-MAX_RT_PRIO) -#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) -#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) - -/* * Helpers for converting nanosecond timing to jiffy resolution */ #define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ)) -- cgit v0.10.2 From c32fa99f0b4252633aa464e28d1cb925bd2a79df Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Wed, 18 Dec 2013 05:02:14 -0800 Subject: bitops: Fix signedness of compile-time hweight implementations Enabling '-Wsign-compare' compiler warnings on code that includes include/linux/bitops.h can generate the following warning: In file included from include/linux/kernel.h:10:0, from :48: include/linux/bitops.h: In function 'hweight_long': include/linux/bitops.h:77:26: error: signed and unsigned type in conditional expression [-Werror=sign-compare] (converted to an error with -Werror) This is due to the use of the logical negation operator '!' in the __const_hweight8 macro in include/asm-generic/bitops/const_hweight.h. The use of that operator here results in a signed value. Fix by explicitly casting the __const_hweight8 macro expansion to 'unsigned int'. While here, clean up several checkpatch.pl warnings. Signed-off-by: Paul Walmsley Cc: Arnd Bergmann Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Andrew Morton Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1312180459580.30198@tamien Signed-off-by: Ingo Molnar diff --git a/include/asm-generic/bitops/const_hweight.h b/include/asm-generic/bitops/const_hweight.h index fa2a50b..0a7e066 100644 --- a/include/asm-generic/bitops/const_hweight.h +++ b/include/asm-generic/bitops/const_hweight.h @@ -5,14 +5,15 @@ * Compile time versions of __arch_hweightN() */ #define __const_hweight8(w) \ - ( (!!((w) & (1ULL << 0))) + \ - (!!((w) & (1ULL << 1))) + \ - (!!((w) & (1ULL << 2))) + \ - (!!((w) & (1ULL << 3))) + \ - (!!((w) & (1ULL << 4))) + \ - (!!((w) & (1ULL << 5))) + \ - (!!((w) & (1ULL << 6))) + \ - (!!((w) & (1ULL << 7))) ) + ((unsigned int) \ + ((!!((w) & (1ULL << 0))) + \ + (!!((w) & (1ULL << 1))) + \ + (!!((w) & (1ULL << 2))) + \ + (!!((w) & (1ULL << 3))) + \ + (!!((w) & (1ULL << 4))) + \ + (!!((w) & (1ULL << 5))) + \ + (!!((w) & (1ULL << 6))) + \ + (!!((w) & (1ULL << 7))))) #define __const_hweight16(w) (__const_hweight8(w) + __const_hweight8((w) >> 8 )) #define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16)) -- cgit v0.10.2 From 849401b66d305f3feb75b6db7459b95ad190552a Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Sun, 9 Feb 2014 11:32:22 +0530 Subject: tick: Fixup more fallout from hrtimer broadcast mode The hrtimer mode of broadcast is supported only when GENERIC_CLOCKEVENTS_BROADCAST and TICK_ONESHOT config options are enabled. Hence compile in the functions for hrtimer mode of broadcast only when these options are selected. Also fix max_delta_ticks value for the pseudo clock device. Reported-by: Fengguang Wu Reported-by: Ingo Molnar Signed-off-by: Preeti U Murthy Link: http://lkml.kernel.org/r/52F719EE.9010304@linux.vnet.ibm.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 20a7183..2e4cb67 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -207,6 +207,7 @@ static inline void clockevents_resume(void) {} static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } +static inline void tick_setup_hrtimer_broadcast(void) {}; #endif diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 06151ef..57a413f 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -3,7 +3,10 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o -obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o tick-broadcast-hrtimer.o +ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) + obj-y += tick-broadcast.o + obj-$(CONFIG_TICK_ONESHOT) += tick-broadcast-hrtimer.o +endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c index 9242527..eb682d5 100644 --- a/kernel/time/tick-broadcast-hrtimer.c +++ b/kernel/time/tick-broadcast-hrtimer.c @@ -82,7 +82,7 @@ static struct clock_event_device ce_broadcast_hrtimer = { .min_delta_ns = 1, .max_delta_ns = KTIME_MAX, .min_delta_ticks = 1, - .max_delta_ticks = KTIME_MAX, + .max_delta_ticks = ULONG_MAX, .mult = 1, .shift = 0, .cpumask = cpu_all_mask, -- cgit v0.10.2 From d3c63ae1e2e35e4250390c079dc3fb7291347f5c Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 4 Feb 2014 23:54:58 -0800 Subject: x86/apic: Only use default_wait_for_init_deassert() es7000_wait_for_init_deassert() is functionally equivalent to default_wait_for_init_deassert(), so remove the duplicate code and use only a single function. Signed-off-by: David Rientjes Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1402042353030.7839@chino.kir.corp.google.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1d2091a..2ef7013 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -521,7 +521,6 @@ static inline void default_wait_for_init_deassert(atomic_t *deassert) { while (!atomic_read(deassert)) cpu_relax(); - return; } extern void generic_bigsmp_probe(void); diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index c552247..d03d57e 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -394,12 +394,6 @@ static void es7000_enable_apic_mode(void) WARN(1, "Command failed, status = %x\n", mip_status); } -static void es7000_wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); -} - static unsigned int es7000_get_apic_id(unsigned long x) { return (x >> 24) & 0xFF; @@ -722,7 +716,7 @@ static struct apic __refdata apic_es7000 = { .trampoline_phys_low = 0x467, .trampoline_phys_high = 0x469, - .wait_for_init_deassert = es7000_wait_for_init_deassert, + .wait_for_init_deassert = default_wait_for_init_deassert, /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, -- cgit v0.10.2 From 465822cfc8cb850ba76046965cc7b6fd1f8c3d73 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 4 Feb 2014 23:55:01 -0800 Subject: x86/apic: Switch wait_for_init_deassert() to a bool flag Now that there is only a single wait_for_init_deassert() function, just convert the member of struct apic to a bool to determine whether we need to wait for init_deassert to become non-zero. There are no more callers of default_wait_for_init_deassert(), so fold it into the caller. Signed-off-by: David Rientjes Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1402042354010.7839@chino.kir.corp.google.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 2ef7013..c1d3074 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -351,7 +351,7 @@ struct apic { int trampoline_phys_low; int trampoline_phys_high; - void (*wait_for_init_deassert)(atomic_t *deassert); + bool wait_for_init_deassert; void (*smp_callin_clear_local_apic)(void); void (*inquire_remote_apic)(int apicid); @@ -517,12 +517,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif -static inline void default_wait_for_init_deassert(atomic_t *deassert) -{ - while (!atomic_read(deassert)) - cpu_relax(); -} - extern void generic_bigsmp_probe(void); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 2c621a6..7c1b294 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -198,7 +198,7 @@ static struct apic apic_flat = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, @@ -314,7 +314,7 @@ static struct apic apic_physflat = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 191ce75..8c7c982 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -172,8 +172,7 @@ struct apic apic_noop = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, - + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 3e67f9e..a5b45df 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -248,7 +248,7 @@ static const struct apic apic_numachip __refconst = { .wakeup_secondary_cpu = numachip_wakeup_secondary, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, /* REMRD not supported */ diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index d50e364..e4840aa 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -199,8 +199,7 @@ static struct apic apic_bigsmp = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = default_wait_for_init_deassert, - + .wait_for_init_deassert = true, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index d03d57e..6f8f8b3 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -652,8 +652,7 @@ static struct apic __refdata apic_es7000_cluster = { .trampoline_phys_low = 0x467, .trampoline_phys_high = 0x469, - .wait_for_init_deassert = NULL, - + .wait_for_init_deassert = false, /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, @@ -716,8 +715,7 @@ static struct apic __refdata apic_es7000 = { .trampoline_phys_low = 0x467, .trampoline_phys_high = 0x469, - .wait_for_init_deassert = default_wait_for_init_deassert, - + .wait_for_init_deassert = true, /* Nothing to do for most platforms, since cleared by the INIT cycle: */ .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 1e42e8f..030ea1c 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -505,8 +505,7 @@ static struct apic __refdata apic_numaq = { .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, /* We don't do anything here because we use NMI's to boot instead */ - .wait_for_init_deassert = NULL, - + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, .inquire_remote_apic = NULL, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index eb35ef9..cceb352 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -119,8 +119,7 @@ static struct apic apic_default = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = default_wait_for_init_deassert, - + .wait_for_init_deassert = true, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 00146f9..b656128 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -532,8 +532,7 @@ static struct apic apic_summit = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = default_wait_for_init_deassert, - + .wait_for_init_deassert = true, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = default_inquire_remote_apic, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index cac85ee..e66766b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -279,7 +279,7 @@ static struct apic apic_x2apic_cluster = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index de231e3..6d600eb 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -133,7 +133,7 @@ static struct apic apic_x2apic_phys = { .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d263b13..7834389 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -396,7 +396,7 @@ static struct apic __refdata apic_x2apic_uv_x = { .wakeup_secondary_cpu = uv_wakeup_secondary, .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, - .wait_for_init_deassert = NULL, + .wait_for_init_deassert = false, .smp_callin_clear_local_apic = NULL, .inquire_remote_apic = NULL, diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a32da80..c77acc6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -122,8 +122,9 @@ static void smp_callin(void) * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI. */ cpuid = smp_processor_id(); - if (apic->wait_for_init_deassert && cpuid != 0) - apic->wait_for_init_deassert(&init_deasserted); + if (apic->wait_for_init_deassert && cpuid) + while (!atomic_read(&init_deasserted)) + cpu_relax(); /* * (This works even if the APIC is not enabled.) -- cgit v0.10.2 From 6d4989835e5418fdfda764fce7294246f80cf464 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 4 Feb 2014 23:55:04 -0800 Subject: x86/apic: Remove unused function prototypes Some function prototypes declared in asm/apic.h are never defined, so remove them. Signed-off-by: David Rientjes Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1402042354210.7839@chino.kir.corp.google.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c1d3074..b4c561d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -93,9 +93,6 @@ static inline int is_vsmp_box(void) return 0; } #endif -extern void xapic_wait_icr_idle(void); -extern u32 safe_xapic_wait_icr_idle(void); -extern void xapic_icr_write(u32, u32); extern int setup_profiling_timer(unsigned int); static inline void native_apic_mem_write(u32 reg, u32 v) @@ -184,7 +181,6 @@ extern int x2apic_phys; extern int x2apic_preenabled; extern void check_x2apic(void); extern void enable_x2apic(void); -extern void x2apic_icr_write(u32 low, u32 id); static inline int x2apic_enabled(void) { u64 msr; -- cgit v0.10.2 From dc9788f40a769d967de3eb5a7aee8c1a70094d32 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 4 Feb 2014 23:55:06 -0800 Subject: x86/apic: Always define nox2apic and define it as initdata The "nox2apic" variable can be defined as __initdata since it is only used for bootstrap. It can now unconditionally be defined since it will later be freed. At the same time, it is also better off as a bool. Signed-off-by: David Rientjes Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1402042354380.7839@chino.kir.corp.google.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b4c561d..19b0eba 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -217,7 +217,6 @@ static inline void x2apic_force_phys(void) { } -#define nox2apic 0 #define x2apic_preenabled 0 #define x2apic_supported() 0 #endif diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7f26c9a..f824d69 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -133,6 +133,10 @@ static inline void imcr_apic_to_pic(void) * +1=force-enable */ static int force_enable_local_apic __initdata; + +/* Control whether x2APIC mode is enabled or not */ +static bool nox2apic __initdata; + /* * APIC command line parameters */ @@ -162,8 +166,7 @@ int x2apic_mode; /* x2apic enabled before OS handover */ int x2apic_preenabled; static int x2apic_disabled; -static int nox2apic; -static __init int setup_nox2apic(char *str) +static int __init setup_nox2apic(char *str) { if (x2apic_enabled()) { int apicid = native_apic_msr_read(APIC_ID); @@ -178,7 +181,7 @@ static __init int setup_nox2apic(char *str) } else setup_clear_cpu_cap(X86_FEATURE_X2APIC); - nox2apic = 1; + nox2apic = true; return 0; } -- cgit v0.10.2 From d0ea026808ad81de2af14938448419a95211b938 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 27 Jan 2014 22:00:45 -0500 Subject: sched: Implement task_nice() as static inline function As patch "sched: Move the priority specific bits into a new header file" exposes the priority related macros in linux/sched/prio.h, we don't have to implement task_nice() in kernel/sched/core.c any more. This patch implements it in linux/sched/sched.h as static inline function, saving the kernel stack and enhancing performance a bit. Signed-off-by: Dongsheng Yang Cc: clark.williams@gmail.com Cc: rostedt@goodmis.org Cc: raistlin@linux.it Cc: juri.lelli@gmail.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1390878045-7096-1-git-send-email-yangds.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index d97d0a8..e3d5564 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2094,7 +2094,16 @@ static inline void sched_autogroup_exit(struct signal_struct *sig) { } extern bool yield_to(struct task_struct *p, bool preempt); extern void set_user_nice(struct task_struct *p, long nice); extern int task_prio(const struct task_struct *p); -extern int task_nice(const struct task_struct *p); +/** + * task_nice - return the nice value of a given task. + * @p: the task in question. + * + * Return: The nice value [ -20 ... 0 ... 19 ]. + */ +static inline int task_nice(const struct task_struct *p) +{ + return PRIO_TO_NICE((p)->static_prio); +} extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 13216f1..410ccb7 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -27,7 +27,6 @@ */ #define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) #define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) -#define TASK_NICE(p) PRIO_TO_NICE((p)->static_prio) /* * 'User priority' is the nice value converted to something we diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 210a12a..104c816 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3000,7 +3000,7 @@ void set_user_nice(struct task_struct *p, long nice) unsigned long flags; struct rq *rq; - if (TASK_NICE(p) == nice || nice < -20 || nice > 19) + if (task_nice(p) == nice || nice < -20 || nice > 19) return; /* * We have to be careful, if called from sys_setpriority(), @@ -3078,7 +3078,7 @@ SYSCALL_DEFINE1(nice, int, increment) if (increment > 40) increment = 40; - nice = TASK_NICE(current) + increment; + nice = task_nice(current) + increment; if (nice < -20) nice = -20; if (nice > 19) @@ -3111,18 +3111,6 @@ int task_prio(const struct task_struct *p) } /** - * task_nice - return the nice value of a given task. - * @p: the task in question. - * - * Return: The nice value [ -20 ... 0 ... 19 ]. - */ -int task_nice(const struct task_struct *p) -{ - return TASK_NICE(p); -} -EXPORT_SYMBOL(task_nice); - -/** * idle_cpu - is a given cpu idle currently? * @cpu: the processor in question. * @@ -3321,7 +3309,7 @@ recheck: */ if (user && !capable(CAP_SYS_NICE)) { if (fair_policy(policy)) { - if (attr->sched_nice < TASK_NICE(p) && + if (attr->sched_nice < task_nice(p) && !can_nice(p, attr->sched_nice)) return -EPERM; } @@ -3345,7 +3333,7 @@ recheck: * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. */ if (p->policy == SCHED_IDLE && policy != SCHED_IDLE) { - if (!can_nice(p, TASK_NICE(p))) + if (!can_nice(p, task_nice(p))) return -EPERM; } @@ -3385,7 +3373,7 @@ recheck: * If not changing anything there's no need to proceed further: */ if (unlikely(policy == p->policy)) { - if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p)) + if (fair_policy(policy) && attr->sched_nice != task_nice(p)) goto change; if (rt_policy(policy) && attr->sched_priority != p->rt_priority) goto change; @@ -3837,7 +3825,7 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, else if (task_has_rt_policy(p)) attr.sched_priority = p->rt_priority; else - attr.sched_nice = TASK_NICE(p); + attr.sched_nice = task_nice(p); rcu_read_unlock(); @@ -7010,7 +6998,7 @@ void normalize_rt_tasks(void) * Renice negative nice level userspace * tasks back to 0: */ - if (TASK_NICE(p) < 0 && p->mm) + if (task_nice(p) < 0 && p->mm) set_user_nice(p, 0); continue; } diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 9994791..58624a6 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -142,7 +142,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime, p->utimescaled += cputime_scaled; account_group_user_time(p, cputime); - index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; + index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; /* Add user time to cpustat. */ task_group_account_field(p, index, (__force u64) cputime); @@ -169,7 +169,7 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, p->gtime += cputime; /* Add guest time to cpustat. */ - if (TASK_NICE(p) > 0) { + if (task_nice(p) > 0) { cpustat[CPUTIME_NICE] += (__force u64) cputime; cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; } else { -- cgit v0.10.2 From ba6a328f7dfc95b20df5e0eb33c698187e997190 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 26 Jan 2014 13:01:42 -0800 Subject: x86/mm: Avoid duplicated pxm_to_node() calls In slit init code, too many pxm_to_node() function calls are done. We can store from_node/to_node instead of keep calling pxm_to_node(). - Before this patch: pxm_to_node() is called n*(1+n*3) times. - After this patch: pxm_to_node() is called n*(1+n) times. for 8 sockets, it will be 72 instead of 200. for 32 sockets, it will be 1056 instead of 3104. Signed-off-by: Yinghai Lu Cc: Toshi Kani Cc: David Rientjes Link: http://lkml.kernel.org/r/1390770102-4007-1-git-send-email-yinghai@kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 1953e9c..66338a6 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -52,12 +52,18 @@ void __init acpi_numa_slit_init(struct acpi_table_slit *slit) int i, j; for (i = 0; i < slit->locality_count; i++) { - if (pxm_to_node(i) == NUMA_NO_NODE) + const int from_node = pxm_to_node(i); + + if (from_node == NUMA_NO_NODE) continue; + for (j = 0; j < slit->locality_count; j++) { - if (pxm_to_node(j) == NUMA_NO_NODE) + const int to_node = pxm_to_node(j); + + if (to_node == NUMA_NO_NODE) continue; - numa_set_distance(pxm_to_node(i), pxm_to_node(j), + + numa_set_distance(from_node, to_node, slit->entry[slit->locality_count * i + j]); } } -- cgit v0.10.2 From b119fa61d440fe0f5af504299ad506637585a8ca Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Tue, 21 Jan 2014 15:36:16 -0800 Subject: locking/mcs: Order the header files in Kbuild of each architecture in alphabetical order We perform a clean up of the Kbuid files in each architecture. We order the files in each Kbuild in alphabetical order by running the below script. for i in arch/*/include/asm/Kbuild do cat $i | gawk '/^generic-y/ { i = 3; do { for (; i <= NF; i++) { if ($i == "\\") { getline; i = 1; continue; } if ($i != "") hdr[$i] = $i; } break; } while (1); next; } // { print $0; } END { n = asort(hdr); for (i = 1; i <= n; i++) print "generic-y += " hdr[i]; }' > ${i}.sorted; mv ${i}.sorted $i; done Signed-off-by: Tim Chen Cc: Arnd Bergmann Cc: Matthew R Wilcox Cc: AswinChandramouleeswaran Cc: Dave Hansen Cc: "Paul E.McKenney" Cc: Scott J Norton Cc: Will Deacon Cc: "Figo.zhang" Cc: Linus Torvalds Cc: Rik van Riel Cc: Waiman Long Cc: Peter Hurley Cc: Andrea Arcangeli Cc: Tim Chen Cc: Alex Shi Cc: Raghavendra K T Cc: Andi Kleen Cc: George Spelvin Cc: MichelLespinasse Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Davidlohr Bueso Cc: Andrew Morton Signed-off-by: Peter Zijlstra [ Fixed build bug. ] Signed-off-by: Ingo Molnar diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index a73a8e2..55a2b039 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -1,7 +1,7 @@ -generic-y += clkdev.h +generic-y += clkdev.h generic-y += exec.h -generic-y += trace_clock.h -generic-y += preempt.h generic-y += hash.h +generic-y += preempt.h +generic-y += trace_clock.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 0d33629..54ceb91 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,15 +1,15 @@ generic-y += auxvec.h generic-y += barrier.h -generic-y += bugs.h generic-y += bitsperlong.h +generic-y += bugs.h generic-y += clkdev.h generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h -generic-y += fcntl.h generic-y += fb.h +generic-y += fcntl.h generic-y += ftrace.h generic-y += hardirq.h generic-y += hash.h @@ -30,6 +30,7 @@ generic-y += pci.h generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sembuf.h @@ -48,4 +49,3 @@ generic-y += ucontext.h generic-y += user.h generic-y += vga.h generic-y += xor.h -generic-y += preempt.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 3278afe..3b20920 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -7,6 +7,7 @@ generic-y += current.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h +generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h @@ -17,6 +18,7 @@ generic-y += msgbuf.h generic-y += param.h generic-y += parport.h generic-y += poll.h +generic-y += preempt.h generic-y += resource.h generic-y += sections.h generic-y += segment.h @@ -33,5 +35,3 @@ generic-y += termios.h generic-y += timex.h generic-y += trace_clock.h generic-y += unaligned.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 71c53ec..54038be 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += dma.h generic-y += emergency-restart.h generic-y += errno.h generic-y += ftrace.h +generic-y += hash.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h @@ -28,6 +29,7 @@ generic-y += mutex.h generic-y += pci.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h @@ -38,8 +40,8 @@ generic-y += shmbuf.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h -generic-y += switch_to.h generic-y += swab.h +generic-y += switch_to.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h @@ -49,5 +51,3 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index cfb9fe1..064ca3c 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,21 +1,21 @@ -generic-y += clkdev.h -generic-y += cputime.h -generic-y += delay.h -generic-y += device.h -generic-y += div64.h -generic-y += emergency-restart.h -generic-y += exec.h -generic-y += futex.h -generic-y += preempt.h -generic-y += irq_regs.h -generic-y += param.h -generic-y += local.h -generic-y += local64.h -generic-y += percpu.h -generic-y += scatterlist.h -generic-y += sections.h -generic-y += topology.h -generic-y += trace_clock.h -generic-y += xor.h -generic-y += hash.h +generic-y += clkdev.h +generic-y += cputime.h +generic-y += delay.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += exec.h +generic-y += futex.h +generic-y += hash.h +generic-y += irq_regs.h +generic-y += local.h +generic-y += local64.h +generic-y += param.h +generic-y += percpu.h +generic-y += preempt.h +generic-y += scatterlist.h +generic-y += sections.h +generic-y += topology.h +generic-y += trace_clock.h +generic-y += xor.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 359d36f..ca30a3d 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -10,6 +10,7 @@ generic-y += emergency-restart.h generic-y += errno.h generic-y += fb.h generic-y += futex.h +generic-y += hash.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h @@ -17,14 +18,15 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h -generic-y += local64.h generic-y += local.h +generic-y += local64.h generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h generic-y += percpu.h generic-y += pgalloc.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sembuf.h @@ -44,5 +46,3 @@ generic-y += ucontext.h generic-y += unaligned.h generic-y += user.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index d73bb85..dea0671 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += exec.h generic-y += fb.h generic-y += fcntl.h generic-y += futex.h +generic-y += hash.h generic-y += hw_irq.h generic-y += io.h generic-y += ioctl.h @@ -34,6 +35,7 @@ generic-y += percpu.h generic-y += pgalloc.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += segment.h @@ -56,5 +58,3 @@ generic-y += ucontext.h generic-y += user.h generic-y += vga.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index f3fd876..7f8d1b1 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -10,7 +10,7 @@ generic-y += hash.h generic-y += kvm_para.h generic-y += linkage.h generic-y += module.h +generic-y += preempt.h generic-y += trace_clock.h generic-y += vga.h generic-y += xor.h -generic-y += preempt.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index bc42f14..93428d0 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,6 +1,6 @@ generic-y += clkdev.h generic-y += exec.h -generic-y += trace_clock.h -generic-y += preempt.h generic-y += hash.h +generic-y += preempt.h +generic-y += trace_clock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 38ca45d..9f0eda4 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -25,14 +25,15 @@ generic-y += ipcbuf.h generic-y += irq_regs.h generic-y += kdebug.h generic-y += kmap_types.h -generic-y += local64.h generic-y += local.h +generic-y += local64.h generic-y += mman.h generic-y += msgbuf.h generic-y += pci.h generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h generic-y += scatterlist.h @@ -45,8 +46,8 @@ generic-y += siginfo.h generic-y += sizes.h generic-y += socket.h generic-y += sockios.h -generic-y += statfs.h generic-y += stat.h +generic-y += statfs.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h @@ -55,4 +56,3 @@ generic-y += types.h generic-y += ucontext.h generic-y += unaligned.h generic-y += xor.h -generic-y += preempt.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 283a831..81ebef7 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -1,8 +1,8 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += hash.h generic-y += kvm_para.h -generic-y += trace_clock.h generic-y += preempt.h +generic-y += trace_clock.h generic-y += vtime.h -generic-y += hash.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 932435a..8704221 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,7 +1,7 @@ generic-y += clkdev.h generic-y += exec.h +generic-y += hash.h generic-y += module.h -generic-y += trace_clock.h generic-y += preempt.h -generic-y += hash.h +generic-y += trace_clock.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 7cc8c36..f44ac8f 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += device.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h +generic-y += hash.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h @@ -13,11 +14,12 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h -generic-y += local64.h generic-y += local.h +generic-y += local64.h generic-y += mman.h generic-y += mutex.h generic-y += percpu.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h @@ -31,5 +33,3 @@ generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index b716d80..ac22144 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -13,6 +13,7 @@ generic-y += fb.h generic-y += fcntl.h generic-y += futex.h generic-y += hardirq.h +generic-y += hash.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h @@ -30,6 +31,7 @@ generic-y += pci.h generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += scatterlist.h generic-y += sections.h generic-y += sembuf.h @@ -52,5 +54,3 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 2b98bc7..4b5f122 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,6 +3,6 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += exec.h generic-y += hash.h -generic-y += trace_clock.h -generic-y += syscalls.h generic-y += preempt.h +generic-y += syscalls.h +generic-y += trace_clock.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 2d7f650..82ee17a 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -2,16 +2,16 @@ generic-y += cputime.h generic-y += current.h generic-y += emergency-restart.h +generic-y += hash.h generic-y += local64.h generic-y += mutex.h generic-y += parport.h generic-y += percpu.h +generic-y += preempt.h generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += serial.h generic-y += trace_clock.h -generic-y += preempt.h generic-y += ucontext.h generic-y += xor.h -generic-y += hash.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index 992e989..da3b2f5 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -3,5 +3,5 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += exec.h generic-y += hash.h -generic-y += trace_clock.h generic-y += preempt.h +generic-y += trace_clock.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 2e40f1c..f62a67a 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -10,8 +10,8 @@ generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h generic-y += clkdev.h -generic-y += cmpxchg.h generic-y += cmpxchg-local.h +generic-y += cmpxchg.h generic-y += cputime.h generic-y += current.h generic-y += device.h @@ -25,6 +25,7 @@ generic-y += fcntl.h generic-y += ftrace.h generic-y += futex.h generic-y += hardirq.h +generic-y += hash.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h @@ -41,6 +42,7 @@ generic-y += pci.h generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h @@ -53,11 +55,11 @@ generic-y += siginfo.h generic-y += signal.h generic-y += socket.h generic-y += sockios.h -generic-y += statfs.h generic-y += stat.h +generic-y += statfs.h generic-y += string.h -generic-y += switch_to.h generic-y += swab.h +generic-y += switch_to.h generic-y += termbits.h generic-y += termios.h generic-y += topology.h @@ -68,5 +70,3 @@ generic-y += user.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 752c981..958b79a 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -1,9 +1,28 @@ +generic-y += auxvec.h generic-y += barrier.h -generic-y += word-at-a-time.h auxvec.h user.h cputime.h emergency-restart.h \ - segment.h topology.h vga.h device.h percpu.h hw_irq.h mutex.h \ - div64.h irq_regs.h kdebug.h kvm_para.h local64.h local.h param.h \ - poll.h xor.h clkdev.h exec.h -generic-y += trace_clock.h -generic-y += preempt.h +generic-y += clkdev.h +generic-y += cputime.h +generic-y += device.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += exec.h generic-y += hash.h +generic-y += hw_irq.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += kvm_para.h +generic-y += local.h +generic-y += local64.h +generic-y += mutex.h +generic-y += param.h +generic-y += percpu.h +generic-y += poll.h +generic-y += preempt.h +generic-y += segment.h +generic-y += topology.h +generic-y += trace_clock.h +generic-y += user.h +generic-y += vga.h +generic-y += word-at-a-time.h +generic-y += xor.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 6c0a955..b618c41 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,7 +1,7 @@ generic-y += clkdev.h +generic-y += hash.h +generic-y += preempt.h generic-y += rwsem.h generic-y += trace_clock.h -generic-y += preempt.h generic-y += vtime.h -generic-y += hash.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 8386a4a..482f492 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -1,6 +1,6 @@ generic-y += clkdev.h -generic-y += trace_clock.h -generic-y += preempt.h generic-y += hash.h +generic-y += preempt.h +generic-y += trace_clock.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 146b9d5..e92319f 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -1,10 +1,10 @@ header-y += + generic-y += barrier.h generic-y += clkdev.h generic-y += hash.h +generic-y += preempt.h generic-y += trace_clock.h generic-y += xor.h -generic-y += preempt.h - diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 0cd7198..85c214e 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -8,18 +8,20 @@ generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h generic-y += fcntl.h +generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mman.h +generic-y += msgbuf.h generic-y += param.h generic-y += parport.h generic-y += percpu.h generic-y += poll.h -generic-y += mman.h -generic-y += msgbuf.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sembuf.h @@ -34,5 +36,3 @@ generic-y += termios.h generic-y += trace_clock.h generic-y += ucontext.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 4b60a0c..8215028 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -6,15 +6,15 @@ generic-y += cputime.h generic-y += div64.h generic-y += emergency-restart.h generic-y += exec.h -generic-y += linkage.h -generic-y += local64.h -generic-y += mutex.h +generic-y += hash.h generic-y += irq_regs.h +generic-y += linkage.h generic-y += local.h +generic-y += local64.h generic-y += module.h +generic-y += mutex.h +generic-y += preempt.h generic-y += serial.h generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 3793c75..e0b1ed9 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += errno.h generic-y += exec.h generic-y += fb.h generic-y += fcntl.h +generic-y += hash.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h @@ -24,6 +25,7 @@ generic-y += param.h generic-y += parport.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sembuf.h @@ -38,5 +40,3 @@ generic-y += termios.h generic-y += trace_clock.h generic-y += types.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 88a330d..215e05d 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,8 +1,27 @@ -generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h -generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h -generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h -generic-y += switch_to.h clkdev.h -generic-y += trace_clock.h -generic-y += preempt.h -generic-y += hash.h generic-y += barrier.h +generic-y += bug.h +generic-y += clkdev.h +generic-y += cputime.h +generic-y += current.h +generic-y += delay.h +generic-y += device.h +generic-y += emergency-restart.h +generic-y += exec.h +generic-y += ftrace.h +generic-y += futex.h +generic-y += hardirq.h +generic-y += hash.h +generic-y += hw_irq.h +generic-y += io.h +generic-y += irq_regs.h +generic-y += kdebug.h +generic-y += mutex.h +generic-y += param.h +generic-y += pci.h +generic-y += percpu.h +generic-y += preempt.h +generic-y += sections.h +generic-y += switch_to.h +generic-y += topology.h +generic-y += trace_clock.h +generic-y += xor.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 3ef4f9d..ad2c737 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += fcntl.h generic-y += ftrace.h generic-y += futex.h generic-y += hardirq.h +generic-y += hash.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h @@ -32,6 +33,7 @@ generic-y += parport.h generic-y += percpu.h generic-y += poll.h generic-y += posix_types.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h @@ -60,5 +62,3 @@ generic-y += unaligned.h generic-y += user.h generic-y += vga.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 0a337e4..223280d 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += errno.h generic-y += exec.h generic-y += fcntl.h generic-y += hardirq.h +generic-y += hash.h generic-y += ioctl.h generic-y += irq_regs.h generic-y += kdebug.h @@ -18,6 +19,7 @@ generic-y += linkage.h generic-y += local.h generic-y += local64.h generic-y += percpu.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h @@ -27,5 +29,3 @@ generic-y += termios.h generic-y += topology.h generic-y += trace_clock.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h -- cgit v0.10.2 From ddf1d169c0a489d498c1799a7043904a43b0c159 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Tue, 21 Jan 2014 15:36:22 -0800 Subject: locking/mcs: Allow architecture specific asm files to be used for contended case This patch allows each architecture to add its specific assembly optimized arch_mcs_spin_lock_contended and arch_mcs_spinlock_uncontended for MCS lock and unlock functions. Signed-off-by: Tim Chen Cc: Scott J Norton Cc: Raghavendra K T Cc: AswinChandramouleeswaran Cc: George Spelvin Cc: Rik vanRiel Cc: Andrea Arcangeli Cc: MichelLespinasse Cc: Peter Hurley Cc: Andi Kleen Cc: Alex Shi Cc: Dave Hansen Cc: Tim Chen Cc: Arnd Bergmann Cc: "Figo.zhang" Cc: "Paul E.McKenney" Cc: "H. Peter Anvin" Cc: Davidlohr Bueso Cc: Waiman Long Cc: Ingo Molnar Cc: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Matthew R Wilcox Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1390347382.3138.67.camel@schen9-DESK Signed-off-by: Ingo Molnar diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 55a2b039..7736f42 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -3,5 +3,6 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 54ceb91..e76fd79 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += param.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 3b20920..23e728e 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += param.h generic-y += parport.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 54038be..3bdfdda 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 064ca3c..8b398ff 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hash.h generic-y += irq_regs.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += param.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index ca30a3d..0d93b9a 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += mutex.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index dea0671..8dbdce8 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += mmu.h generic-y += mmu_context.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 7f8d1b1..056027f 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += exec.h generic-y += hash.h generic-y += kvm_para.h generic-y += linkage.h +generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 93428d0..babb933 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -2,5 +2,6 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 9f0eda4..eadcc11 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -27,6 +27,7 @@ generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += pci.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 81ebef7..0da4aa2 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h generic-y += kvm_para.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h generic-y += vtime.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 8704221..5825a35 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index f44ac8f..86b4c17 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -16,6 +16,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += mutex.h generic-y += percpu.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index ac22144..c29ead8 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -24,6 +24,7 @@ generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 4b5f122..1f590ab 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += syscalls.h generic-y += trace_clock.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 82ee17a..0543918 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += current.h generic-y += emergency-restart.h generic-y += hash.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mutex.h generic-y += parport.h generic-y += percpu.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index da3b2f5..cbc6b9b 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -3,5 +3,6 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index f62a67a..480af0d 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -35,6 +35,7 @@ generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += module.h generic-y += msgbuf.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 958b79a..ecf25e6 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += kdebug.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mutex.h generic-y += param.h generic-y += percpu.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index b618c41..3fb1bc4 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h generic-y += trace_clock.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 482f492..57892a8 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,5 +2,6 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index e92319f..4630cf2 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -5,6 +5,7 @@ header-y += generic-y += barrier.h generic-y += clkdev.h generic-y += hash.h +generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += trace_clock.h generic-y += xor.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 85c214e..c19e47d 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += irq_regs.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += msgbuf.h generic-y += param.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 8215028..a458218 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += irq_regs.h generic-y += linkage.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += module.h generic-y += mutex.h generic-y += preempt.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index e0b1ed9..0aa5675 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += ipcbuf.h generic-y += irq_regs.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += mutex.h generic-y += param.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 215e05d..a5e4b60 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += hw_irq.h generic-y += io.h generic-y += irq_regs.h generic-y += kdebug.h +generic-y += mcs_spinlock.h generic-y += mutex.h generic-y += param.h generic-y += pci.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index ad2c737..1e5fb87 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h +generic-y += mcs_spinlock.h generic-y += mman.h generic-y += module.h generic-y += msgbuf.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 7f66985..a8fee07 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,3 +5,4 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += mcs_spinlock.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 223280d..c3d20ba 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += kvm_para.h generic-y += linkage.h generic-y += local.h generic-y += local64.h +generic-y += mcs_spinlock.h generic-y += percpu.h generic-y += preempt.h generic-y += resource.h diff --git a/include/asm-generic/mcs_spinlock.h b/include/asm-generic/mcs_spinlock.h new file mode 100644 index 0000000..10cd4ff --- /dev/null +++ b/include/asm-generic/mcs_spinlock.h @@ -0,0 +1,13 @@ +#ifndef __ASM_MCS_SPINLOCK_H +#define __ASM_MCS_SPINLOCK_H + +/* + * Architectures can define their own: + * + * arch_mcs_spin_lock_contended(l) + * arch_mcs_spin_unlock_contended(l) + * + * See kernel/locking/mcs_spinlock.c. + */ + +#endif /* __ASM_MCS_SPINLOCK_H */ diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h index e9a4d74..f2a5c63 100644 --- a/include/linux/mcs_spinlock.h +++ b/include/linux/mcs_spinlock.h @@ -12,6 +12,8 @@ #ifndef __LINUX_MCS_SPINLOCK_H #define __LINUX_MCS_SPINLOCK_H +#include + struct mcs_spinlock { struct mcs_spinlock *next; int locked; /* 1 if lock acquired */ -- cgit v0.10.2 From fb9edbe98493fcd9df66de926ae9157cbe0e4dcd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jan 2014 19:20:06 +0100 Subject: lockdep: Make held_lock->check and "int check" argument bool The "int check" argument of lock_acquire() and held_lock->check are misleading. This is actually a boolean: 2 means "true", everything else is "false". And there is no need to pass 1 or 0 to lock_acquire() depending on CONFIG_PROVE_LOCKING, __lock_acquire() checks prove_locking at the start and clears "check" if !CONFIG_PROVE_LOCKING. Note: probably we can simply kill this member/arg. The only explicit user of check => 0 is rcu_lock_acquire(), perhaps we can change it to use lock_acquire(trylock =>, read => 2). __lockdep_no_validate means check => 0 implicitly, but we can change validate_chain() to check hlock->instance->key instead. Not to mention it would be nice to get rid of lockdep_set_novalidate_class(). Signed-off-by: Oleg Nesterov Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul McKenney Cc: Steven Rostedt Cc: Alan Stern Cc: Sasha Levin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120182006.GA26495@redhat.com Signed-off-by: Ingo Molnar diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index d8a55e8..0ffb0cb 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -39,17 +39,10 @@ lock_acquire(&(l)->dep_map, s, t, r, c, n, i) # define __rel(l, n, i) \ lock_release(&(l)->dep_map, n, i) -# ifdef CONFIG_PROVE_LOCKING -# define lockdep_acquire(l, s, t, i) __acq(l, s, t, 0, 2, NULL, i) -# define lockdep_acquire_nest(l, s, t, n, i) __acq(l, s, t, 0, 2, n, i) -# define lockdep_acquire_read(l, s, t, i) __acq(l, s, t, 1, 2, NULL, i) -# define lockdep_release(l, n, i) __rel(l, n, i) -# else -# define lockdep_acquire(l, s, t, i) __acq(l, s, t, 0, 1, NULL, i) -# define lockdep_acquire_nest(l, s, t, n, i) __acq(l, s, t, 0, 1, n, i) -# define lockdep_acquire_read(l, s, t, i) __acq(l, s, t, 1, 1, NULL, i) -# define lockdep_release(l, n, i) __rel(l, n, i) -# endif +#define lockdep_acquire(l, s, t, i) __acq(l, s, t, 0, 1, NULL, i) +#define lockdep_acquire_nest(l, s, t, n, i) __acq(l, s, t, 0, 1, n, i) +#define lockdep_acquire_read(l, s, t, i) __acq(l, s, t, 1, 1, NULL, i) +#define lockdep_release(l, n, i) __rel(l, n, i) #else # define lockdep_acquire(l, s, t, i) do { } while (0) # define lockdep_acquire_nest(l, s, t, n, i) do { } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 92b1bfc..1626047 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -252,9 +252,9 @@ struct held_lock { unsigned int trylock:1; /* 16 bits */ unsigned int read:2; /* see lock_acquire() comment */ - unsigned int check:2; /* see lock_acquire() comment */ + unsigned int check:1; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; - unsigned int references:11; /* 32 bits */ + unsigned int references:12; /* 32 bits */ }; /* @@ -326,9 +326,8 @@ static inline int lockdep_match_key(struct lockdep_map *lock, * * Values for check: * - * 0: disabled - * 1: simple checks (freeing, held-at-exit-time, etc.) - * 2: full validation + * 0: simple checks (freeing, held-at-exit-time, etc.) + * 1: full validation */ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, @@ -479,15 +478,9 @@ static inline void print_irqtrace_events(struct task_struct *curr) * on the per lock-class debug mode: */ -#ifdef CONFIG_PROVE_LOCKING - #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i) - #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 2, n, i) - #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 2, n, i) -#else - #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) - #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) - #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) -#endif +#define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) +#define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) +#define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) @@ -518,13 +511,13 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define might_lock(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ - lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ + lock_acquire(&(lock)->dep_map, 0, 0, 0, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ } while (0) # define might_lock_read(lock) \ do { \ typecheck(struct lockdep_map *, &(lock)->dep_map); \ - lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ + lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ } while (0) #else diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 72bf3a0..adff3c9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -314,7 +314,7 @@ static inline bool rcu_lockdep_current_cpu_online(void) static inline void rcu_lock_acquire(struct lockdep_map *map) { - lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_); + lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } static inline void rcu_lock_release(struct lockdep_map *map) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index eb8a547..8c85a0d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2098,7 +2098,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, * (If lookup_chain_cache() returns with 1 it acquires * graph_lock for us) */ - if (!hlock->trylock && (hlock->check == 2) && + if (!hlock->trylock && hlock->check && lookup_chain_cache(curr, hlock, chain_key)) { /* * Check whether last held lock: @@ -3055,9 +3055,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int class_idx; u64 chain_key; - if (!prove_locking) - check = 1; - if (unlikely(!debug_locks)) return 0; @@ -3069,8 +3066,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return 0; - if (lock->key == &__lockdep_no_validate__) - check = 1; + if (!prove_locking || lock->key == &__lockdep_no_validate__) + check = 0; if (subclass < NR_LOCKDEP_CACHING_CLASSES) class = lock->class_cache[subclass]; @@ -3138,7 +3135,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->holdtime_stamp = lockstat_clock(); #endif - if (check == 2 && !mark_irqflags(curr, hlock)) + if (check && !mark_irqflags(curr, hlock)) return 0; /* mark it as used: */ -- cgit v0.10.2 From 1b5ff816cab708ba44c7d7b56b613516269eb577 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jan 2014 19:20:10 +0100 Subject: lockdep: Don't create the wrong dependency on hlock->check == 0 Test-case: DEFINE_MUTEX(m1); DEFINE_MUTEX(m2); DEFINE_MUTEX(mx); void lockdep_should_complain(void) { lockdep_set_novalidate_class(&mx); // m1 -> mx -> m2 mutex_lock(&m1); mutex_lock(&mx); mutex_lock(&m2); mutex_unlock(&m2); mutex_unlock(&mx); mutex_unlock(&m1); // m2 -> m1 ; should trigger the warning mutex_lock(&m2); mutex_lock(&m1); mutex_unlock(&m1); mutex_unlock(&m2); } this doesn't trigger any warning, lockdep can't detect the trivial deadlock. This is because lock(&mx) correctly avoids m1 -> mx dependency, it skips validate_chain() due to mx->check == 0. But lock(&m2) wrongly adds mx -> m2 and thus m1 -> m2 is not created. rcu_lock_acquire()->lock_acquire(check => 0) is fine due to read == 2, so currently only __lockdep_no_validate__ can trigger this problem. Signed-off-by: Oleg Nesterov Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul McKenney Cc: Steven Rostedt Cc: Alan Stern Cc: Sasha Levin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120182010.GA26498@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 8c85a0d..f7eba92 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1936,12 +1936,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) for (;;) { int distance = curr->lockdep_depth - depth + 1; - hlock = curr->held_locks + depth-1; + hlock = curr->held_locks + depth - 1; /* * Only non-recursive-read entries get new dependencies * added: */ - if (hlock->read != 2) { + if (hlock->read != 2 && hlock->check) { if (!check_prev_add(curr, hlock, next, distance, trylock_loop)) return 0; -- cgit v0.10.2 From 34d0ed5ea7a72d5961552fb1758a94f0d3f8f3dc Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jan 2014 19:20:13 +0100 Subject: lockdep: Change mark_held_locks() to check hlock->check instead of lockdep_no_validate The __lockdep_no_validate check in mark_held_locks() adds the subtle and (afaics) unnecessary difference between no-validate and check==0. And this looks even more inconsistent because __lock_acquire() skips mark_irqflags()->mark_lock() if !check. Change mark_held_locks() to check hlock->check instead. Signed-off-by: Oleg Nesterov Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul McKenney Cc: Steven Rostedt Cc: Alan Stern Cc: Sasha Levin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120182013.GA26505@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f7eba92..bf0c6b0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2517,7 +2517,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) BUG_ON(usage_bit >= LOCK_USAGE_STATES); - if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) + if (!hlock->check) continue; if (!mark_lock(curr, hlock, usage_bit)) -- cgit v0.10.2 From 47be1c1a0e188232b5e5962917b21750053cd3f8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 20 Jan 2014 19:20:16 +0100 Subject: lockdep: Change lockdep_set_novalidate_class() to use _and_name Cosmetic. This doesn't really matter because a) device->mutex is the only user of __lockdep_no_validate__ and b) this class should be never reported as the source of problem, but if something goes wrong "&dev->mutex" looks better than "&__lockdep_no_validate__" as the name of the lock. Signed-off-by: Oleg Nesterov Cc: Dave Jones Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Paul McKenney Cc: Steven Rostedt Cc: Alan Stern Cc: Sasha Levin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140120182016.GA26512@redhat.com Signed-off-by: Ingo Molnar diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1626047..060e513 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -303,7 +303,7 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, (lock)->dep_map.key, sub) #define lockdep_set_novalidate_class(lock) \ - lockdep_set_class(lock, &__lockdep_no_validate__) + lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) /* * Compare locking classes */ -- cgit v0.10.2 From b4f2ab43615e5b36c48fffa99f26aca381839ac6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 17 Jan 2014 10:04:01 +0100 Subject: sched: Remove 'cpu' parameter from idle_balance() The cpu parameter passed to idle_balance() is not needed as it could be retrieved from 'struct rq.' Signed-off-by: Daniel Lezcano Cc: alex.shi@linaro.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1389949444-14821-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 104c816..74dd565 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2705,7 +2705,7 @@ need_resched: pre_schedule(rq, prev); if (unlikely(!rq->nr_running)) - idle_balance(cpu, rq); + idle_balance(rq); put_prev_task(rq, prev); next = pick_next_task(rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4caa803..428bc9d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6531,12 +6531,13 @@ out: * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. */ -void idle_balance(int this_cpu, struct rq *this_rq) +void idle_balance(struct rq *this_rq) { struct sched_domain *sd; int pulled_task = 0; unsigned long next_balance = jiffies + HZ; u64 curr_cost = 0; + int this_cpu = this_rq->cpu; this_rq->idle_stamp = rq_clock(this_rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b44720d..82c0e02 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1158,7 +1158,7 @@ extern const struct sched_class idle_sched_class; extern void update_group_power(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); -extern void idle_balance(int this_cpu, struct rq *this_rq); +extern void idle_balance(struct rq *this_rq); extern void idle_enter_fair(struct rq *this_rq); extern void idle_exit_fair(struct rq *this_rq); -- cgit v0.10.2 From e5fc66119ec97054eefc83f173a7ee9e133c3c3a Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 17 Jan 2014 10:04:02 +0100 Subject: sched: Fix race in idle_balance() The scheduler main function 'schedule()' checks if there are no more tasks on the runqueue. Then it checks if a task should be pulled in the current runqueue in idle_balance() assuming it will go to idle otherwise. But idle_balance() releases the rq->lock in order to look up the sched domains and takes the lock again right after. That opens a window where another cpu may put a task in our runqueue, so we won't go to idle but we have filled the idle_stamp, thinking we will. This patch closes the window by checking if the runqueue has been modified but without pulling a task after taking the lock again, so we won't go to idle right after in the __schedule() function. Signed-off-by: Daniel Lezcano Cc: alex.shi@linaro.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1389949444-14821-2-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 428bc9d..5ebc681 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6589,6 +6589,13 @@ void idle_balance(struct rq *this_rq) raw_spin_lock(&this_rq->lock); + /* + * While browsing the domains, we released the rq lock. + * A task could have be enqueued in the meantime + */ + if (this_rq->nr_running && !pulled_task) + return; + if (pulled_task || time_after(jiffies, this_rq->next_balance)) { /* * We are going idle. next_balance may be set based on -- cgit v0.10.2 From 3c4017c13f91069194fce3160944efec50f15a6e Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Fri, 17 Jan 2014 10:04:03 +0100 Subject: sched: Move rq->idle_stamp up to the core idle_balance() modifies the rq->idle_stamp field, making this information shared across core.c and fair.c. As we know if the cpu is going to idle or not with the previous patch, let's encapsulate the rq->idle_stamp information in core.c by moving it up to the caller. The idle_balance() function returns true in case a balancing occured and the cpu won't be idle, false if no balance happened and the cpu is going idle. Signed-off-by: Daniel Lezcano Cc: alex.shi@linaro.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1389949444-14821-3-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 74dd565..417cf65 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2704,8 +2704,15 @@ need_resched: pre_schedule(rq, prev); - if (unlikely(!rq->nr_running)) - idle_balance(rq); + if (unlikely(!rq->nr_running)) { + /* + * We must set idle_stamp _before_ calling idle_balance(), such + * that we measure the duration of idle_balance() as idle time. + */ + rq->idle_stamp = rq_clock(rq); + if (idle_balance(rq)) + rq->idle_stamp = 0; + } put_prev_task(rq, prev); next = pick_next_task(rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5ebc681..04fea77 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6531,7 +6531,7 @@ out: * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. */ -void idle_balance(struct rq *this_rq) +int idle_balance(struct rq *this_rq) { struct sched_domain *sd; int pulled_task = 0; @@ -6539,10 +6539,8 @@ void idle_balance(struct rq *this_rq) u64 curr_cost = 0; int this_cpu = this_rq->cpu; - this_rq->idle_stamp = rq_clock(this_rq); - if (this_rq->avg_idle < sysctl_sched_migration_cost) - return; + return 0; /* * Drop the rq->lock, but keep IRQ/preempt disabled. @@ -6580,10 +6578,8 @@ void idle_balance(struct rq *this_rq) interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; - if (pulled_task) { - this_rq->idle_stamp = 0; + if (pulled_task) break; - } } rcu_read_unlock(); @@ -6594,7 +6590,7 @@ void idle_balance(struct rq *this_rq) * A task could have be enqueued in the meantime */ if (this_rq->nr_running && !pulled_task) - return; + return 1; if (pulled_task || time_after(jiffies, this_rq->next_balance)) { /* @@ -6606,6 +6602,8 @@ void idle_balance(struct rq *this_rq) if (curr_cost > this_rq->max_idle_balance_cost) this_rq->max_idle_balance_cost = curr_cost; + + return pulled_task; } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 82c0e02..bb89991 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1158,7 +1158,7 @@ extern const struct sched_class idle_sched_class; extern void update_group_power(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); -extern void idle_balance(struct rq *this_rq); +extern int idle_balance(struct rq *this_rq); extern void idle_enter_fair(struct rq *this_rq); extern void idle_exit_fair(struct rq *this_rq); -- cgit v0.10.2 From fed14d45f945042a15b09de48d7d3d58d9455fc4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 11 Feb 2012 06:05:00 +0100 Subject: sched/fair: Track cgroup depth Track depth in cgroup tree, this is useful for things like find_matching_se() where you need to get to a common parent of two sched entities. Keeping the depth avoids having to calculate it on the spot, which saves a number of possible cache-misses. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1328936700.2476.17.camel@laptop Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index e3d5564..555e27d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1078,6 +1078,7 @@ struct sched_entity { #endif #ifdef CONFIG_FAIR_GROUP_SCHED + int depth; struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ struct cfs_rq *cfs_rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 04fea77..748a7ac 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -322,13 +322,13 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list) /* Do the two (enqueued) entities belong to the same group ? */ -static inline int +static inline struct cfs_rq * is_same_group(struct sched_entity *se, struct sched_entity *pse) { if (se->cfs_rq == pse->cfs_rq) - return 1; + return se->cfs_rq; - return 0; + return NULL; } static inline struct sched_entity *parent_entity(struct sched_entity *se) @@ -336,17 +336,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) return se->parent; } -/* return depth at which a sched entity is present in the hierarchy */ -static inline int depth_se(struct sched_entity *se) -{ - int depth = 0; - - for_each_sched_entity(se) - depth++; - - return depth; -} - static void find_matching_se(struct sched_entity **se, struct sched_entity **pse) { @@ -360,8 +349,8 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) */ /* First walk up until both entities are at same depth */ - se_depth = depth_se(*se); - pse_depth = depth_se(*pse); + se_depth = (*se)->depth; + pse_depth = (*pse)->depth; while (se_depth > pse_depth) { se_depth--; @@ -426,10 +415,10 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) #define for_each_leaf_cfs_rq(rq, cfs_rq) \ for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) -static inline int +static inline struct cfs_rq * is_same_group(struct sched_entity *se, struct sched_entity *pse) { - return 1; + return cfs_rq_of(se); /* always the same rq */ } static inline struct sched_entity *parent_entity(struct sched_entity *se) @@ -7262,7 +7251,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq) #ifdef CONFIG_FAIR_GROUP_SCHED static void task_move_group_fair(struct task_struct *p, int on_rq) { + struct sched_entity *se = &p->se; struct cfs_rq *cfs_rq; + /* * If the task was not on the rq at the time of this cgroup movement * it must have been asleep, sleeping tasks keep their ->vruntime @@ -7288,23 +7279,24 @@ static void task_move_group_fair(struct task_struct *p, int on_rq) * To prevent boost or penalty in the new cfs_rq caused by delta * min_vruntime between the two cfs_rqs, we skip vruntime adjustment. */ - if (!on_rq && (!p->se.sum_exec_runtime || p->state == TASK_WAKING)) + if (!on_rq && (!se->sum_exec_runtime || p->state == TASK_WAKING)) on_rq = 1; if (!on_rq) - p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime; + se->vruntime -= cfs_rq_of(se)->min_vruntime; set_task_rq(p, task_cpu(p)); + se->depth = se->parent ? se->parent->depth + 1 : 0; if (!on_rq) { - cfs_rq = cfs_rq_of(&p->se); - p->se.vruntime += cfs_rq->min_vruntime; + cfs_rq = cfs_rq_of(se); + se->vruntime += cfs_rq->min_vruntime; #ifdef CONFIG_SMP /* * migrate_task_rq_fair() will have removed our previous * contribution, but we must synchronize for ongoing future * decay. */ - p->se.avg.decay_count = atomic64_read(&cfs_rq->decay_counter); - cfs_rq->blocked_load_avg += p->se.avg.load_avg_contrib; + se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter); + cfs_rq->blocked_load_avg += se->avg.load_avg_contrib; #endif } } @@ -7400,10 +7392,13 @@ void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, if (!se) return; - if (!parent) + if (!parent) { se->cfs_rq = &rq->cfs; - else + se->depth = 0; + } else { se->cfs_rq = parent->my_q; + se->depth = parent->depth + 1; + } se->my_q = cfs_rq; /* guarantee group entities always have weight */ -- cgit v0.10.2 From 606dba2e289446600a0b68422ed2019af5355c12 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 11 Feb 2012 06:05:00 +0100 Subject: sched: Push put_prev_task() into pick_next_task() In order to avoid having to do put/set on a whole cgroup hierarchy when we context switch, push the put into pick_next_task() so that both operations are in the same function. Further changes then allow us to possibly optimize away redundant work. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1328936700.2476.17.camel@laptop Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 417cf65..dedb5f0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2579,18 +2579,11 @@ static inline void schedule_debug(struct task_struct *prev) schedstat_inc(this_rq(), sched_count); } -static void put_prev_task(struct rq *rq, struct task_struct *prev) -{ - if (prev->on_rq || rq->skip_clock_update < 0) - update_rq_clock(rq); - prev->sched_class->put_prev_task(rq, prev); -} - /* * Pick up the highest-prio task: */ static inline struct task_struct * -pick_next_task(struct rq *rq) +pick_next_task(struct rq *rq, struct task_struct *prev) { const struct sched_class *class; struct task_struct *p; @@ -2600,13 +2593,13 @@ pick_next_task(struct rq *rq) * the fair class we can call that function directly: */ if (likely(rq->nr_running == rq->cfs.h_nr_running)) { - p = fair_sched_class.pick_next_task(rq); + p = fair_sched_class.pick_next_task(rq, prev); if (likely(p)) return p; } for_each_class(class) { - p = class->pick_next_task(rq); + p = class->pick_next_task(rq, prev); if (p) return p; } @@ -2714,8 +2707,10 @@ need_resched: rq->idle_stamp = 0; } - put_prev_task(rq, prev); - next = pick_next_task(rq); + if (prev->on_rq || rq->skip_clock_update < 0) + update_rq_clock(rq); + + next = pick_next_task(rq, prev); clear_tsk_need_resched(prev); clear_preempt_need_resched(); rq->skip_clock_update = 0; @@ -4748,7 +4743,7 @@ static void migrate_tasks(unsigned int dead_cpu) if (rq->nr_running == 1) break; - next = pick_next_task(rq); + next = pick_next_task(rq, NULL); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index b5700bc..50797d5 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -990,7 +990,7 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, return rb_entry(left, struct sched_dl_entity, rb_node); } -struct task_struct *pick_next_task_dl(struct rq *rq) +struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) { struct sched_dl_entity *dl_se; struct task_struct *p; @@ -1001,6 +1001,9 @@ struct task_struct *pick_next_task_dl(struct rq *rq) if (unlikely(!dl_rq->dl_nr_running)) return NULL; + if (prev) + prev->sched_class->put_prev_task(rq, prev); + dl_se = pick_next_dl_entity(rq, dl_rq); BUG_ON(!dl_se); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 748a7ac..c4bb0ac 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4655,7 +4655,8 @@ preempt: set_last_buddy(se); } -static struct task_struct *pick_next_task_fair(struct rq *rq) +static struct task_struct * +pick_next_task_fair(struct rq *rq, struct task_struct *prev) { struct task_struct *p; struct cfs_rq *cfs_rq = &rq->cfs; @@ -4664,6 +4665,9 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) if (!cfs_rq->nr_running) return NULL; + if (prev) + prev->sched_class->put_prev_task(rq, prev); + do { se = pick_next_entity(cfs_rq); set_next_entity(cfs_rq, se); diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 516c3d9..e5c922a 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -33,8 +33,12 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl resched_task(rq->idle); } -static struct task_struct *pick_next_task_idle(struct rq *rq) +static struct task_struct * +pick_next_task_idle(struct rq *rq, struct task_struct *prev) { + if (prev) + prev->sched_class->put_prev_task(rq, prev); + schedstat_inc(rq, sched_goidle); #ifdef CONFIG_SMP /* Trigger the post schedule to do an idle_enter for CFS */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a2740b7..a15ca1c 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1310,15 +1310,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) { struct sched_rt_entity *rt_se; struct task_struct *p; - struct rt_rq *rt_rq; - - rt_rq = &rq->rt; - - if (!rt_rq->rt_nr_running) - return NULL; - - if (rt_rq_throttled(rt_rq)) - return NULL; + struct rt_rq *rt_rq = &rq->rt; do { rt_se = pick_next_rt_entity(rq, rt_rq); @@ -1332,9 +1324,22 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) return p; } -static struct task_struct *pick_next_task_rt(struct rq *rq) +static struct task_struct * +pick_next_task_rt(struct rq *rq, struct task_struct *prev) { - struct task_struct *p = _pick_next_task_rt(rq); + struct task_struct *p; + struct rt_rq *rt_rq = &rq->rt; + + if (!rt_rq->rt_nr_running) + return NULL; + + if (rt_rq_throttled(rt_rq)) + return NULL; + + if (prev) + prev->sched_class->put_prev_task(rq, prev); + + p = _pick_next_task_rt(rq); /* The running task is never eligible for pushing */ if (p) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index bb89991..c534cf4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1105,7 +1105,13 @@ struct sched_class { void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); - struct task_struct * (*pick_next_task) (struct rq *rq); + /* + * It is the responsibility of the pick_next_task() method that will + * return the next task to call put_prev_task() on the @prev task or + * something equivalent. + */ + struct task_struct * (*pick_next_task) (struct rq *rq, + struct task_struct *prev); void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index fdb6bb0..a4147c9 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -23,16 +23,20 @@ check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) /* we're never preempted */ } -static struct task_struct *pick_next_task_stop(struct rq *rq) +static struct task_struct * +pick_next_task_stop(struct rq *rq, struct task_struct *prev) { struct task_struct *stop = rq->stop; - if (stop && stop->on_rq) { - stop->se.exec_start = rq_clock_task(rq); - return stop; - } + if (!stop || !stop->on_rq) + return NULL; - return NULL; + if (prev) + prev->sched_class->put_prev_task(rq, prev); + + stop->se.exec_start = rq_clock_task(rq); + + return stop; } static void -- cgit v0.10.2 From f10447998a59b97747c16258a9c6e6a1512f27f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 11 Feb 2012 06:05:00 +0100 Subject: sched/fair: Clean up the __clear_buddies_*() functions Slightly easier code flow, no functional changes. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1328936700.2476.17.camel@laptop Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c4bb0ac..8461721 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2739,10 +2739,10 @@ static void __clear_buddies_last(struct sched_entity *se) { for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - if (cfs_rq->last == se) - cfs_rq->last = NULL; - else + if (cfs_rq->last != se) break; + + cfs_rq->last = NULL; } } @@ -2750,10 +2750,10 @@ static void __clear_buddies_next(struct sched_entity *se) { for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - if (cfs_rq->next == se) - cfs_rq->next = NULL; - else + if (cfs_rq->next != se) break; + + cfs_rq->next = NULL; } } @@ -2761,10 +2761,10 @@ static void __clear_buddies_skip(struct sched_entity *se) { for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); - if (cfs_rq->skip == se) - cfs_rq->skip = NULL; - else + if (cfs_rq->skip != se) break; + + cfs_rq->skip = NULL; } } -- cgit v0.10.2 From 678d5718d8d099421b0dd54c01b0528f4aaf5919 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 11 Feb 2012 06:05:00 +0100 Subject: sched/fair: Optimize cgroup pick_next_task_fair() Since commit 2f36825b1 ("sched: Next buddy hint on sleep and preempt path") it is likely we pick a new task from the same cgroup, doing a put and then set on all intermediate entities is a waste of time, so try to avoid this. Measured using: mount nodev /cgroup -t cgroup -o cpu cd /cgroup mkdir a; cd a mkdir b; cd b mkdir c; cd c echo $$ > tasks perf stat --repeat 10 -- taskset 1 perf bench sched pipe PRE : 4.542422684 seconds time elapsed ( +- 0.33% ) POST: 4.389409991 seconds time elapsed ( +- 0.32% ) Which shows a significant improvement of ~3.5% Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Tejun Heo Link: http://lkml.kernel.org/r/1328936700.2476.17.camel@laptop Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8461721..a81b241 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2907,17 +2907,36 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); * 3) pick the "last" process, for cache locality * 4) do not run the "skip" process, if something else is available */ -static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) +static struct sched_entity * +pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) { - struct sched_entity *se = __pick_first_entity(cfs_rq); - struct sched_entity *left = se; + struct sched_entity *left = __pick_first_entity(cfs_rq); + struct sched_entity *se; + + /* + * If curr is set we have to see if its left of the leftmost entity + * still in the tree, provided there was anything in the tree at all. + */ + if (!left || (curr && entity_before(curr, left))) + left = curr; + + se = left; /* ideally we run the leftmost entity */ /* * Avoid running the skip buddy, if running something else can * be done without getting too unfair. */ if (cfs_rq->skip == se) { - struct sched_entity *second = __pick_next_entity(se); + struct sched_entity *second; + + if (se == curr) { + second = __pick_first_entity(cfs_rq); + } else { + second = __pick_next_entity(se); + if (!second || (curr && entity_before(curr, second))) + second = curr; + } + if (second && wakeup_preempt_entity(second, left) < 1) se = second; } @@ -2939,7 +2958,7 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) return se; } -static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq); +static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq); static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) { @@ -3594,22 +3613,23 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq) } /* conditionally throttle active cfs_rq's from put_prev_entity() */ -static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) +static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { if (!cfs_bandwidth_used()) - return; + return false; if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0)) - return; + return false; /* * it's possible for a throttled entity to be forced into a running * state (e.g. set_curr_task), in this case we're finished. */ if (cfs_rq_throttled(cfs_rq)) - return; + return true; throttle_cfs_rq(cfs_rq); + return true; } static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) @@ -3719,7 +3739,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq) } static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {} -static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} +static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; } static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {} static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {} @@ -4658,9 +4678,86 @@ preempt: static struct task_struct * pick_next_task_fair(struct rq *rq, struct task_struct *prev) { - struct task_struct *p; struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; + struct task_struct *p; + +#ifdef CONFIG_FAIR_GROUP_SCHED + if (!cfs_rq->nr_running) + return NULL; + + if (!prev || prev->sched_class != &fair_sched_class) + goto simple; + + /* + * Because of the set_next_buddy() in dequeue_task_fair() it is rather + * likely that a next task is from the same cgroup as the current. + * + * Therefore attempt to avoid putting and setting the entire cgroup + * hierarchy, only change the part that actually changes. + */ + + do { + struct sched_entity *curr = cfs_rq->curr; + + /* + * Since we got here without doing put_prev_entity() we also + * have to consider cfs_rq->curr. If it is still a runnable + * entity, update_curr() will update its vruntime, otherwise + * forget we've ever seen it. + */ + if (curr && curr->on_rq) + update_curr(cfs_rq); + else + curr = NULL; + + /* + * This call to check_cfs_rq_runtime() will do the throttle and + * dequeue its entity in the parent(s). Therefore the 'simple' + * nr_running test will indeed be correct. + */ + if (unlikely(check_cfs_rq_runtime(cfs_rq))) + goto simple; + + se = pick_next_entity(cfs_rq, curr); + cfs_rq = group_cfs_rq(se); + } while (cfs_rq); + + p = task_of(se); + + /* + * Since we haven't yet done put_prev_entity and if the selected task + * is a different task than we started out with, try and touch the + * least amount of cfs_rqs. + */ + if (prev != p) { + struct sched_entity *pse = &prev->se; + + while (!(cfs_rq = is_same_group(se, pse))) { + int se_depth = se->depth; + int pse_depth = pse->depth; + + if (se_depth <= pse_depth) { + put_prev_entity(cfs_rq_of(pse), pse); + pse = parent_entity(pse); + } + if (se_depth >= pse_depth) { + set_next_entity(cfs_rq_of(se), se); + se = parent_entity(se); + } + } + + put_prev_entity(cfs_rq, pse); + set_next_entity(cfs_rq, se); + } + + if (hrtick_enabled(rq)) + hrtick_start_fair(rq, p); + + return p; +simple: + cfs_rq = &rq->cfs; +#endif if (!cfs_rq->nr_running) return NULL; @@ -4669,12 +4766,13 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev) prev->sched_class->put_prev_task(rq, prev); do { - se = pick_next_entity(cfs_rq); + se = pick_next_entity(cfs_rq, NULL); set_next_entity(cfs_rq, se); cfs_rq = group_cfs_rq(se); } while (cfs_rq); p = task_of(se); + if (hrtick_enabled(rq)) hrtick_start_fair(rq, p); -- cgit v0.10.2 From 6c3b4d44ba2838f00614a5a2d777d4401e0bfd71 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Jan 2014 15:09:39 +0100 Subject: sched: Clean up idle task SMP logic The idle post_schedule flag is just a vile waste of time, furthermore it appears unneeded, move the idle_enter_fair() call into pick_next_task_idle(). Signed-off-by: Peter Zijlstra Cc: Daniel Lezcano Cc: Vincent Guittot Cc: alex.shi@linaro.org Cc: mingo@kernel.org Cc: Steven Rostedt Link: http://lkml.kernel.org/n/tip-aljykihtxJt3mkokxi0qZurb@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index e5c922a..721371b 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -19,11 +19,6 @@ static void pre_schedule_idle(struct rq *rq, struct task_struct *prev) idle_exit_fair(rq); rq_last_tick_reset(rq); } - -static void post_schedule_idle(struct rq *rq) -{ - idle_enter_fair(rq); -} #endif /* CONFIG_SMP */ /* * Idle tasks are unconditionally rescheduled: @@ -41,8 +36,7 @@ pick_next_task_idle(struct rq *rq, struct task_struct *prev) schedstat_inc(rq, sched_goidle); #ifdef CONFIG_SMP - /* Trigger the post schedule to do an idle_enter for CFS */ - rq->post_schedule = 1; + idle_enter_fair(rq); #endif return rq->idle; } @@ -106,7 +100,6 @@ const struct sched_class idle_sched_class = { #ifdef CONFIG_SMP .select_task_rq = select_task_rq_idle, .pre_schedule = pre_schedule_idle, - .post_schedule = post_schedule_idle, #endif .set_curr_task = set_curr_task_idle, -- cgit v0.10.2 From a18b31dd537f51331b9cd357987a5e807bf3e8d1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Feb 2014 10:35:35 +0100 Subject: m68k: Sort arch/m68k/include/asm/Kbuild Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 7cc8c36..df4d46f 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,4 +1,3 @@ - generic-y += bitsperlong.h generic-y += clkdev.h generic-y += cputime.h @@ -6,6 +5,7 @@ generic-y += device.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h +generic-y += hash.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h @@ -18,6 +18,7 @@ generic-y += local.h generic-y += mman.h generic-y += mutex.h generic-y += percpu.h +generic-y += preempt.h generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h @@ -31,5 +32,3 @@ generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h -generic-y += preempt.h -generic-y += hash.h -- cgit v0.10.2 From 3067bab1cb6e45063c602a731b9341651c4f2ddb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Feb 2014 10:38:39 +0100 Subject: m68k: Switch to asm-generic/barrier.h The generic nop() implementation is fine for m68k. Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index df4d46f..6fb9e81 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,3 +1,4 @@ +generic-y += barrier.h generic-y += bitsperlong.h generic-y += clkdev.h generic-y += cputime.h diff --git a/arch/m68k/include/asm/barrier.h b/arch/m68k/include/asm/barrier.h deleted file mode 100644 index 15c5f77..0000000 --- a/arch/m68k/include/asm/barrier.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _M68K_BARRIER_H -#define _M68K_BARRIER_H - -#define nop() do { asm volatile ("nop"); barrier(); } while (0) - -#include - -#endif /* _M68K_BARRIER_H */ -- cgit v0.10.2 From 7247f55381d54645a1eb47588de51bb26fa7cb7a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Feb 2014 15:07:13 +0100 Subject: m68k: Wire up sched_setattr and sched_getattr Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index 014f288..9d38b73 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -4,7 +4,7 @@ #include -#define NR_syscalls 349 +#define NR_syscalls 351 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT diff --git a/arch/m68k/include/uapi/asm/unistd.h b/arch/m68k/include/uapi/asm/unistd.h index 625f321..b932dd4 100644 --- a/arch/m68k/include/uapi/asm/unistd.h +++ b/arch/m68k/include/uapi/asm/unistd.h @@ -354,5 +354,7 @@ #define __NR_process_vm_writev 346 #define __NR_kcmp 347 #define __NR_finit_module 348 +#define __NR_sched_setattr 349 +#define __NR_sched_getattr 350 #endif /* _UAPI_ASM_M68K_UNISTD_H_ */ diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index 3f04ea0..b6223dc4 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -369,4 +369,6 @@ ENTRY(sys_call_table) .long sys_process_vm_writev .long sys_kcmp .long sys_finit_module + .long sys_sched_setattr + .long sys_sched_getattr /* 350 */ -- cgit v0.10.2 From a65e0c6a7f14d80e87b82959c7333595cbd3e54e Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Tue, 28 Jan 2014 12:49:15 +0200 Subject: clk: keystone: gate: fix clk_init_data initialization The clk_init_data struct is allocated in the stack. All members of this struct should be initialized before using otherwise it will lead to unpredictable situation as it can contain garbage. Ultimately the clk->flag field contains garbage. In my case it leads that flag CLK_IGNORE_UNUSED is set for most of clocks. As result a bunch of unused clocks cannot be disabled. So initialize flags in this structure too. Cc: Mike Turquette Signed-off-by: Ivan Khoronzhuk Signed-off-by: Santosh Shilimkar diff --git a/drivers/clk/keystone/gate.c b/drivers/clk/keystone/gate.c index 17a5983..86f1e36 100644 --- a/drivers/clk/keystone/gate.c +++ b/drivers/clk/keystone/gate.c @@ -179,6 +179,7 @@ static struct clk *clk_register_psc(struct device *dev, init.name = name; init.ops = &clk_psc_ops; + init.flags = 0; init.parent_names = (parent_name ? &parent_name : NULL); init.num_parents = (parent_name ? 1 : 0); -- cgit v0.10.2 From 565bbdcd3b91523b4142587c00206302e091a23e Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 10 Feb 2014 14:07:35 -0500 Subject: ARM: keystone: dts: fix clkvcp3 control register address The address for control regs in clkvcp3 node is not correct and should be 0x023500a8 instead of 0x0235000a8. This lead to few unexpected behaviors while clocks were turned of in absence of clk_ignore_unused Mike Turquette Signed-off-by: Ivan Khoronzhuk Signed-off-by: Santosh Shilimkar diff --git a/arch/arm/boot/dts/keystone-clocks.dtsi b/arch/arm/boot/dts/keystone-clocks.dtsi index 2363593..ef58d1c 100644 --- a/arch/arm/boot/dts/keystone-clocks.dtsi +++ b/arch/arm/boot/dts/keystone-clocks.dtsi @@ -612,7 +612,7 @@ clocks { compatible = "ti,keystone,psc-clock"; clocks = <&chipclk13>; clock-output-names = "vcp-3"; - reg = <0x0235000a8 0xb00>, <0x02350060 0x400>; + reg = <0x023500a8 0xb00>, <0x02350060 0x400>; reg-names = "control", "domain"; domain-id = <24>; }; -- cgit v0.10.2 From 38033c37faab850ed5d33bb675c4de6c66be84d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jan 2014 20:32:21 +0100 Subject: sched: Push down pre_schedule() and idle_balance() This patch both merged idle_balance() and pre_schedule() and pushes both of them into pick_next_task(). Conceptually pre_schedule() and idle_balance() are rather similar, both are used to pull more work onto the current CPU. We cannot however first move idle_balance() into pre_schedule_fair() since there is no guarantee the last runnable task is a fair task, and thus we would miss newidle balances. Similarly, the dl and rt pre_schedule calls must be ran before idle_balance() since their respective tasks have higher priority and it would not do to delay their execution searching for less important tasks first. However, by noticing that pick_next_tasks() already traverses the sched_class hierarchy in the right order, we can get the right behaviour and do away with both calls. We must however change the special case optimization to also require that prev is of sched_class_fair, otherwise we can miss doing a dl or rt pull where we needed one. Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/n/tip-a8k6vvaebtn64nie345kx1je@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dedb5f0..3068f37 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2169,13 +2169,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) #ifdef CONFIG_SMP -/* assumes rq->lock is held */ -static inline void pre_schedule(struct rq *rq, struct task_struct *prev) -{ - if (prev->sched_class->pre_schedule) - prev->sched_class->pre_schedule(rq, prev); -} - /* rq->lock is NOT held, but preemption is disabled */ static inline void post_schedule(struct rq *rq) { @@ -2193,10 +2186,6 @@ static inline void post_schedule(struct rq *rq) #else -static inline void pre_schedule(struct rq *rq, struct task_struct *p) -{ -} - static inline void post_schedule(struct rq *rq) { } @@ -2592,7 +2581,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev) * Optimization: we know that if all tasks are in * the fair class we can call that function directly: */ - if (likely(rq->nr_running == rq->cfs.h_nr_running)) { + if (likely(prev->sched_class == &fair_sched_class && + rq->nr_running == rq->cfs.h_nr_running)) { p = fair_sched_class.pick_next_task(rq, prev); if (likely(p)) return p; @@ -2695,18 +2685,6 @@ need_resched: switch_count = &prev->nvcsw; } - pre_schedule(rq, prev); - - if (unlikely(!rq->nr_running)) { - /* - * We must set idle_stamp _before_ calling idle_balance(), such - * that we measure the duration of idle_balance() as idle time. - */ - rq->idle_stamp = rq_clock(rq); - if (idle_balance(rq)) - rq->idle_stamp = 0; - } - if (prev->on_rq || rq->skip_clock_update < 0) update_rq_clock(rq); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 50797d5..ed31ef6 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -944,6 +944,8 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) resched_task(rq->curr); } +static int pull_dl_task(struct rq *this_rq); + #endif /* CONFIG_SMP */ /* @@ -998,6 +1000,11 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) dl_rq = &rq->dl; +#ifdef CONFIG_SMP + if (dl_task(prev)) + pull_dl_task(rq); +#endif + if (unlikely(!dl_rq->dl_nr_running)) return NULL; @@ -1429,13 +1436,6 @@ skip: return ret; } -static void pre_schedule_dl(struct rq *rq, struct task_struct *prev) -{ - /* Try to pull other tasks here */ - if (dl_task(prev)) - pull_dl_task(rq); -} - static void post_schedule_dl(struct rq *rq) { push_dl_tasks(rq); @@ -1628,7 +1628,6 @@ const struct sched_class dl_sched_class = { .set_cpus_allowed = set_cpus_allowed_dl, .rq_online = rq_online_dl, .rq_offline = rq_offline_dl, - .pre_schedule = pre_schedule_dl, .post_schedule = post_schedule_dl, .task_woken = task_woken_dl, #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a81b241..43b49fe 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2577,7 +2577,8 @@ void idle_exit_fair(struct rq *this_rq) update_rq_runnable_avg(this_rq, 0); } -#else +#else /* CONFIG_SMP */ + static inline void update_entity_load_avg(struct sched_entity *se, int update_cfs_rq) {} static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {} @@ -2589,7 +2590,7 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, int sleep) {} static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) {} -#endif +#endif /* CONFIG_SMP */ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) { @@ -4682,9 +4683,10 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev) struct sched_entity *se; struct task_struct *p; +again: __maybe_unused #ifdef CONFIG_FAIR_GROUP_SCHED if (!cfs_rq->nr_running) - return NULL; + goto idle; if (!prev || prev->sched_class != &fair_sched_class) goto simple; @@ -4760,7 +4762,7 @@ simple: #endif if (!cfs_rq->nr_running) - return NULL; + goto idle; if (prev) prev->sched_class->put_prev_task(rq, prev); @@ -4777,6 +4779,22 @@ simple: hrtick_start_fair(rq, p); return p; + +idle: +#ifdef CONFIG_SMP + idle_enter_fair(rq); + /* + * We must set idle_stamp _before_ calling idle_balance(), such that we + * measure the duration of idle_balance() as idle time. + */ + rq->idle_stamp = rq_clock(rq); + if (idle_balance(rq)) { /* drops rq->lock */ + rq->idle_stamp = 0; + goto again; + } +#endif + + return NULL; } /* diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 721371b..f7d03af 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -13,13 +13,8 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags) { return task_cpu(p); /* IDLE tasks as never migrated */ } - -static void pre_schedule_idle(struct rq *rq, struct task_struct *prev) -{ - idle_exit_fair(rq); - rq_last_tick_reset(rq); -} #endif /* CONFIG_SMP */ + /* * Idle tasks are unconditionally rescheduled: */ @@ -56,6 +51,10 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags) static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { +#ifdef CONFIG_SMP + idle_exit_fair(rq); + rq_last_tick_reset(rq); +#endif } static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued) @@ -99,7 +98,6 @@ const struct sched_class idle_sched_class = { #ifdef CONFIG_SMP .select_task_rq = select_task_rq_idle, - .pre_schedule = pre_schedule_idle, #endif .set_curr_task = set_curr_task_idle, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a15ca1c..72f9ec7 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -229,6 +229,8 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) #ifdef CONFIG_SMP +static int pull_rt_task(struct rq *this_rq); + static inline int rt_overloaded(struct rq *rq) { return atomic_read(&rq->rd->rto_count); @@ -1330,6 +1332,12 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) struct task_struct *p; struct rt_rq *rt_rq = &rq->rt; +#ifdef CONFIG_SMP + /* Try to pull RT tasks here if we lower this rq's prio */ + if (rq->rt.highest_prio.curr > prev->prio) + pull_rt_task(rq); +#endif + if (!rt_rq->rt_nr_running) return NULL; @@ -1721,13 +1729,6 @@ skip: return ret; } -static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) -{ - /* Try to pull RT tasks here if we lower this rq's prio */ - if (rq->rt.highest_prio.curr > prev->prio) - pull_rt_task(rq); -} - static void post_schedule_rt(struct rq *rq) { push_rt_tasks(rq); @@ -2004,7 +2005,6 @@ const struct sched_class rt_sched_class = { .set_cpus_allowed = set_cpus_allowed_rt, .rq_online = rq_online_rt, .rq_offline = rq_offline_rt, - .pre_schedule = pre_schedule_rt, .post_schedule = post_schedule_rt, .task_woken = task_woken_rt, .switched_from = switched_from_rt, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c534cf4..1bf34c2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1118,7 +1118,6 @@ struct sched_class { int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); void (*migrate_task_rq)(struct task_struct *p, int next_cpu); - void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); void (*post_schedule) (struct rq *this_rq); void (*task_waking) (struct task_struct *task); void (*task_woken) (struct rq *this_rq, struct task_struct *task); -- cgit v0.10.2 From 27f17580fd2c7514c8f5cce22ab903c6f3ddf458 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Wed, 29 Jan 2014 14:29:33 +0000 Subject: sched: Delete is_same_group() outside CONFIG_FAIR_GROUP_SCHED Since is_same_group() is only used in the group scheduling code, there is no need to define it outside CONFIG_FAIR_GROUP_SCHED. Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391005773-29493-1-git-send-email-dietmar.eggemann@arm.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 43b49fe..235cfa7 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -415,12 +415,6 @@ static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq) #define for_each_leaf_cfs_rq(rq, cfs_rq) \ for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL) -static inline struct cfs_rq * -is_same_group(struct sched_entity *se, struct sched_entity *pse) -{ - return cfs_rq_of(se); /* always the same rq */ -} - static inline struct sched_entity *parent_entity(struct sched_entity *se) { return NULL; -- cgit v0.10.2 From 37e6bae8395a94b4dd934c92b02b9408be992365 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Thu, 23 Jan 2014 18:39:54 +0800 Subject: sched: Add statistic for newidle load balance cost Tracking rq->max_idle_balance_cost and sd->max_newidle_lb_cost. It's useful to know these values in debug mode. Signed-off-by: Alex Shi Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/52E0F3BF.5020904@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3068f37..fb9764f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4811,7 +4811,7 @@ set_table_entry(struct ctl_table *entry, static struct ctl_table * sd_alloc_ctl_domain_table(struct sched_domain *sd) { - struct ctl_table *table = sd_alloc_ctl_entry(13); + struct ctl_table *table = sd_alloc_ctl_entry(14); if (table == NULL) return NULL; @@ -4839,9 +4839,12 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) sizeof(int), 0644, proc_dointvec_minmax, false); set_table_entry(&table[10], "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax, false); - set_table_entry(&table[11], "name", sd->name, + set_table_entry(&table[11], "max_newidle_lb_cost", + &sd->max_newidle_lb_cost, + sizeof(long), 0644, proc_doulongvec_minmax, false); + set_table_entry(&table[12], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring, false); - /* &table[12] is terminator */ + /* &table[13] is terminator */ return table; } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 31b908d..f3344c3 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -321,6 +321,7 @@ do { \ P(sched_goidle); #ifdef CONFIG_SMP P64(avg_idle); + P64(max_idle_balance_cost); #endif P(ttwu_count); -- cgit v0.10.2 From af8cd8ef726f335815233d03b8723e9c52041edd Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 29 Jan 2014 15:31:36 -0500 Subject: sched/idle: Move the cpuidle entry point to the generic idle loop In order to integrate cpuidle with the scheduler, we must have a better proximity in the core code with what cpuidle is doing and not delegate such interaction to arch code. Architectures implementing arch_cpu_idle() should simply enter a cheap idle mode in the absence of a proper cpuidle driver. In both cases i.e. whether it is a cpuidle driver or the default arch_cpu_idle(), the calling convention expects IRQs to be disabled on entry and enabled on exit. There is a warning in place already but let's add a forced IRQ enable here as well. This will allow for removing the forced IRQ enable some implementations do locally and allowing for the warning to trig. Signed-off-by: Nicolas Pitre Acked-by: Daniel Lezcano Cc: Benjamin Herrenschmidt Cc: Preeti U Murthy Cc: Paul Mundt Cc: "Rafael J. Wysocki" Cc: Olof Johansson Cc: Russell King Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.LFD.2.11.1401291526320.1652@knanqh.ubzr Signed-off-by: Ingo Molnar diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c index 277f494..b7976a1 100644 --- a/kernel/cpu/idle.c +++ b/kernel/cpu/idle.c @@ -3,6 +3,7 @@ */ #include #include +#include #include #include #include @@ -95,8 +96,10 @@ static void cpu_idle_loop(void) if (!current_clr_polling_and_test()) { stop_critical_timings(); rcu_idle_enter(); - arch_cpu_idle(); - WARN_ON_ONCE(irqs_disabled()); + if (cpuidle_idle_call()) + arch_cpu_idle(); + if (WARN_ON_ONCE(irqs_disabled())) + local_irq_enable(); rcu_idle_exit(); start_critical_timings(); } else { -- cgit v0.10.2 From ad68cc7a777948b77f15a3dc3510918ff87ed0c0 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 29 Jan 2014 12:45:09 -0500 Subject: sched/idle, ARM: Remove redundant cpuidle_idle_call() The core idle loop now takes care of it. Signed-off-by: Nicolas Pitre Acked-by: Daniel Lezcano Cc: Preeti U Murthy Cc: Paul Mundt Cc: "Rafael J. Wysocki" Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-sh@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Russell King Cc: linaro-kernel@lists.linaro.org Cc: Benjamin Herrenschmidt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-y2nbw5j3ma5siy5584919z5i@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 92f7b15..adabeab 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -133,7 +132,11 @@ EXPORT_SYMBOL_GPL(arm_pm_restart); void (*arm_pm_idle)(void); -static void default_idle(void) +/* + * Called from the core idle loop. + */ + +void arch_cpu_idle(void) { if (arm_pm_idle) arm_pm_idle(); @@ -168,15 +171,6 @@ void arch_cpu_idle_dead(void) #endif /* - * Called from the core idle loop. - */ -void arch_cpu_idle(void) -{ - if (cpuidle_idle_call()) - default_idle(); -} - -/* * Called by kexec, immediately prior to machine_kexec(). * * This must completely disable all secondary CPUs; simply causing those CPUs -- cgit v0.10.2 From d8c6ad3184ca6516a5cf457af1afaa293d8f09c0 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 29 Jan 2014 12:45:10 -0500 Subject: sched/idle, PPC: Remove redundant cpuidle_idle_call() The core idle loop now takes care of it. However a few things need checking: - Invocation of cpuidle_idle_call() in pseries_lpar_idle() happened through arch_cpu_idle() and was therefore always preceded by a call to ppc64_runlatch_off(). To preserve this property now that cpuidle_idle_call() is invoked directly from core code, a call to ppc64_runlatch_off() has been added to idle_loop_prolog() in platforms/pseries/processor_idle.c. - Similarly, cpuidle_idle_call() was followed by ppc64_runlatch_off() so a call to the later has been added to idle_loop_epilog(). - And since arch_cpu_idle() always made sure to re-enable IRQs if they were not enabled, this is now done in idle_loop_epilog() as well. The above was made in order to keep the execution flow close to the original. I don't know if that was strictly necessary. Someone well aquainted with the platform details might find some room for possible optimizations. Signed-off-by: Nicolas Pitre Reviewed-by: Preeti U Murthy Cc: "Rafael J. Wysocki" Cc: Daniel Lezcano Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-sh@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Russell King Cc: linaro-kernel@lists.linaro.org Cc: Benjamin Herrenschmidt Cc: Paul Mundt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-47o4m03citrfg9y1vxic5asb@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8e639d7..c737d55 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -356,29 +355,24 @@ early_initcall(alloc_dispatch_log_kmem_cache); static void pseries_lpar_idle(void) { - /* This would call on the cpuidle framework, and the back-end pseries - * driver to go to idle states + /* + * Default handler to go into low thread priority and possibly + * low power mode by cedeing processor to hypervisor */ - if (cpuidle_idle_call()) { - /* On error, execute default handler - * to go into low thread priority and possibly - * low power mode by cedeing processor to hypervisor - */ - /* Indicate to hypervisor that we are idle. */ - get_lppaca()->idle = 1; + /* Indicate to hypervisor that we are idle. */ + get_lppaca()->idle = 1; - /* - * Yield the processor to the hypervisor. We return if - * an external interrupt occurs (which are driven prior - * to returning here) or if a prod occurs from another - * processor. When returning here, external interrupts - * are enabled. - */ - cede_processor(); + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); - get_lppaca()->idle = 0; - } + get_lppaca()->idle = 0; } /* diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 7ab564a..d486489 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -29,6 +29,7 @@ static struct cpuidle_state *cpuidle_state_table; static inline void idle_loop_prolog(unsigned long *in_purr) { + ppc64_runlatch_off(); *in_purr = mfspr(SPRN_PURR); /* * Indicate to the HV that we are idle. Now would be @@ -45,6 +46,10 @@ static inline void idle_loop_epilog(unsigned long in_purr) wait_cycles += mfspr(SPRN_PURR) - in_purr; get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles); get_lppaca()->idle = 0; + + if (irqs_disabled()) + local_irq_enable(); + ppc64_runlatch_on(); } static int snooze_loop(struct cpuidle_device *dev, -- cgit v0.10.2 From f0c5cdb5cf89e56bdef9d71da89d4966dea6ef71 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 29 Jan 2014 12:45:11 -0500 Subject: sched/idle, SH: Remove redundant cpuidle_idle_call() The core idle loop now takes care of it. Signed-off-by: Nicolas Pitre Acked-by: Daniel Lezcano Cc: Preeti U Murthy Cc: Paul Mundt Cc: "Rafael J. Wysocki" Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-sh@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Russell King Cc: linaro-kernel@lists.linaro.org Cc: Benjamin Herrenschmidt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-v2c3oswyd80xk2vh7fwmu6r8@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 2ea4483..be616ee 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -40,8 +39,7 @@ void arch_cpu_idle_dead(void) void arch_cpu_idle(void) { - if (cpuidle_idle_call()) - sh_idle(); + sh_idle(); } void __init select_idle_routine(void) -- cgit v0.10.2 From 16f8b05abe33575b5fc0833cab1286bd6227f255 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 29 Jan 2014 12:45:12 -0500 Subject: sched/idle, x86: Remove redundant cpuidle_idle_call() The core idle loop now takes care of it. Signed-off-by: Nicolas Pitre Acked-by: Daniel Lezcano Cc: Preeti U Murthy Cc: Paul Mundt Cc: "Rafael J. Wysocki" Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-sh@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: Russell King Cc: linaro-kernel@lists.linaro.org Cc: Benjamin Herrenschmidt Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-3ioazimg4j5iq6kdefks04i8@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3fb8d95..4505e2a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -298,10 +298,7 @@ void arch_cpu_idle_dead(void) */ void arch_cpu_idle(void) { - if (cpuidle_idle_call()) - x86_idle(); - else - local_irq_enable(); + x86_idle(); } /* -- cgit v0.10.2 From cf37b6b48428d6be8f8762b3599d529c44644fb2 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sun, 26 Jan 2014 23:42:01 -0500 Subject: sched/idle: Move cpu/idle.c to sched/idle.c Integration of cpuidle with the scheduler requires that the idle loop be closely integrated with the scheduler proper. Moving cpu/idle.c into the sched directory will allow for a smoother integration, and eliminate a subdirectory which contained only one source file. Signed-off-by: Nicolas Pitre Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.LFD.2.11.1401301102210.1652@knanqh.ubzr Signed-off-by: Ingo Molnar diff --git a/kernel/Makefile b/kernel/Makefile index bc010ee..6f1c7e5 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -22,7 +22,6 @@ obj-y += sched/ obj-y += locking/ obj-y += power/ obj-y += printk/ -obj-y += cpu/ obj-y += irq/ obj-y += rcu/ diff --git a/kernel/cpu/Makefile b/kernel/cpu/Makefile deleted file mode 100644 index 59ab052..0000000 --- a/kernel/cpu/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-y = idle.o diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 9a95c8c..ab32b7b 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -13,7 +13,7 @@ endif obj-y += core.o proc.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o -obj-y += wait.o completion.o +obj-y += wait.o completion.o idle.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c new file mode 100644 index 0000000..14ca434 --- /dev/null +++ b/kernel/sched/idle.c @@ -0,0 +1,144 @@ +/* + * Generic entry point for the idle threads + */ +#include +#include +#include +#include +#include +#include + +#include + +#include + +static int __read_mostly cpu_idle_force_poll; + +void cpu_idle_poll_ctrl(bool enable) +{ + if (enable) { + cpu_idle_force_poll++; + } else { + cpu_idle_force_poll--; + WARN_ON_ONCE(cpu_idle_force_poll < 0); + } +} + +#ifdef CONFIG_GENERIC_IDLE_POLL_SETUP +static int __init cpu_idle_poll_setup(char *__unused) +{ + cpu_idle_force_poll = 1; + return 1; +} +__setup("nohlt", cpu_idle_poll_setup); + +static int __init cpu_idle_nopoll_setup(char *__unused) +{ + cpu_idle_force_poll = 0; + return 1; +} +__setup("hlt", cpu_idle_nopoll_setup); +#endif + +static inline int cpu_idle_poll(void) +{ + rcu_idle_enter(); + trace_cpu_idle_rcuidle(0, smp_processor_id()); + local_irq_enable(); + while (!tif_need_resched()) + cpu_relax(); + trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); + rcu_idle_exit(); + return 1; +} + +/* Weak implementations for optional arch specific functions */ +void __weak arch_cpu_idle_prepare(void) { } +void __weak arch_cpu_idle_enter(void) { } +void __weak arch_cpu_idle_exit(void) { } +void __weak arch_cpu_idle_dead(void) { } +void __weak arch_cpu_idle(void) +{ + cpu_idle_force_poll = 1; + local_irq_enable(); +} + +/* + * Generic idle loop implementation + */ +static void cpu_idle_loop(void) +{ + while (1) { + tick_nohz_idle_enter(); + + while (!need_resched()) { + check_pgt_cache(); + rmb(); + + if (cpu_is_offline(smp_processor_id())) + arch_cpu_idle_dead(); + + local_irq_disable(); + arch_cpu_idle_enter(); + + /* + * In poll mode we reenable interrupts and spin. + * + * Also if we detected in the wakeup from idle + * path that the tick broadcast device expired + * for us, we don't want to go deep idle as we + * know that the IPI is going to arrive right + * away + */ + if (cpu_idle_force_poll || tick_check_broadcast_expired()) { + cpu_idle_poll(); + } else { + if (!current_clr_polling_and_test()) { + stop_critical_timings(); + rcu_idle_enter(); + if (cpuidle_idle_call()) + arch_cpu_idle(); + if (WARN_ON_ONCE(irqs_disabled())) + local_irq_enable(); + rcu_idle_exit(); + start_critical_timings(); + } else { + local_irq_enable(); + } + __current_set_polling(); + } + arch_cpu_idle_exit(); + /* + * We need to test and propagate the TIF_NEED_RESCHED + * bit here because we might not have send the + * reschedule IPI to idle tasks. + */ + if (tif_need_resched()) + set_preempt_need_resched(); + } + tick_nohz_idle_exit(); + schedule_preempt_disabled(); + } +} + +void cpu_startup_entry(enum cpuhp_state state) +{ + /* + * This #ifdef needs to die, but it's too late in the cycle to + * make this generic (arm and sh have never invoked the canary + * init for the non boot cpus!). Will be fixed in 3.11 + */ +#ifdef CONFIG_X86 + /* + * If we're the non-boot CPU, nothing set the stack canary up + * for us. The boot CPU already has it initialized but no harm + * in doing it again. This is a good place for updating it, as + * we wont ever return from this function (so the invalid + * canaries already on the stack wont ever trigger). + */ + boot_init_stack_canary(); +#endif + __current_set_polling(); + arch_cpu_idle_prepare(); + cpu_idle_loop(); +} -- cgit v0.10.2 From 74b8af7837fa55c020e2ad1b34a6b10dfe25a9b1 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 11 Feb 2014 14:05:17 +0800 Subject: powerpc/spufs: Remove MAX_USER_PRIO define Current ppc64_defconfig fails with: arch/powerpc/platforms/cell/spufs/sched.c:86:0: error: "MAX_USER_PRIO" redefined [-Werror] cc1: all warnings being treated as errors Commit 6b6350f155af ("sched: Expose some macros related to priority") introduced a generic MAX_USER_PRIO macro to sched/prio.h, which is causing the conflit. Use that one instead of our own. Reported-by: Fengguang Wu Reported-by: Stephen Rothwell Signed-off-by: Jeremy Kerr Cc: Dongsheng Yang Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/1392098717.689604.970589769393.1.gpush@pablo Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 4931838..4a0a64f 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -83,7 +83,6 @@ static struct timer_list spuloadavg_timer; #define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1) #define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK)) -#define MAX_USER_PRIO (MAX_PRIO - MAX_RT_PRIO) #define SCALE_PRIO(x, prio) \ max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE) -- cgit v0.10.2 From 32769814d54a5a360b83811b4039c776ec953c71 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Tue, 11 Feb 2014 12:27:19 +0200 Subject: mac80211: fix sched_scan restart on recovery In case we were not suspended, the reconfig function returns without configuring the scheduled scan. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 676dc09..1d1bb70 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1741,6 +1741,26 @@ int ieee80211_reconfig(struct ieee80211_local *local) IEEE80211_QUEUE_STOP_REASON_SUSPEND); /* + * Reconfigure sched scan if it was interrupted by FW restart or + * suspend. + */ + mutex_lock(&local->mtx); + sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, + lockdep_is_held(&local->mtx)); + if (sched_scan_sdata && local->sched_scan_req) + /* + * Sched scan stopped, but we don't want to report it. Instead, + * we're trying to reschedule. + */ + if (__ieee80211_request_sched_scan_start(sched_scan_sdata, + local->sched_scan_req)) + sched_scan_stopped = true; + mutex_unlock(&local->mtx); + + if (sched_scan_stopped) + cfg80211_sched_scan_stopped(local->hw.wiphy); + + /* * If this is for hw restart things are still running. * We may want to change that later, however. */ @@ -1768,26 +1788,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) WARN_ON(1); #endif - /* - * Reconfigure sched scan if it was interrupted by FW restart or - * suspend. - */ - mutex_lock(&local->mtx); - sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)); - if (sched_scan_sdata && local->sched_scan_req) - /* - * Sched scan stopped, but we don't want to report it. Instead, - * we're trying to reschedule. - */ - if (__ieee80211_request_sched_scan_start(sched_scan_sdata, - local->sched_scan_req)) - sched_scan_stopped = true; - mutex_unlock(&local->mtx); - - if (sched_scan_stopped) - cfg80211_sched_scan_stopped(local->hw.wiphy); - return 0; } -- cgit v0.10.2 From c368ddaa9ad79fdffde4756804321feba6725c75 Mon Sep 17 00:00:00 2001 From: Eytan Lifshitz Date: Thu, 6 Feb 2014 21:01:32 +0200 Subject: mac80211: fix memory leak In case ieee80211_prep_connection() fails to dereference sdata->vif.chanctx_conf, the function returns and doesn't free new_sta. fixed. Signed-off-by: Eytan Lifshitz Signed-off-by: Johannes Berg diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fc1d824..57d5482 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3753,6 +3753,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); + sta_info_free(local, new_sta); return -EINVAL; } rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); -- cgit v0.10.2 From 6e60163b9db6d0b203ffc0f0b8ef5a5f1c9d3fb8 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 9 Feb 2014 14:01:33 +0100 Subject: drm/tegra: fix typo 'CONFIG_TEGRA_DRM_FBDEV' Signed-off-by: Paul Bolle Signed-off-by: Thierry Reding diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 88a5290..c715947 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -104,7 +104,7 @@ static void tegra_drm_context_free(struct tegra_drm_context *context) static void tegra_drm_lastclose(struct drm_device *drm) { -#ifdef CONFIG_TEGRA_DRM_FBDEV +#ifdef CONFIG_DRM_TEGRA_FBDEV struct tegra_drm *tegra = drm->dev_private; tegra_fbdev_restore_mode(tegra->fbdev); -- cgit v0.10.2 From 89e6e8c85f21555a763d0e47841b515ede6278d1 Mon Sep 17 00:00:00 2001 From: Erik Faye-Lund Date: Tue, 7 Jan 2014 21:03:06 +0100 Subject: gpu: host1x: do not check previously handled gathers When patching gathers, we don't need to check against gathers with lower indices than the current one, as they are guaranteed to already have been handled. Signed-off-by: Erik Faye-Lund Acked-By: Terje Bergstrom Signed-off-by: Thierry Reding diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 1146e3b..112f27e 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -538,7 +538,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) g->base = job->gather_addr_phys[i]; - for (j = 0; j < job->num_gathers; j++) + for (j = i + 1; j < job->num_gathers; j++) if (job->gathers[j].bo == g->bo) job->gathers[j].handled = true; -- cgit v0.10.2 From b18915379071b31ea819a36ca67761620decba06 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 11 Feb 2014 21:12:27 +0400 Subject: drm/tegra: Add guard to avoid double disable/enable of RGB outputs Add guard to check whether RGB output is already enabled in the way it's done for HDMI output. Fixes possible hang on trying to disable output twice (first time during driver probe and second on fb registering). Signed-off-by: Dmitry Osipenko Signed-off-by: Thierry Reding diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c index 338f7f6..0266fb4 100644 --- a/drivers/gpu/drm/tegra/rgb.c +++ b/drivers/gpu/drm/tegra/rgb.c @@ -15,6 +15,7 @@ struct tegra_rgb { struct tegra_output output; struct tegra_dc *dc; + bool enabled; struct clk *clk_parent; struct clk *clk; @@ -89,6 +90,9 @@ static int tegra_output_rgb_enable(struct tegra_output *output) struct tegra_rgb *rgb = to_rgb(output); unsigned long value; + if (rgb->enabled) + return 0; + tegra_dc_write_regs(rgb->dc, rgb_enable, ARRAY_SIZE(rgb_enable)); value = DE_SELECT_ACTIVE | DE_CONTROL_NORMAL; @@ -122,6 +126,8 @@ static int tegra_output_rgb_enable(struct tegra_output *output) tegra_dc_writel(rgb->dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL); tegra_dc_writel(rgb->dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL); + rgb->enabled = true; + return 0; } @@ -130,6 +136,9 @@ static int tegra_output_rgb_disable(struct tegra_output *output) struct tegra_rgb *rgb = to_rgb(output); unsigned long value; + if (!rgb->enabled) + return 0; + value = tegra_dc_readl(rgb->dc, DC_CMD_DISPLAY_POWER_CONTROL); value &= ~(PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE | PW4_ENABLE | PM0_ENABLE | PM1_ENABLE); @@ -144,6 +153,8 @@ static int tegra_output_rgb_disable(struct tegra_output *output) tegra_dc_write_regs(rgb->dc, rgb_disable, ARRAY_SIZE(rgb_disable)); + rgb->enabled = false; + return 0; } -- cgit v0.10.2 From 3f67d962c64d9b6de9dab81bdbe6d5c94c80d9b9 Mon Sep 17 00:00:00 2001 From: Preeti U Murthy Date: Wed, 12 Feb 2014 10:18:45 +0530 Subject: cpuidle/pseries: Fix fallout caused due to cleanup in pseries cpuidle backend driver Commit d8c6ad3184ca651 ("sched/idle, PPC: Remove redundant cpuidle_idle_call()") reintroduced ppc64_runlatch_off/on() in the pseries cpuidle backend driver. Hence the cleanup caused by the commit "c0c4301c54adde05:pseries/cpuidle: Remove redundant call to ppc64_runlatch_off() in cpu idle routines" in conjuction with the commit d8c6ad3184ca651 causes a build failure. Signed-off-by: Preeti U Murthy Cc: Peter Zijlstra Cc: Nicolas Pitre Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/52FAFD2D.2090306@linux.vnet.ibm.com Signed-off-by: Ingo Molnar diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index d486489..6f7b019 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -17,6 +17,7 @@ #include #include #include +#include #include struct cpuidle_driver pseries_idle_driver = { -- cgit v0.10.2 From 63ef79c25b5bbd356d04ef93d15693d8664c6141 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Wed, 5 Feb 2014 12:29:57 +0100 Subject: s390/zcrypt: additional check to avoid overflow in msg-type 6 requests Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index dc542e0..0bc91e4 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -311,7 +311,7 @@ static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev, } __packed * msg = ap_msg->message; int rcblen = CEIL4(xcRB->request_control_blk_length); - int replylen; + int replylen, req_sumlen, resp_sumlen; char *req_data = ap_msg->message + sizeof(struct type6_hdr) + rcblen; char *function_code; @@ -321,12 +321,34 @@ static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev, xcRB->request_data_length; if (ap_msg->length > MSGTYPE06_MAX_MSG_SIZE) return -EINVAL; + + /* Overflow check + sum must be greater (or equal) than the largest operand */ + req_sumlen = CEIL4(xcRB->request_control_blk_length) + + xcRB->request_data_length; + if ((CEIL4(xcRB->request_control_blk_length) <= + xcRB->request_data_length) ? + (req_sumlen < xcRB->request_data_length) : + (req_sumlen < CEIL4(xcRB->request_control_blk_length))) { + return -EINVAL; + } + replylen = sizeof(struct type86_fmt2_msg) + CEIL4(xcRB->reply_control_blk_length) + xcRB->reply_data_length; if (replylen > MSGTYPE06_MAX_MSG_SIZE) return -EINVAL; + /* Overflow check + sum must be greater (or equal) than the largest operand */ + resp_sumlen = CEIL4(xcRB->reply_control_blk_length) + + xcRB->reply_data_length; + if ((CEIL4(xcRB->reply_control_blk_length) <= xcRB->reply_data_length) ? + (resp_sumlen < xcRB->reply_data_length) : + (resp_sumlen < CEIL4(xcRB->reply_control_blk_length))) { + return -EINVAL; + } + /* prepare type6 header */ msg->hdr = static_type6_hdrX; memcpy(msg->hdr.agent_id , &(xcRB->agent_ID), sizeof(xcRB->agent_ID)); -- cgit v0.10.2 From 1bf4bbb4024dcdab5e57634dd8ae1072d42a53ac Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 11 Feb 2014 16:02:47 +0100 Subject: mac80211: send control port protocol frames to the VO queue Improves reliability of wifi connections with WPA, since authentication frames are prioritized over normal traffic and also typically exempt from aggregation. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 21211c6..d51422c 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -154,6 +154,11 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata, return IEEE80211_AC_BE; } + if (skb->protocol == sdata->control_port_protocol) { + skb->priority = 7; + return ieee80211_downgrade_queue(sdata, skb); + } + /* use the data classifier to determine what 802.1d tag the * data frame has */ rcu_read_lock(); -- cgit v0.10.2 From 1bc18086231c130895b87ec049be8ddcdab552b8 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 21 Jan 2014 16:22:51 -0500 Subject: ata: delete non-required instances of include None of these files are actually using any __init type directives and hence don't need to include . Most are just a left over from __devinit and __cpuinit removal, or simply due to code getting copied from one driver to the next. Cc: linux-ide@vger.kernel.org Signed-off-by: Paul Gortmaker Signed-off-by: Tejun Heo diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c index fd665d9..b51605a 100644 --- a/drivers/ata/acard-ahci.c +++ b/drivers/ata/acard-ahci.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index dc2756f..b6a49ce 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 4b231ba..9302d81 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c index 7d19665..9498a7d 100644 --- a/drivers/ata/ata_generic.c +++ b/drivers/ata/ata_generic.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 36605ab..cba1b48 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c index 62c9ac8..5108b87 100644 --- a/drivers/ata/pata_acpi.c +++ b/drivers/ata/pata_acpi.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c index d23e2b3..1206fa6 100644 --- a/drivers/ata/pata_amd.c +++ b/drivers/ata/pata_amd.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_artop.c b/drivers/ata/pata_artop.c index 1581dee..3aa4e65 100644 --- a/drivers/ata/pata_artop.c +++ b/drivers/ata/pata_artop.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c index d63ee8f..e9c8727 100644 --- a/drivers/ata/pata_at91.c +++ b/drivers/ata/pata_at91.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c index 24e5105..30fa4ca 100644 --- a/drivers/ata/pata_atiixp.c +++ b/drivers/ata/pata_atiixp.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_atp867x.c b/drivers/ata/pata_atp867x.c index 2ca5026..7e73a0f 100644 --- a/drivers/ata/pata_atp867x.c +++ b/drivers/ata/pata_atp867x.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c index 8fb69e5..57f1be6 100644 --- a/drivers/ata/pata_cmd640.c +++ b/drivers/ata/pata_cmd640.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_cmd64x.c b/drivers/ata/pata_cmd64x.c index 1275a8d..6bca350 100644 --- a/drivers/ata/pata_cmd64x.c +++ b/drivers/ata/pata_cmd64x.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c index f10baab..bcde4b7 100644 --- a/drivers/ata/pata_cs5520.c +++ b/drivers/ata/pata_cs5520.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_cs5530.c b/drivers/ata/pata_cs5530.c index f07f229..8afe854 100644 --- a/drivers/ata/pata_cs5530.c +++ b/drivers/ata/pata_cs5530.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_cs5535.c b/drivers/ata/pata_cs5535.c index 997e16a..2c0986f 100644 --- a/drivers/ata/pata_cs5535.c +++ b/drivers/ata/pata_cs5535.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_cs5536.c b/drivers/ata/pata_cs5536.c index 0448860..32ddcae 100644 --- a/drivers/ata/pata_cs5536.c +++ b/drivers/ata/pata_cs5536.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_cypress.c b/drivers/ata/pata_cypress.c index 810bc99..3435bd6 100644 --- a/drivers/ata/pata_cypress.c +++ b/drivers/ata/pata_cypress.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_efar.c b/drivers/ata/pata_efar.c index 3c12fd7..f440892 100644 --- a/drivers/ata/pata_efar.c +++ b/drivers/ata/pata_efar.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c index 980b88e..cad9d45 100644 --- a/drivers/ata/pata_ep93xx.c +++ b/drivers/ata/pata_ep93xx.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c index 35b5213..8e76f79 100644 --- a/drivers/ata/pata_hpt366.c +++ b/drivers/ata/pata_hpt366.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c index a9d74ef..3ba843f 100644 --- a/drivers/ata/pata_hpt37x.c +++ b/drivers/ata/pata_hpt37x.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c index 4be0398..b93c0f0 100644 --- a/drivers/ata/pata_hpt3x2n.c +++ b/drivers/ata/pata_hpt3x2n.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_hpt3x3.c b/drivers/ata/pata_hpt3x3.c index 85cf286..255c5aa 100644 --- a/drivers/ata/pata_hpt3x3.c +++ b/drivers/ata/pata_hpt3x3.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c index 26386f0..97417d5 100644 --- a/drivers/ata/pata_imx.c +++ b/drivers/ata/pata_imx.c @@ -15,7 +15,6 @@ */ #include #include -#include #include #include #include diff --git a/drivers/ata/pata_it8213.c b/drivers/ata/pata_it8213.c index 2a8dd95..81369d1 100644 --- a/drivers/ata/pata_it8213.c +++ b/drivers/ata/pata_it8213.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c index 581e04d..dc3d787 100644 --- a/drivers/ata/pata_it821x.c +++ b/drivers/ata/pata_it821x.c @@ -72,7 +72,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_jmicron.c b/drivers/ata/pata_jmicron.c index 76e739b0..b1cfa02 100644 --- a/drivers/ata/pata_jmicron.c +++ b/drivers/ata/pata_jmicron.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_marvell.c b/drivers/ata/pata_marvell.c index a4f5e78..6bad3df 100644 --- a/drivers/ata/pata_marvell.c +++ b/drivers/ata/pata_marvell.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_mpiix.c b/drivers/ata/pata_mpiix.c index 1f5f28b..f39a537 100644 --- a/drivers/ata/pata_mpiix.c +++ b/drivers/ata/pata_mpiix.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_netcell.c b/drivers/ata/pata_netcell.c index ad1a0fe..e3b9709 100644 --- a/drivers/ata/pata_netcell.c +++ b/drivers/ata/pata_netcell.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c index 9513e07..56201a6 100644 --- a/drivers/ata/pata_ninja32.c +++ b/drivers/ata/pata_ninja32.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_ns87410.c b/drivers/ata/pata_ns87410.c index 0c424da..6154c3e 100644 --- a/drivers/ata/pata_ns87410.c +++ b/drivers/ata/pata_ns87410.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c index 16dc3a6..d44df7c 100644 --- a/drivers/ata/pata_ns87415.c +++ b/drivers/ata/pata_ns87415.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_oldpiix.c b/drivers/ata/pata_oldpiix.c index d77b2e1..319b644 100644 --- a/drivers/ata/pata_oldpiix.c +++ b/drivers/ata/pata_oldpiix.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_opti.c b/drivers/ata/pata_opti.c index 4ea70cd..fb042e0 100644 --- a/drivers/ata/pata_opti.c +++ b/drivers/ata/pata_opti.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_optidma.c b/drivers/ata/pata_optidma.c index 78ede3f..bb71ea2 100644 --- a/drivers/ata/pata_optidma.c +++ b/drivers/ata/pata_optidma.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index 40254f4..bcc4b96 100644 --- a/drivers/ata/pata_pcmcia.c +++ b/drivers/ata/pata_pcmcia.c @@ -26,7 +26,6 @@ #include #include -#include #include #include #include diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c index 9d874c8..1151f23 100644 --- a/drivers/ata/pata_pdc2027x.c +++ b/drivers/ata/pata_pdc2027x.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_pdc202xx_old.c b/drivers/ata/pata_pdc202xx_old.c index c34fc50..defa050 100644 --- a/drivers/ata/pata_pdc202xx_old.c +++ b/drivers/ata/pata_pdc202xx_old.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_piccolo.c b/drivers/ata/pata_piccolo.c index 2beb6b5..0b46be1 100644 --- a/drivers/ata/pata_piccolo.c +++ b/drivers/ata/pata_piccolo.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index 0279488..a5579b5 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -13,7 +13,6 @@ */ #include #include -#include #include #include #include diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c index a6f05ac..73259bf 100644 --- a/drivers/ata/pata_pxa.c +++ b/drivers/ata/pata_pxa.c @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/drivers/ata/pata_radisys.c b/drivers/ata/pata_radisys.c index f582ba1..be3f102 100644 --- a/drivers/ata/pata_radisys.c +++ b/drivers/ata/pata_radisys.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_rdc.c b/drivers/ata/pata_rdc.c index 79a970f..521b213 100644 --- a/drivers/ata/pata_rdc.c +++ b/drivers/ata/pata_rdc.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_rz1000.c b/drivers/ata/pata_rz1000.c index 040b093..caedc90 100644 --- a/drivers/ata/pata_rz1000.c +++ b/drivers/ata/pata_rz1000.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c index ce2f828..96a232f 100644 --- a/drivers/ata/pata_sc1200.c +++ b/drivers/ata/pata_sc1200.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c index f35f15f..f1f5b5a 100644 --- a/drivers/ata/pata_scc.c +++ b/drivers/ata/pata_scc.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_sch.c b/drivers/ata/pata_sch.c index d3830c4..5a1cde0 100644 --- a/drivers/ata/pata_sch.c +++ b/drivers/ata/pata_sch.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c index 96c6a79..e27f31f 100644 --- a/drivers/ata/pata_serverworks.c +++ b/drivers/ata/pata_serverworks.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c index c4b0b07..73fe362 100644 --- a/drivers/ata/pata_sil680.c +++ b/drivers/ata/pata_sil680.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_sis.c b/drivers/ata/pata_sis.c index 1e83636..78d913a 100644 --- a/drivers/ata/pata_sis.c +++ b/drivers/ata/pata_sis.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_sl82c105.c b/drivers/ata/pata_sl82c105.c index 6816911..900f0e4 100644 --- a/drivers/ata/pata_sl82c105.c +++ b/drivers/ata/pata_sl82c105.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_triflex.c b/drivers/ata/pata_triflex.c index 94473da..7bc78e2 100644 --- a/drivers/ata/pata_triflex.c +++ b/drivers/ata/pata_triflex.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index c3ab9a6..f6c9632 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c index 8ea6e6a..f10631b 100644 --- a/drivers/ata/pdc_adma.c +++ b/drivers/ata/pdc_adma.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 523524b..73510d0d 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -29,7 +29,6 @@ #include #include -#include #include #include #include diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index 870b11e..5a68b7b 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index d74def8..ba5f271 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index 97f4acb..3638887 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c index 3b0dd57..9a6bd4c 100644 --- a/drivers/ata/sata_qstor.c +++ b/drivers/ata/sata_qstor.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index d67fc35..e476e7b 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c index 1ad2f62..b513428 100644 --- a/drivers/ata/sata_sis.c +++ b/drivers/ata/sata_sis.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c index dc4f701..c630fa8 100644 --- a/drivers/ata/sata_svw.c +++ b/drivers/ata/sata_svw.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 9947010..6cd0312 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -82,7 +82,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c index 6d64891..08f98c3 100644 --- a/drivers/ata/sata_uli.c +++ b/drivers/ata/sata_uli.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 87f056e..f72e842 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c index 44f304b..29e847a 100644 --- a/drivers/ata/sata_vsc.c +++ b/drivers/ata/sata_vsc.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include -- cgit v0.10.2 From 52480137d82062bb8d0fb778cb9934667958e367 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:51:57 +0100 Subject: asmlinkage, kvm: Make kvm_rebooting visible kvm_rebooting is referenced from assembler code, thus needs to be visible. Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-1-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 03a0381..b5ec7fb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn); static void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn); -bool kvm_rebooting; +__visible bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; -- cgit v0.10.2 From d47d5c8194579bce1d62f88e26fea91d7c553e42 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:51:58 +0100 Subject: asmlinkage: Make __iowrite32_copy visible This is a assembler function on x86, so it should be visible. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-2-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/include/linux/io.h b/include/linux/io.h index f4f42fa..8a18e75 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -24,7 +24,7 @@ struct device; -void __iowrite32_copy(void __iomem *to, const void *from, size_t count); +__visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); #ifdef CONFIG_MMU -- cgit v0.10.2 From 40747ffa5aa8d5b99ca46c696234b9194b59e0ac Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:51:59 +0100 Subject: asmlinkage: Make jiffies visible Jiffies is referenced by the linker script, so it has to be visible. Handled both the generic and the x86 version. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-3-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 24d3c91..6ec91c0 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -23,7 +23,7 @@ #include #ifdef CONFIG_X86_64 -DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES; +__visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES; #endif unsigned long profile_pc(struct pt_regs *regs) diff --git a/kernel/timer.c b/kernel/timer.c index accfd24..d78de04 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -52,7 +52,7 @@ #define CREATE_TRACE_POINTS #include -u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; +__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); -- cgit v0.10.2 From a99aa42d0253f033cbb85096d3f2bd82201321e6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:00 +0100 Subject: asmlinkage, pnp: Make variables used from assembler code visible Mark variables referenced from assembler files visible. This fixes compile problems with LTO. Cc: Jaroslav Kysela Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-4-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/drivers/pnp/pnpbios/bioscalls.c b/drivers/pnp/pnpbios/bioscalls.c index 769d265..deb7f4b 100644 --- a/drivers/pnp/pnpbios/bioscalls.c +++ b/drivers/pnp/pnpbios/bioscalls.c @@ -21,7 +21,7 @@ #include "pnpbios.h" -static struct { +__visible struct { u16 offset; u16 segment; } pnp_bios_callpoint; @@ -41,6 +41,7 @@ asmlinkage void pnp_bios_callfunc(void); __asm__(".text \n" __ALIGN_STR "\n" + ".globl pnp_bios_callfunc\n" "pnp_bios_callfunc:\n" " pushl %edx \n" " pushl %ecx \n" @@ -66,9 +67,9 @@ static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, * after PnP BIOS oopses. */ -u32 pnp_bios_fault_esp; -u32 pnp_bios_fault_eip; -u32 pnp_bios_is_utter_crap = 0; +__visible u32 pnp_bios_fault_esp; +__visible u32 pnp_bios_fault_eip; +__visible u32 pnp_bios_is_utter_crap = 0; static spinlock_t pnp_bios_lock; -- cgit v0.10.2 From 63f9a7fde715352e0769302527670542a664b981 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:01 +0100 Subject: asmlinkage: Make lockdep_sys_exit asmlinkage lockdep_sys_exit can be called from assembler code, so make it asmlinkage. Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-5-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 92b1bfc..7df9aa6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -265,7 +265,7 @@ extern void lockdep_info(void); extern void lockdep_reset(void); extern void lockdep_reset_lock(struct lockdep_map *lock); extern void lockdep_free_key_range(void *start, unsigned long size); -extern void lockdep_sys_exit(void); +extern asmlinkage void lockdep_sys_exit(void); extern void lockdep_off(void); extern void lockdep_on(void); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index eb8a547..c8b6753c 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4191,7 +4191,7 @@ void debug_show_held_locks(struct task_struct *task) } EXPORT_SYMBOL_GPL(debug_show_held_locks); -void lockdep_sys_exit(void) +asmlinkage void lockdep_sys_exit(void) { struct task_struct *curr = current; -- cgit v0.10.2 From b35f8305339f1ba3070fe606c6ef0d86ef093dee Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:02 +0100 Subject: asmlinkage: Make trace_hardirq visible Can be called from assembler code. Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-6-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c8b6753c..aa3bf15 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2557,7 +2557,7 @@ static void __trace_hardirqs_on_caller(unsigned long ip) debug_atomic_inc(hardirqs_on_events); } -void trace_hardirqs_on_caller(unsigned long ip) +__visible void trace_hardirqs_on_caller(unsigned long ip) { time_hardirqs_on(CALLER_ADDR0, ip); @@ -2610,7 +2610,7 @@ EXPORT_SYMBOL(trace_hardirqs_on); /* * Hardirqs were disabled: */ -void trace_hardirqs_off_caller(unsigned long ip) +__visible void trace_hardirqs_off_caller(unsigned long ip) { struct task_struct *curr = current; -- cgit v0.10.2 From 22d9fd3411c693ccae5f5c2280fb1f9bb106ad4f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:03 +0100 Subject: asmlinkage, mutex: Mark __visible Various kernel/mutex.c functions can be called from inline assembler, so they should be all global and __visible. Cc: Ingo Molnar Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-7-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 4dd6e4c..adbc0d0 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -67,8 +67,7 @@ EXPORT_SYMBOL(__mutex_init); * We also put the fastpath first in the kernel image, to make sure the * branch is predicted by the CPU as default-untaken. */ -static __used noinline void __sched -__mutex_lock_slowpath(atomic_t *lock_count); +__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); /** * mutex_lock - acquire the mutex @@ -225,7 +224,8 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) } #endif -static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); +__visible __used noinline +void __sched __mutex_unlock_slowpath(atomic_t *lock_count); /** * mutex_unlock - release the mutex @@ -746,7 +746,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) /* * Release the lock, slowpath: */ -static __used noinline void +__visible void __mutex_unlock_slowpath(atomic_t *lock_count) { __mutex_unlock_common_slowpath(lock_count, 1); @@ -803,7 +803,7 @@ int __sched mutex_lock_killable(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock_killable); -static __used noinline void __sched +__visible void __sched __mutex_lock_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); -- cgit v0.10.2 From 00b7103078596a243c16239004e0dc9416910f13 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:04 +0100 Subject: asmlinkage: Make main_extable_sort_needed visible main_extable_sort_needed is used by the build system and needs to be a normal ELF symbol. Make it visible so that LTO does not remove or mangle it. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-8-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/kernel/extable.c b/kernel/extable.c index 763faf0..d8a6446 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -36,7 +36,7 @@ extern struct exception_table_entry __start___ex_table[]; extern struct exception_table_entry __stop___ex_table[]; /* Cleared by build time tools if the table is already sorted. */ -u32 __initdata main_extable_sort_needed = 1; +u32 __initdata __visible main_extable_sort_needed = 1; /* Sort the kernel's built-in exception table */ void __init sort_main_extable(void) -- cgit v0.10.2 From 3ebae4f3a2e746ae17f25c741e249294e7d6d7c2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:05 +0100 Subject: asmlinkage: Mark rwsem functions that can be called from assembler asmlinkage Mark the rwsem functions that can be called from assembler asmlinkage. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-9-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 19c5fa9..1d66e08 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -143,6 +143,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) /* * wait for the read lock to be granted */ +__visible struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) { long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; @@ -190,6 +191,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) /* * wait until we successfully acquire the write lock */ +__visible struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) { long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; @@ -252,6 +254,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) * handle waking up a waiter on the semaphore * - up_read/up_write has decremented the active part of count if we come here */ +__visible struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; @@ -272,6 +275,7 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) * - caller incremented waiting part of count and discovered it still negative * - just wake up any readers at the front of the queue */ +__visible struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; -- cgit v0.10.2 From a7330c997d0f74d909a7d3553b1d550d8be2b61a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:06 +0100 Subject: asmlinkage Make __stack_chk_failed and memcmp visible In LTO symbols implicitely referenced by the compiler need to be visible. Earlier these symbols were visible implicitely from being exported, but we disabled implicit visibility fo EXPORTs when modules are disabled to improve code size. So now these symbols have to be marked visible explicitely. Do this for __stack_chk_fail (with stack protector) and memcmp. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-10-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/kernel/panic.c b/kernel/panic.c index 6d63003..3eb0ffb 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -459,7 +459,7 @@ EXPORT_SYMBOL(warn_slowpath_null); * Called when gcc's -fstack-protector feature is used, and * gcc detects corruption of the on-stack canary value */ -void __stack_chk_fail(void) +__visible void __stack_chk_fail(void) { panic("stack-protector: Kernel stack is corrupted in: %p\n", __builtin_return_address(0)); diff --git a/lib/string.c b/lib/string.c index e5878de..9b1f906 100644 --- a/lib/string.c +++ b/lib/string.c @@ -648,7 +648,7 @@ EXPORT_SYMBOL(memmove); * @count: The size of the area. */ #undef memcmp -int memcmp(const void *cs, const void *ct, size_t count) +__visible int memcmp(const void *cs, const void *ct, size_t count) { const unsigned char *su1, *su2; int res = 0; -- cgit v0.10.2 From a9143296dd612dceb0765229ccfb13fd642e2840 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:07 +0100 Subject: asmlinkage, x86: Fix 32bit memcpy for LTO These functions can be called implicitely from gcc, and thus need to be visible. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-11-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index e78761d..a404b4b 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c @@ -4,7 +4,7 @@ #undef memcpy #undef memset -void *memcpy(void *to, const void *from, size_t n) +__visible void *memcpy(void *to, const void *from, size_t n) { #ifdef CONFIG_X86_USE_3DNOW return __memcpy3d(to, from, n); @@ -14,13 +14,13 @@ void *memcpy(void *to, const void *from, size_t n) } EXPORT_SYMBOL(memcpy); -void *memset(void *s, int c, size_t count) +__visible void *memset(void *s, int c, size_t count) { return __memset(s, c, count); } EXPORT_SYMBOL(memset); -void *memmove(void *dest, const void *src, size_t n) +__visible void *memmove(void *dest, const void *src, size_t n) { int d0,d1,d2,d3,d4,d5; char *ret = dest; -- cgit v0.10.2 From 285c00adf651c9b1d6c73d5eee482d2a617a64c1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:08 +0100 Subject: asmlinkage: Make trace_hardirqs_on/off_caller visible These functions are called from assembler, and thus need to be __visible. Cc: Steven Rostedt Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-12-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 2aefbee..887ef88 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -498,14 +498,14 @@ void trace_hardirqs_off(void) } EXPORT_SYMBOL(trace_hardirqs_off); -void trace_hardirqs_on_caller(unsigned long caller_addr) +__visible void trace_hardirqs_on_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) stop_critical_timing(CALLER_ADDR0, caller_addr); } EXPORT_SYMBOL(trace_hardirqs_on_caller); -void trace_hardirqs_off_caller(unsigned long caller_addr) +__visible void trace_hardirqs_off_caller(unsigned long caller_addr) { if (!preempt_trace() && irq_trace()) start_critical_timing(CALLER_ADDR0, caller_addr); -- cgit v0.10.2 From 3be5588ad5c2628c8aa4a578bcbb114fa2b49260 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:09 +0100 Subject: initconst: Fix initconst mistake in dcdbas const must be __initconst. Cc: Douglas_Warzecha@dell.com Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-13-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c index 1b5e8e4..7160c43 100644 --- a/drivers/firmware/dcdbas.c +++ b/drivers/firmware/dcdbas.c @@ -584,7 +584,7 @@ static struct platform_driver dcdbas_driver = { .remove = dcdbas_remove, }; -static const struct platform_device_info dcdbas_dev_info __initdata = { +static const struct platform_device_info dcdbas_dev_info __initconst = { .name = DRIVER_NAME, .id = -1, .dma_mask = DMA_BIT_MASK(32), -- cgit v0.10.2 From 634676c203f130c8efa138296c2efba219821346 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 08:52:10 +0100 Subject: initconst, x86: Fix initconst mistake in ts5500 code const data must be initconst. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391845930-28580-14-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c index 39febb2..9471b94 100644 --- a/arch/x86/platform/ts5500/ts5500.c +++ b/arch/x86/platform/ts5500/ts5500.c @@ -88,7 +88,7 @@ struct ts5500_sbc { static const struct { const char * const string; const ssize_t offset; -} ts5500_signatures[] __initdata = { +} ts5500_signatures[] __initconst = { { "TS-5x00 AMD Elan", 0xb14 }, }; -- cgit v0.10.2 From 67424d5a22124fa2d115faa8f32d12506989628f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:05 +0100 Subject: x86, lto: Disable LTO for the x86 VDSO The VDSO does not play well with LTO, so just disable LTO for it. Also pass a 32bit linker flag for the 32bit version. [ hpa: change braces to parens to match kernel Makefile style ] Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-1-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index fd14be1..9206ac7 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -2,6 +2,8 @@ # Building vDSO images for x86. # +KBUILD_CFLAGS += $(DISABLE_LTO) + VDSO64-$(CONFIG_X86_64) := y VDSOX32-$(CONFIG_X86_X32_ABI) := y VDSO32-$(CONFIG_X86_32) := y @@ -35,7 +37,8 @@ export CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,--no-undefined \ - -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 + -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \ + $(DISABLE_LTO) $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so @@ -127,7 +130,7 @@ vdso32.so-$(VDSO32-y) += sysenter vdso32-images = $(vdso32.so-y:%=vdso32-%.so) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1 +VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 # This makes sure the $(obj) subdirectory exists even though vdso32/ # is not a kbuild sub-make subdirectory. @@ -181,7 +184,8 @@ quiet_cmd_vdso = VDSO $@ -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' -VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ + $(LTO_CFLAGS) GCOV_PROFILE := n # -- cgit v0.10.2 From 128ea04a9885af9629059e631ddf0cab4815b589 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:07 +0100 Subject: lto: Make asmlinkage __visible Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-3-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/include/linux/linkage.h b/include/linux/linkage.h index a6a42dd..34a513a 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -12,9 +12,9 @@ #endif #ifdef __cplusplus -#define CPP_ASMLINKAGE extern "C" +#define CPP_ASMLINKAGE extern "C" __visible #else -#define CPP_ASMLINKAGE +#define CPP_ASMLINKAGE __visible #endif #ifndef asmlinkage -- cgit v0.10.2 From ef1b893c29d0dba778f67ad97b554b37f9108dcc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:08 +0100 Subject: lto, workaround: Add workaround for initcall reordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Work around a LTO gcc problem: when there is no reference to a variable in a module it will be moved to the end of the program. This causes reordering of initcalls which the kernel does not like. Add a dummy reference function to avoid this. The function is deleted by the linker. This replaces a previous much slower workaround. Thanks to Jan "Honza" Hubička for suggesting this technique. Suggested-by: Jan Hubička Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-4-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/include/linux/init.h b/include/linux/init.h index e168880..a3ba270 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -163,6 +163,23 @@ extern bool initcall_debug; #ifndef __ASSEMBLY__ +#ifdef CONFIG_LTO +/* Work around a LTO gcc problem: when there is no reference to a variable + * in a module it will be moved to the end of the program. This causes + * reordering of initcalls which the kernel does not like. + * Add a dummy reference function to avoid this. The function is + * deleted by the linker. + */ +#define LTO_REFERENCE_INITCALL(x) \ + ; /* yes this is needed */ \ + static __used __exit void *reference_##x(void) \ + { \ + return &x; \ + } +#else +#define LTO_REFERENCE_INITCALL(x) +#endif + /* initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined * by link order. @@ -175,7 +192,8 @@ extern bool initcall_debug; #define __define_initcall(fn, id) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn + __attribute__((__section__(".initcall" #id ".init"))) = fn; \ + LTO_REFERENCE_INITCALL(__initcall_##fn##id) /* * Early initcalls run before initializing SMP. -- cgit v0.10.2 From 80375980f1608f43b47abc2671456b23ec68c434 Mon Sep 17 00:00:00 2001 From: Joe Mario Date: Sat, 8 Feb 2014 09:01:09 +0100 Subject: lto: Handle LTO common symbols in module loader Here is the workaround I made for having the kernel not reject modules built with -flto. The clean solution would be to get the compiler to not emit the symbol. Or if it has to emit the symbol, then emit it as initialized data but put it into a comdat/linkonce section. Minor tweaks by AK over Joe's patch. Cc: Rusty Russell Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-5-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/kernel/module.c b/kernel/module.c index d24fcf2..b99e801 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1948,6 +1948,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) switch (sym[i].st_shndx) { case SHN_COMMON: + /* Ignore common symbols */ + if (!strncmp(name, "__gnu_lto", 9)) + break; + /* We compiled with -fno-common. These are not supposed to happen. */ pr_debug("Common symbol: %s\n", name); -- cgit v0.10.2 From 58edae3aac9f2ccd1afb12ea08127e840a0a706c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:10 +0100 Subject: lto: Disable LTO for sys_ni The assembler alias code in cond_syscall does not work when compiled for LTO. Just disable LTO for that file. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-6-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/kernel/Makefile b/kernel/Makefile index bc010ee..31c26c6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -18,6 +18,9 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_irq_work.o = -pg endif +# cond_syscall is currently not LTO compatible +CFLAGS_sys_ni.o = $(DISABLE_LTO) + obj-y += sched/ obj-y += locking/ obj-y += power/ -- cgit v0.10.2 From 77ab21adae509c5540956729e2d03bc1a59bc82a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:11 +0100 Subject: Kbuild, lto, workaround: Don't warn for initcall_reference in modpost This reference is discarded, but can cause warnings when it refers to exit. Ignore for now. This is a workaround and can be removed once we get rid of -fno-toplevel-reorder Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-7-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 4061098..1f1b154 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1455,6 +1455,10 @@ static void check_section_mismatch(const char *modname, struct elf_info *elf, to = find_elf_symbol(elf, r->r_addend, sym); tosym = sym_name(elf, to); + if (!strncmp(fromsym, "reference___initcall", + sizeof("reference___initcall")-1)) + return; + /* check whitelist - we may ignore it */ if (secref_whitelist(mismatch, fromsec, fromsym, tosec, tosym)) { -- cgit v0.10.2 From 7d02b490e93c199a15b3c4bce1c393588c1300ca Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:12 +0100 Subject: Kbuild, lto: Drop .number postfixes in modpost LTO turns all global symbols effectively into statics. This has the side effect that they all have a .NUMBER postfix to make them unique. In modpost drop this postfix because it confuses it. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-8-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 1f1b154..f91dd45 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1684,6 +1684,19 @@ static void check_sec_ref(struct module *mod, const char *modname, } } +static char *remove_dot(char *s) +{ + char *end; + int n = strcspn(s, "."); + + if (n > 0 && s[n] != 0) { + strtoul(s + n + 1, &end, 10); + if (end > s + n + 1 && (*end == '.' || *end == 0)) + s[n] = 0; + } + return s; +} + static void read_symbols(char *modname) { const char *symname; @@ -1722,7 +1735,7 @@ static void read_symbols(char *modname) } for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { - symname = info.strtab + sym->st_name; + symname = remove_dot(info.strtab + sym->st_name); handle_modversions(mod, &info, sym, symname); handle_moddevtable(mod, &info, sym, symname); diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 51207e4..168b43d 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -127,7 +127,7 @@ struct elf_info { Elf_Section export_gpl_sec; Elf_Section export_unused_gpl_sec; Elf_Section export_gpl_future_sec; - const char *strtab; + char *strtab; char *modinfo; unsigned int modinfo_len; -- cgit v0.10.2 From ccbef1674a1579842c7dbdf554efca85d2cd245a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:13 +0100 Subject: Kbuild, lto: add ld-version and ld-ifversion macros To check the linker version. Used by the LTO makefile. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-9-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 547e15d..93a0da2 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -155,6 +155,15 @@ ld-option = $(call try-run,\ # Important: no spaces around options ar-option = $(call try-run, $(AR) rc$(1) "$$TMP",$(1),$(2)) +# ld-version +# Usage: $(call ld-version) +# Note this is mainly for HJ Lu's 3 number binutil versions +ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh) + +# ld-ifversion +# Usage: $(call ld-ifversion, -ge, 22252, y) +ld-ifversion = $(shell [ $(call ld-version) $(1) $(2) ] && echo $(3)) + ###### ### diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh new file mode 100755 index 0000000..198580d --- /dev/null +++ b/scripts/ld-version.sh @@ -0,0 +1,8 @@ +#!/usr/bin/awk -f +# extract linker version number from stdin and turn into single number + { + gsub(".*)", ""); + split($1,a, "."); + print a[1]*10000000 + a[2]*100000 + a[3]*10000 + a[4]*100 + a[5]; + exit + } -- cgit v0.10.2 From 8564ed2b3888176ac323eefea1722003daeba3d3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:14 +0100 Subject: Kbuild, lto: Add a gcc-ld script to let run gcc as ld For LTO we need to run the link step with gcc, not ld. Since there are a lot of linker options passed to it, add a gcc-ld wrapper that wraps them as -Wl, Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-10-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/scripts/gcc-ld b/scripts/gcc-ld new file mode 100644 index 0000000..cadab9a --- /dev/null +++ b/scripts/gcc-ld @@ -0,0 +1,29 @@ +#!/bin/sh +# run gcc with ld options +# used as a wrapper to execute link time optimizations +# yes virginia, this is not pretty + +ARGS="-nostdlib" + +while [ "$1" != "" ] ; do + case "$1" in + -save-temps|-m32|-m64) N="$1" ;; + -r) N="$1" ;; + -[Wg]*) N="$1" ;; + -[olv]|-[Ofd]*|-nostdlib) N="$1" ;; + --end-group|--start-group) + N="-Wl,$1" ;; + -[RTFGhIezcbyYu]*|\ +--script|--defsym|-init|-Map|--oformat|-rpath|\ +-rpath-link|--sort-section|--section-start|-Tbss|-Tdata|-Ttext|\ +--version-script|--dynamic-list|--version-exports-symbol|--wrap|-m) + A="$1" ; shift ; N="-Wl,$A,$1" ;; + -[m]*) N="$1" ;; + -*) N="-Wl,$1" ;; + *) N="$1" ;; + esac + ARGS="$ARGS $N" + shift +done + +exec $CC $ARGS -- cgit v0.10.2 From 1e64ff42ea3d8d2fc8aa71f9717b3c1cb6c2f893 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:15 +0100 Subject: Kbuild, lto: Disable LTO for asm-offsets.c The asm-offset.c technique to fish data out of the assembler file does not work with LTO. Just disable for the asm-offset.c build. Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-11-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/scripts/Makefile.build b/scripts/Makefile.build index d5d859c..9f0ee22 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -198,7 +198,7 @@ $(multi-objs-y:.o=.s) : modname = $(modname-multi) $(multi-objs-y:.o=.lst) : modname = $(modname-multi) quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ -cmd_cc_s_c = $(CC) $(c_flags) -fverbose-asm -S -o $@ $< +cmd_cc_s_c = $(CC) $(c_flags) $(DISABLE_LTO) -fverbose-asm -S -o $@ $< $(obj)/%.s: $(src)/%.c FORCE $(call if_changed_dep,cc_s_c) -- cgit v0.10.2 From ef178f9238b142cc1020265e176b20d27fd02ba9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 8 Feb 2014 09:01:17 +0100 Subject: Kbuild, lto: Handle basic LTO in modpost - Don't warn about LTO marker symbols. modpost runs before the linker, so the module is not necessarily LTOed yet. - Don't complain about .gnu.lto* sections Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1391846481-31491-13-git-send-email-ak@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index f91dd45..63804a1 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -623,7 +623,10 @@ static void handle_modversions(struct module *mod, struct elf_info *info, switch (sym->st_shndx) { case SHN_COMMON: - warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name); + if (!strncmp(symname, "__gnu_lto_", sizeof("__gnu_lto_")-1)) { + /* Should warn here, but modpost runs before the linker */ + } else + warn("\"%s\" [%s] is COMMON symbol\n", symname, mod->name); break; case SHN_UNDEF: /* undefined symbol */ @@ -849,6 +852,7 @@ static const char *section_white_list[] = ".xt.lit", /* xtensa */ ".arcextmap*", /* arc */ ".gnu.linkonce.arcext*", /* arc : modules */ + ".gnu.lto*", NULL }; -- cgit v0.10.2 From ce898ecb5a3c0027855dcee21ed99690b867d017 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 12 Feb 2014 21:13:16 +0000 Subject: netfilter: nft_reject_inet: fix unintended fall-through in switch-statatement For IPv4 packets, we call both IPv4 and IPv6 reject. Reported-by: Dave Jones Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 8a310f2..b718a52 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -21,9 +21,9 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, { switch (pkt->ops->pf) { case NFPROTO_IPV4: - nft_reject_ipv4_eval(expr, data, pkt); + return nft_reject_ipv4_eval(expr, data, pkt); case NFPROTO_IPV6: - nft_reject_ipv6_eval(expr, data, pkt); + return nft_reject_ipv6_eval(expr, data, pkt); } } -- cgit v0.10.2 From 06efbd6d5694b2e3cde176f724ba572d57709616 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 12 Feb 2014 10:53:01 +0100 Subject: netfilter: nft_meta: fix typo "CONFIG_NET_CLS_ROUTE" There are two checks for CONFIG_NET_CLS_ROUTE, but the corresponding Kconfig symbol was dropped in v2.6.39. Since the code guards access to dst_entry.tclassid it seems CONFIG_IP_ROUTE_CLASSID should be used instead. Signed-off-by: Paul Bolle Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index e8254ad..425cf39 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -116,7 +116,7 @@ static void nft_meta_get_eval(const struct nft_expr *expr, skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); break; -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: { const struct dst_entry *dst = skb_dst(skb); @@ -199,7 +199,7 @@ static int nft_meta_init_validate_get(uint32_t key) case NFT_META_OIFTYPE: case NFT_META_SKUID: case NFT_META_SKGID: -#ifdef CONFIG_NET_CLS_ROUTE +#ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: #endif #ifdef CONFIG_NETWORK_SECMARK -- cgit v0.10.2 From 2b7a79bae2dc0327af2352e1d1793b9d752648aa Mon Sep 17 00:00:00 2001 From: FX Le Bail Date: Tue, 11 Feb 2014 15:49:25 +0100 Subject: netfilter: nf_nat_snmp_basic: fix duplicates in if/else branches The solution was found by Patrick in 2.4 kernel sources. Cc: Patrick McHardy Signed-off-by: Francois-Xavier Le Bail Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index d551e31..7c67667 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1198,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct, map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); } else { /* DNAT replies */ - map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip); - map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); + map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip); + map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip); } if (map.from == map.to) -- cgit v0.10.2 From 8ba14654282ed6bb386d0a2f1ab329bfb293403f Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 10 Feb 2014 17:09:54 +0100 Subject: timer: Spare IPI when deferrable timer is queued on idle remote targets When a timer is enqueued or modified on a remote target, the latter is expected to see and handle this timer on its next tick. However if the target is idle and CONFIG_NO_HZ_IDLE=y, the CPU may be sleeping tickless and the timer may be ignored. wake_up_nohz_cpu() takes care of that by setting TIF_NEED_RESCHED and sending an IPI to idle targets so that the tick is reevaluated on the idle loop through the tick_nohz_idle_*() APIs. Now this is all performed regardless of the power properties of the timer. If the timer is deferrable, idle targets don't need to be woken up. Only the next buzy tick needs to care about it, and no IPI kick is needed for that to happen. So lets spare the IPI on idle targets when the timer is deferrable. Meanwhile we keep the current behaviour on full dynticks targets. We can spare IPIs on idle full dynticks targets as well but some tricky races against idle_cpu() must be dealt all along to make sure that the timer is well handled after idle exit. We can deal with that later since NO_HZ_FULL already has more important powersaving issues. Reported-by: Thomas Gleixner Signed-off-by: Viresh Kumar Cc: Ingo Molnar Cc: Paul Gortmaker Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CAKohpomMZ0TAN2e6N76_g4ZRzxd5vZ1XfuZfxrP7GMxfTNiLVw@mail.gmail.com Signed-off-by: Frederic Weisbecker diff --git a/kernel/timer.c b/kernel/timer.c index accfd24..b75e789 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -939,8 +939,15 @@ void add_timer_on(struct timer_list *timer, int cpu) * with the timer by holding the timer base lock. This also * makes sure that a CPU on the way to stop its tick can not * evaluate the timer wheel. + * + * Spare the IPI for deferrable timers on idle targets though. + * The next busy ticks will take care of it. Except full dynticks + * require special care against races with idle_cpu(), lets deal + * with that later. */ - wake_up_nohz_cpu(cpu); + if (!tbase_get_deferrable(timer->base) || tick_nohz_full_cpu(cpu)) + wake_up_nohz_cpu(cpu); + spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); -- cgit v0.10.2 From f96a34e27df19335155394a235ea3a096bc52a71 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 6 Feb 2014 13:36:21 -0500 Subject: nohz: ensure users are aware boot CPU is not NO_HZ_FULL This bit of information is in the Kconfig help text: "Note the boot CPU will still be kept outside the range to handle the timekeeping duty." However neither the variable NO_HZ_FULL_ALL, or the prompt convey this important detail, so lets add it to the prompt to make it more explicitly obvious to the average user. Acked-by: Paul E. McKenney Signed-off-by: Paul Gortmaker Cc: Ingo Molnar Cc: Paul Gortmaker Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1391711781-7466-1-git-send-email-paul.gortmaker@windriver.com Signed-off-by: Frederic Weisbecker diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 3ce6e8c..f448513 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -124,7 +124,7 @@ config NO_HZ_FULL endchoice config NO_HZ_FULL_ALL - bool "Full dynticks system on all CPUs by default" + bool "Full dynticks system on all CPUs by default (except CPU 0)" depends on NO_HZ_FULL help If the user doesn't pass the nohz_full boot option to -- cgit v0.10.2 From 28fa65e643ed7cf753e89db20745e264c41f3682 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 12 Feb 2014 16:08:28 -0800 Subject: mtd: nand: fix off-by-one read retry mode counting A flash may support N read retry voltage threshold modes, numbered 0 through N-1 (where mode 0 represents the initial state). However, nand_do_read_ops() tries to use mode 0 through N. This off-by-one error shows up, for instance, when using nanddump, and we have cycled through available modes: nand: setting READ RETRY mode 0 nand: setting READ RETRY mode 1 nand: setting READ RETRY mode 2 nand: setting READ RETRY mode 3 nand: setting READ RETRY mode 4 nand: setting READ RETRY mode 5 nand: setting READ RETRY mode 6 nand: setting READ RETRY mode 7 nand: setting READ RETRY mode 8 libmtd: error!: cannot read 8192 bytes from mtd0 (eraseblock 20, offset 0) error 22 (Invalid argument) nanddump: error!: mtd_read Tested on Micron MT29F64G08CBCBBH1, with 8 retry modes. Signed-off-by: Brian Norris Acked-by: Huang Shijie diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 59eba5d..9715a7b 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1584,7 +1584,7 @@ read_retry: } if (mtd->ecc_stats.failed - ecc_failures) { - if (retry_mode + 1 <= chip->read_retries) { + if (retry_mode + 1 < chip->read_retries) { retry_mode++; ret = nand_setup_read_retry(mtd, retry_mode); -- cgit v0.10.2 From 910a9f5636f5c128c02bf9ccd71ac03325700b57 Mon Sep 17 00:00:00 2001 From: Jean-Francois Dagenais Date: Wed, 12 Feb 2014 15:14:28 -0800 Subject: Input: adp5588-keys - get value from data out when dir is out As discussed here: http://ez.analog.com/message/35852, the 5587 revC and 5588 revB spec sheets contain a mistake in the GPIO_DAT_STATx register description. According to R.Shnell at ADI, as well as my own observations, it should read: "GPIO data status (shows GPIO state when read for inputs)". This commit changes the get value function accordingly. Signed-off-by: Jean-Francois Dagenais Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index bb3b57b..5ef7fcf 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -76,8 +76,18 @@ static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off) struct adp5588_kpad *kpad = container_of(chip, struct adp5588_kpad, gc); unsigned int bank = ADP5588_BANK(kpad->gpiomap[off]); unsigned int bit = ADP5588_BIT(kpad->gpiomap[off]); + int val; - return !!(adp5588_read(kpad->client, GPIO_DAT_STAT1 + bank) & bit); + mutex_lock(&kpad->gpio_lock); + + if (kpad->dir[bank] & bit) + val = kpad->dat_out[bank]; + else + val = adp5588_read(kpad->client, GPIO_DAT_STAT1 + bank); + + mutex_unlock(&kpad->gpio_lock); + + return !!(val & bit); } static void adp5588_gpio_set_value(struct gpio_chip *chip, -- cgit v0.10.2 From 3c7eacfc8a9a4c2bd48e0093c4f43cf69afd5210 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 14 Feb 2014 11:42:36 +0100 Subject: ovs: fix dp check in ovs_dp_reset_user_features This fixes crash when userspace does "ovs-dpctl add-dp dev" where dev is existing non-dp netdevice. Introduced by: commit 44da5ae5fbea4686f667dc854e5ea16814e44c59 "openvswitch: Drop user features if old user space attempted to create datapath" Signed-off-by: Jiri Pirko Signed-off-by: Jesse Gross diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e9a48ba..e42340d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1174,7 +1174,7 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in struct datapath *dp; dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); - if (!dp) + if (IS_ERR(dp)) return; WARN(dp->user_features, "Dropping previously announced user features\n"); -- cgit v0.10.2 From 04382a3303c22b0c536fbd0c94c1f012f2b8ed60 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Sat, 15 Feb 2014 17:37:45 -0800 Subject: openvswitch: Read tcp flags only then the tranport header is present. Only the first IP fragment can have a TCP header, check for this. Signed-off-by: Jarno Rajahalme Signed-off-by: Jesse Gross diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 16f4b46..d71e60f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -73,6 +73,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb) if ((flow->key.eth.type == htons(ETH_P_IP) || flow->key.eth.type == htons(ETH_P_IPV6)) && + flow->key.ip.frag != OVS_FRAG_TYPE_LATER && flow->key.ip.proto == IPPROTO_TCP && likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) { tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb)); -- cgit v0.10.2 From 42ee19e2939277a5277c307e517ce2d7ba5f0703 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Sat, 15 Feb 2014 17:42:29 -0800 Subject: openvswitch: Fix race. ovs_vport_cmd_dump() did rcu_read_lock() only after getting the datapath, which could have been deleted in between. Resolved by taking rcu_read_lock() before the get_dp() call. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index e42340d..8601b32 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1762,11 +1762,12 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int bucket = cb->args[0], skip = cb->args[1]; int i, j = 0; + rcu_read_lock(); dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); - if (!dp) + if (!dp) { + rcu_read_unlock(); return -ENODEV; - - rcu_read_lock(); + } for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) { struct vport *vport; -- cgit v0.10.2 From e9baa9d9d520fb0e24cca671e430689de2d4a4b2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 13 Feb 2014 10:39:01 +0100 Subject: dma: ste_dma40: don't dereference free:d descriptor It appears that in the DMA40 driver the DMA tasklet will very often dereference memory for a descriptor just free:d from the DMA40 slab. Nothing happens because no other part of the driver has yet had a chance to claim this memory, but it's really nasty to dereference free:d memory, so let's check the flag before the descriptor is free and store it in a bool variable. Cc: stable@vger.kernel.org Reported-by: Dan Carpenter Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 00a2de9..bf18c78 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1641,6 +1641,7 @@ static void dma_tasklet(unsigned long data) struct d40_chan *d40c = (struct d40_chan *) data; struct d40_desc *d40d; unsigned long flags; + bool callback_active; dma_async_tx_callback callback; void *callback_param; @@ -1668,6 +1669,7 @@ static void dma_tasklet(unsigned long data) } /* Callback to client */ + callback_active = !!(d40d->txd.flags & DMA_PREP_INTERRUPT); callback = d40d->txd.callback; callback_param = d40d->txd.callback_param; @@ -1690,7 +1692,7 @@ static void dma_tasklet(unsigned long data) spin_unlock_irqrestore(&d40c->lock, flags); - if (callback && (d40d->txd.flags & DMA_PREP_INTERRUPT)) + if (callback_active && callback) callback(callback_param); return; -- cgit v0.10.2 From e306dfd06fcb44d21c80acb8e5a88d55f3d1cf63 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Fri, 14 Feb 2014 19:35:15 +0000 Subject: ARM64: unwind: Fix PC calculation The frame PC value in the unwind code used to just take the saved LR value and use that. That's incorrect as a stack trace, since it shows the return path stack, not the call path stack. In particular, it shows faulty information in case the bl is done as the very last instruction of one label, since the return point will be in the next label. That can easily be seen with tail calls to panic(), which is marked __noreturn and thus doesn't have anything useful after it. Easiest here is to just correct the unwind code and do a -4, to get the actual call site for the backtrace instead of the return site. Signed-off-by: Olof Johansson Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index c3b6c63..38f0558 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -48,7 +48,11 @@ int unwind_frame(struct stackframe *frame) frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 8); + /* + * -4 here because we care about the PC at time of bl, + * not where the return will go. + */ + frame->pc = *(unsigned long *)(fp + 8) - 4; return 0; } -- cgit v0.10.2 From 478b360a47b71f3b5030eacd3aae6acb1a32c2b6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 15 Feb 2014 23:48:45 +0100 Subject: netfilter: nf_tables: fix nf_trace always-on with XT_TRACE=n When using nftables with CONFIG_NETFILTER_XT_TARGET_TRACE=n, we get lots of "TRACE: filter:output:policy:1 IN=..." warnings as several places will leave skb->nf_trace uninitialised. Unlike iptables tracing functionality is not conditional in nftables, so always copy/zero nf_trace setting when nftables is enabled. Move this into __nf_copy() helper. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f589c9af..d40d40b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2725,7 +2725,7 @@ static inline void nf_reset(struct sk_buff *skb) static inline void nf_reset_trace(struct sk_buff *skb) { -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) skb->nf_trace = 0; #endif } @@ -2742,6 +2742,9 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src) dst->nf_bridge = src->nf_bridge; nf_bridge_get(src->nf_bridge); #endif +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES) + dst->nf_trace = src->nf_trace; +#endif } static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5976ef0..5d6236d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -707,9 +707,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mark = old->mark; new->skb_iif = old->skb_iif; __nf_copy(new, old); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - new->nf_trace = old->nf_trace; -#endif #ifdef CONFIG_NET_SCHED new->tc_index = old->tc_index; #ifdef CONFIG_NET_CLS_ACT diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8971780..73c6b63 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -422,9 +422,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - to->nf_trace = from->nf_trace; -#endif #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) to->ipvs_property = from->ipvs_property; #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ef02b26..4cfbe0f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -517,9 +517,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - to->nf_trace = from->nf_trace; -#endif skb_copy_secmark(to, from); } -- cgit v0.10.2 From f627ed91d85ed7a189ec8b3b045a0d831e1655e2 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sun, 16 Feb 2014 14:01:58 +0100 Subject: netfilter: nf_tables: check if payload length is a power of 2 Add a check if payload's length is a power of 2 when selecting ops. The fast ops were meant for well aligned loads, also this fixes a small bug when using a length of 3 with some offsets which causes only 1 byte to be loaded because the fast ops are chosen. Signed-off-by: Nikolay Aleksandrov Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index a2aeb31..85daa84 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -135,7 +135,8 @@ nft_payload_select_ops(const struct nft_ctx *ctx, if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data)) return ERR_PTR(-EINVAL); - if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) + if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && + base != NFT_PAYLOAD_LL_HEADER) return &nft_payload_fast_ops; else return &nft_payload_ops; -- cgit v0.10.2 From cb6448ab0a9ac5f1d1c72a0f573dd9677d8e5418 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Thu, 19 Dec 2013 16:18:20 +0200 Subject: clk: tegra: Add missing Tegra20 fuse clks Add clocks required for accessing fuses on Tegra20. Signed-off-by: Peter De Schrijver Acked-by: Stephen Warren diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index dbace15..dace2b1 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -574,6 +574,8 @@ static struct tegra_clk tegra20_clks[tegra_clk_max] __initdata = { [tegra_clk_tvdac] = { .dt_id = TEGRA20_CLK_TVDAC, .present = true }, [tegra_clk_vi_sensor] = { .dt_id = TEGRA20_CLK_VI_SENSOR, .present = true }, [tegra_clk_afi] = { .dt_id = TEGRA20_CLK_AFI, .present = true }, + [tegra_clk_fuse] = { .dt_id = TEGRA20_CLK_FUSE, .present = true }, + [tegra_clk_kfuse] = { .dt_id = TEGRA20_CLK_KFUSE, .present = true }, }; static unsigned long tegra20_clk_measure_input_freq(void) -- cgit v0.10.2 From 2edf3e035302776e4756e446baf3b6c7b94c3698 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 2 Dec 2013 12:30:25 +0100 Subject: clk: tegra: Correct clock number for UARTE UARTE has clock number 66. Number 65 is the right one for UARTD. Signed-off-by: Thierry Reding diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 5c35885..3744a6f 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -492,7 +492,7 @@ static struct tegra_periph_init_data periph_clks[] = { UART("uartb", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_UARTB, 7, tegra_clk_uartb), UART("uartc", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_UARTC, 55, tegra_clk_uartc), UART("uartd", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_UARTD, 65, tegra_clk_uartd), - UART("uarte", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_UARTE, 65, tegra_clk_uarte), + UART("uarte", mux_pllp_pllc_pllm_clkm, CLK_SOURCE_UARTE, 66, tegra_clk_uarte), XUSB("xusb_host_src", mux_clkm_pllp_pllc_pllre, CLK_SOURCE_XUSB_HOST_SRC, 143, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_xusb_host_src), XUSB("xusb_falcon_src", mux_clkm_pllp_pllc_pllre, CLK_SOURCE_XUSB_FALCON_SRC, 143, TEGRA_PERIPH_NO_RESET, tegra_clk_xusb_falcon_src), XUSB("xusb_fs_src", mux_clkm_48M_pllp_480M, CLK_SOURCE_XUSB_FS_SRC, 143, TEGRA_PERIPH_NO_RESET, tegra_clk_xusb_fs_src), -- cgit v0.10.2 From 2ec35fd503bf6367ba55ed94dcb68edfe0d26e6a Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 26 Dec 2013 16:44:21 -0800 Subject: clk: tegra: Fix PLLP rate table This table had settings for 216MHz, but PLLP is (and is supposed to be) configured at 408MHz. If that table is used and PLLP_BASE_OVRRIDE is not set, the kernel will panic in clk_pll_recalc_rate(). Signed-off-by: Gabe Black Signed-off-by: Andrew Bresticker diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c index aff86b5..28bb238 100644 --- a/drivers/clk/tegra/clk-tegra124.c +++ b/drivers/clk/tegra/clk-tegra124.c @@ -516,11 +516,11 @@ static struct div_nmp pllp_nmp = { }; static struct tegra_clk_pll_freq_table pll_p_freq_table[] = { - {12000000, 216000000, 432, 12, 1, 8}, - {13000000, 216000000, 432, 13, 1, 8}, - {16800000, 216000000, 360, 14, 1, 8}, - {19200000, 216000000, 360, 16, 1, 8}, - {26000000, 216000000, 432, 26, 1, 8}, + {12000000, 408000000, 408, 12, 0, 8}, + {13000000, 408000000, 408, 13, 0, 8}, + {16800000, 408000000, 340, 14, 0, 8}, + {19200000, 408000000, 340, 16, 0, 8}, + {26000000, 408000000, 408, 26, 0, 8}, {0, 0, 0, 0, 0, 0}, }; -- cgit v0.10.2 From 67fc26bfd7a265883fd0804f24f6287d16769e3d Mon Sep 17 00:00:00 2001 From: Rhyland Klein Date: Thu, 26 Dec 2013 16:44:22 -0800 Subject: clk: tegra: Fix PLLD mnp table PLLD was using the same mnp table as PLLP. Fix it to use its own table which is different from PLLP's. Signed-off-by: Rhyland Klein Signed-off-by: Andrew Bresticker diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c index 28bb238..14c3f2f 100644 --- a/drivers/clk/tegra/clk-tegra124.c +++ b/drivers/clk/tegra/clk-tegra124.c @@ -570,6 +570,15 @@ static struct tegra_clk_pll_params pll_a_params = { .flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_USE_LOCK, }; +static struct div_nmp plld_nmp = { + .divm_shift = 0, + .divm_width = 5, + .divn_shift = 8, + .divn_width = 11, + .divp_shift = 20, + .divp_width = 3, +}; + static struct tegra_clk_pll_freq_table pll_d_freq_table[] = { {12000000, 216000000, 864, 12, 4, 12}, {13000000, 216000000, 864, 13, 4, 12}, @@ -603,7 +612,7 @@ static struct tegra_clk_pll_params pll_d_params = { .lock_mask = PLL_BASE_LOCK, .lock_enable_bit_idx = PLLDU_MISC_LOCK_ENABLE, .lock_delay = 1000, - .div_nmp = &pllp_nmp, + .div_nmp = &plld_nmp, .freq_table = pll_d_freq_table, .flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON | TEGRA_PLL_USE_LOCK, -- cgit v0.10.2 From 0e766c2d9fc8cd2ad0e0fe97ff4e264cb686fc32 Mon Sep 17 00:00:00 2001 From: David Ung Date: Thu, 26 Dec 2013 16:44:23 -0800 Subject: clk: tegra: PLLD2 fixes for hdmi Set correct pll_d2_out0 divider and correct the p div values for pll_d2. Signed-off-by: David Ung Signed-off-by: Andrew Bresticker diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c index 14c3f2f..0fc9126 100644 --- a/drivers/clk/tegra/clk-tegra124.c +++ b/drivers/clk/tegra/clk-tegra124.c @@ -619,12 +619,11 @@ static struct tegra_clk_pll_params pll_d_params = { }; static struct tegra_clk_pll_freq_table tegra124_pll_d2_freq_table[] = { - { 12000000, 148500000, 99, 1, 8}, - { 12000000, 594000000, 99, 1, 1}, - { 13000000, 594000000, 91, 1, 1}, /* actual: 591.5 MHz */ - { 16800000, 594000000, 71, 1, 1}, /* actual: 596.4 MHz */ - { 19200000, 594000000, 62, 1, 1}, /* actual: 595.2 MHz */ - { 26000000, 594000000, 91, 2, 1}, /* actual: 591.5 MHz */ + { 12000000, 594000000, 99, 1, 2}, + { 13000000, 594000000, 91, 1, 2}, /* actual: 591.5 MHz */ + { 16800000, 594000000, 71, 1, 2}, /* actual: 596.4 MHz */ + { 19200000, 594000000, 62, 1, 2}, /* actual: 595.2 MHz */ + { 26000000, 594000000, 91, 2, 2}, /* actual: 591.5 MHz */ { 0, 0, 0, 0, 0, 0 }, }; @@ -1295,9 +1294,9 @@ static void __init tegra124_pll_init(void __iomem *clk_base, clk_register_clkdev(clk, "pll_d2", NULL); clks[TEGRA124_CLK_PLL_D2] = clk; - /* PLLD2_OUT0 ?? */ + /* PLLD2_OUT0 */ clk = clk_register_fixed_factor(NULL, "pll_d2_out0", "pll_d2", - CLK_SET_RATE_PARENT, 1, 2); + CLK_SET_RATE_PARENT, 1, 1); clk_register_clkdev(clk, "pll_d2_out0", NULL); clks[TEGRA124_CLK_PLL_D2_OUT0] = clk; -- cgit v0.10.2 From 82ba1c3c9988a8055f4a4d7ca2168e9efe7e7874 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 26 Dec 2013 16:44:24 -0800 Subject: clk: tegra: fix host1x clock on Tegra124 The host1x clock on Tegra124 is a 3-bit wide mux with 6 parents. Change thte id to tegra_clk_host1x_8 so that the correct clock gets registered. Signed-off-by: Mark Zhang Signed-off-by: Andrew Bresticker diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c index 0fc9126..743ccb4 100644 --- a/drivers/clk/tegra/clk-tegra124.c +++ b/drivers/clk/tegra/clk-tegra124.c @@ -775,7 +775,7 @@ static struct tegra_clk tegra124_clks[tegra_clk_max] __initdata = { [tegra_clk_gr3d] = { .dt_id = TEGRA124_CLK_GR_3D, .present = true }, [tegra_clk_disp2] = { .dt_id = TEGRA124_CLK_DISP2, .present = true }, [tegra_clk_disp1] = { .dt_id = TEGRA124_CLK_DISP1, .present = true }, - [tegra_clk_host1x] = { .dt_id = TEGRA124_CLK_HOST1X, .present = true }, + [tegra_clk_host1x_8] = { .dt_id = TEGRA124_CLK_HOST1X, .present = true }, [tegra_clk_vcp] = { .dt_id = TEGRA124_CLK_VCP, .present = true }, [tegra_clk_i2s0] = { .dt_id = TEGRA124_CLK_I2S0, .present = true }, [tegra_clk_apbdma] = { .dt_id = TEGRA124_CLK_APBDMA, .present = true }, -- cgit v0.10.2 From 20e7c323abac390deb35248705807bd844590048 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Thu, 26 Dec 2013 16:44:25 -0800 Subject: clk: tegra: fix sdmmc clks on Tegra1x4 The sdmmc clocks on Tegra114 and Tegra124 are 3-bit wide muxes with 6 parents. Add support for tegra_clk_sdmmc*_8 and switch Tegra114 and Tegra124 to use these clocks instead. Signed-off-by: Andrew Bresticker diff --git a/drivers/clk/tegra/clk-id.h b/drivers/clk/tegra/clk-id.h index cf0c323..c39613c 100644 --- a/drivers/clk/tegra/clk-id.h +++ b/drivers/clk/tegra/clk-id.h @@ -180,9 +180,13 @@ enum clk_id { tegra_clk_sbc6_8, tegra_clk_sclk, tegra_clk_sdmmc1, + tegra_clk_sdmmc1_8, tegra_clk_sdmmc2, + tegra_clk_sdmmc2_8, tegra_clk_sdmmc3, + tegra_clk_sdmmc3_8, tegra_clk_sdmmc4, + tegra_clk_sdmmc4_8, tegra_clk_se, tegra_clk_soc_therm, tegra_clk_sor0, diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 3744a6f..f5376a3 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -465,6 +465,10 @@ static struct tegra_periph_init_data periph_clks[] = { MUX("adx1", mux_plla_pllc_pllp_clkm, CLK_SOURCE_ADX1, 180, TEGRA_PERIPH_ON_APB, tegra_clk_adx1), MUX("amx1", mux_plla_pllc_pllp_clkm, CLK_SOURCE_AMX1, 185, TEGRA_PERIPH_ON_APB, tegra_clk_amx1), MUX("vi_sensor2", mux_pllm_pllc2_c_c3_pllp_plla, CLK_SOURCE_VI_SENSOR2, 20, TEGRA_PERIPH_NO_RESET, tegra_clk_vi_sensor2), + MUX8("sdmmc1", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SDMMC1, 14, 0, tegra_clk_sdmmc1_8), + MUX8("sdmmc2", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SDMMC2, 9, 0, tegra_clk_sdmmc2_8), + MUX8("sdmmc3", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SDMMC3, 69, 0, tegra_clk_sdmmc3_8), + MUX8("sdmmc4", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SDMMC4, 15, 0, tegra_clk_sdmmc4_8), MUX8("sbc1", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SBC1, 41, TEGRA_PERIPH_ON_APB, tegra_clk_sbc1_8), MUX8("sbc2", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SBC2, 44, TEGRA_PERIPH_ON_APB, tegra_clk_sbc2_8), MUX8("sbc3", mux_pllp_pllc2_c_c3_pllm_clkm, CLK_SOURCE_SBC3, 46, TEGRA_PERIPH_ON_APB, tegra_clk_sbc3_8), diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c index 90d9d25..80431f0 100644 --- a/drivers/clk/tegra/clk-tegra114.c +++ b/drivers/clk/tegra/clk-tegra114.c @@ -682,12 +682,12 @@ static struct tegra_clk tegra114_clks[tegra_clk_max] __initdata = { [tegra_clk_timer] = { .dt_id = TEGRA114_CLK_TIMER, .present = true }, [tegra_clk_uarta] = { .dt_id = TEGRA114_CLK_UARTA, .present = true }, [tegra_clk_uartd] = { .dt_id = TEGRA114_CLK_UARTD, .present = true }, - [tegra_clk_sdmmc2] = { .dt_id = TEGRA114_CLK_SDMMC2, .present = true }, + [tegra_clk_sdmmc2_8] = { .dt_id = TEGRA114_CLK_SDMMC2, .present = true }, [tegra_clk_i2s1] = { .dt_id = TEGRA114_CLK_I2S1, .present = true }, [tegra_clk_i2c1] = { .dt_id = TEGRA114_CLK_I2C1, .present = true }, [tegra_clk_ndflash] = { .dt_id = TEGRA114_CLK_NDFLASH, .present = true }, - [tegra_clk_sdmmc1] = { .dt_id = TEGRA114_CLK_SDMMC1, .present = true }, - [tegra_clk_sdmmc4] = { .dt_id = TEGRA114_CLK_SDMMC4, .present = true }, + [tegra_clk_sdmmc1_8] = { .dt_id = TEGRA114_CLK_SDMMC1, .present = true }, + [tegra_clk_sdmmc4_8] = { .dt_id = TEGRA114_CLK_SDMMC4, .present = true }, [tegra_clk_pwm] = { .dt_id = TEGRA114_CLK_PWM, .present = true }, [tegra_clk_i2s0] = { .dt_id = TEGRA114_CLK_I2S0, .present = true }, [tegra_clk_i2s2] = { .dt_id = TEGRA114_CLK_I2S2, .present = true }, @@ -723,7 +723,7 @@ static struct tegra_clk tegra114_clks[tegra_clk_max] __initdata = { [tegra_clk_bsev] = { .dt_id = TEGRA114_CLK_BSEV, .present = true }, [tegra_clk_i2c3] = { .dt_id = TEGRA114_CLK_I2C3, .present = true }, [tegra_clk_sbc4_8] = { .dt_id = TEGRA114_CLK_SBC4, .present = true }, - [tegra_clk_sdmmc3] = { .dt_id = TEGRA114_CLK_SDMMC3, .present = true }, + [tegra_clk_sdmmc3_8] = { .dt_id = TEGRA114_CLK_SDMMC3, .present = true }, [tegra_clk_owr] = { .dt_id = TEGRA114_CLK_OWR, .present = true }, [tegra_clk_csite] = { .dt_id = TEGRA114_CLK_CSITE, .present = true }, [tegra_clk_la] = { .dt_id = TEGRA114_CLK_LA, .present = true }, diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c index 743ccb4..b4cf650 100644 --- a/drivers/clk/tegra/clk-tegra124.c +++ b/drivers/clk/tegra/clk-tegra124.c @@ -761,12 +761,12 @@ static struct tegra_clk tegra124_clks[tegra_clk_max] __initdata = { [tegra_clk_rtc] = { .dt_id = TEGRA124_CLK_RTC, .present = true }, [tegra_clk_timer] = { .dt_id = TEGRA124_CLK_TIMER, .present = true }, [tegra_clk_uarta] = { .dt_id = TEGRA124_CLK_UARTA, .present = true }, - [tegra_clk_sdmmc2] = { .dt_id = TEGRA124_CLK_SDMMC2, .present = true }, + [tegra_clk_sdmmc2_8] = { .dt_id = TEGRA124_CLK_SDMMC2, .present = true }, [tegra_clk_i2s1] = { .dt_id = TEGRA124_CLK_I2S1, .present = true }, [tegra_clk_i2c1] = { .dt_id = TEGRA124_CLK_I2C1, .present = true }, [tegra_clk_ndflash] = { .dt_id = TEGRA124_CLK_NDFLASH, .present = true }, - [tegra_clk_sdmmc1] = { .dt_id = TEGRA124_CLK_SDMMC1, .present = true }, - [tegra_clk_sdmmc4] = { .dt_id = TEGRA124_CLK_SDMMC4, .present = true }, + [tegra_clk_sdmmc1_8] = { .dt_id = TEGRA124_CLK_SDMMC1, .present = true }, + [tegra_clk_sdmmc4_8] = { .dt_id = TEGRA124_CLK_SDMMC4, .present = true }, [tegra_clk_pwm] = { .dt_id = TEGRA124_CLK_PWM, .present = true }, [tegra_clk_i2s2] = { .dt_id = TEGRA124_CLK_I2S2, .present = true }, [tegra_clk_gr2d] = { .dt_id = TEGRA124_CLK_GR_2D, .present = true }, @@ -802,7 +802,7 @@ static struct tegra_clk tegra124_clks[tegra_clk_max] __initdata = { [tegra_clk_uartd] = { .dt_id = TEGRA124_CLK_UARTD, .present = true }, [tegra_clk_i2c3] = { .dt_id = TEGRA124_CLK_I2C3, .present = true }, [tegra_clk_sbc4] = { .dt_id = TEGRA124_CLK_SBC4, .present = true }, - [tegra_clk_sdmmc3] = { .dt_id = TEGRA124_CLK_SDMMC3, .present = true }, + [tegra_clk_sdmmc3_8] = { .dt_id = TEGRA124_CLK_SDMMC3, .present = true }, [tegra_clk_pcie] = { .dt_id = TEGRA124_CLK_PCIE, .present = true }, [tegra_clk_owr] = { .dt_id = TEGRA124_CLK_OWR, .present = true }, [tegra_clk_afi] = { .dt_id = TEGRA124_CLK_AFI, .present = true }, -- cgit v0.10.2 From 88b4bd7071ac06e321b4bf4bdb8c69db40182c5a Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Thu, 26 Dec 2013 16:44:26 -0800 Subject: clk: tegra: cclk_lp has a pllx/2 divider When pll_x is the parent of cclk_lp, PLLX_DIV2_BYPASS_LP determines whether cclk_lp output is divided by 2. Set TEGRA_DIVIDER_2 so that the clk_super driver is aware of this. Signed-off-by: Andrew Bresticker diff --git a/drivers/clk/tegra/clk-tegra-super-gen4.c b/drivers/clk/tegra/clk-tegra-super-gen4.c index 05dce4a..feb3201 100644 --- a/drivers/clk/tegra/clk-tegra-super-gen4.c +++ b/drivers/clk/tegra/clk-tegra-super-gen4.c @@ -120,7 +120,7 @@ void __init tegra_super_clk_gen4_init(void __iomem *clk_base, ARRAY_SIZE(cclk_lp_parents), CLK_SET_RATE_PARENT, clk_base + CCLKLP_BURST_POLICY, - 0, 4, 8, 9, NULL); + TEGRA_DIVIDER_2, 4, 8, 9, NULL); *dt_clk = clk; } -- cgit v0.10.2 From 3de5bdfb4cb3bd99052a4ffaee358189779be042 Mon Sep 17 00:00:00 2001 From: Andrew Bresticker Date: Thu, 26 Dec 2013 16:44:27 -0800 Subject: clk: tegra: use max divider if divider overflows When requesting a rate less than the minimum clock rate for a divider, use the maximum divider value instead of bailing out with an error. This matches the behavior of the generic clock divider. Signed-off-by: Andrew Bresticker diff --git a/drivers/clk/tegra/clk-divider.c b/drivers/clk/tegra/clk-divider.c index 4d75b1f..290f9c1 100644 --- a/drivers/clk/tegra/clk-divider.c +++ b/drivers/clk/tegra/clk-divider.c @@ -59,7 +59,7 @@ static int get_div(struct tegra_clk_frac_div *divider, unsigned long rate, return 0; if (divider_ux1 > get_max_div(divider)) - return -EINVAL; + return get_max_div(divider); return divider_ux1; } -- cgit v0.10.2 From c6eda5e81c4fcc77185117255c7419eda771f67f Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 31 Jan 2014 14:11:54 -0500 Subject: dm cache: move hook_info into common portion of per_bio_data structure Commit c9d28d5d ("dm cache: promotion optimisation for writes") incorrectly placed the 'hook_info' member in the writethrough-only portion of the per_bio_data structure. Given that the overwrite optimization may be used for writeback the 'hook_info' member must be placed above the 'cache' member of the per_bio_data structure. Any members above 'cache' are available from both writeback and writethrough modes' per_bio_data structure. Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Cc: stable@vger.kernel.org # 3.13+ diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index ffd472e..14256b7 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -289,6 +289,7 @@ struct per_bio_data { bool tick:1; unsigned req_nr:2; struct dm_deferred_entry *all_io_entry; + struct dm_hook_info hook_info; /* * writethrough fields. These MUST remain at the end of this @@ -297,7 +298,6 @@ struct per_bio_data { */ struct cache *cache; dm_cblock_t cblock; - struct dm_hook_info hook_info; struct dm_bio_details bio_details; }; -- cgit v0.10.2 From 80ae49aaed32af72630251eb06161b15cde15ac8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 31 Jan 2014 14:30:37 -0500 Subject: dm cache: do not add migration to completed list before unhooking bio When completing an overwrite bio, in overwrite_endio(), the associated migration should not be added to the 'completed_migrations' until the bio's fields are restored with dm_unhook_bio(). Otherwise, do_worker() can race to process 'completed_migrations' before dm_unhook_bio() -- so the bio's bi_end_io is incorrect. This is unlikely to cause any problems given the current code but should be fixed on the basis of correctness. Also, the cache's spinlock only needs to be held when manipulating the 'completed_migrations' list -- other changes don't need protection. Signed-off-by: Mike Snitzer Acked-by: Joe Thornber diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 14256b7..db09444 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1010,13 +1010,15 @@ static void overwrite_endio(struct bio *bio, int err) struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); unsigned long flags; + dm_unhook_bio(&pb->hook_info, bio); + if (err) mg->err = true; + mg->requeue_holder = false; + spin_lock_irqsave(&cache->lock, flags); list_add_tail(&mg->list, &cache->completed_migrations); - dm_unhook_bio(&pb->hook_info, bio); - mg->requeue_holder = false; spin_unlock_irqrestore(&cache->lock, flags); wake_worker(cache); -- cgit v0.10.2 From 4d1662a30dde6e545086fe0e8fd7e474c4e0b639 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 6 Feb 2014 06:08:56 -0500 Subject: dm thin: avoid metadata commit if a pool's thin devices haven't changed Commit 905e51b ("dm thin: commit outstanding data every second") introduced a periodic commit. This commit occurs regardless of whether any thin devices have made changes. Fix the periodic commit to check if any of a pool's thin devices have changed using dm_pool_changed_this_transaction(). Reported-by: Alexander Larsson Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 7da3476..3bb4506 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1489,6 +1489,23 @@ bool dm_thin_changed_this_transaction(struct dm_thin_device *td) return r; } +bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd) +{ + bool r = false; + struct dm_thin_device *td, *tmp; + + down_read(&pmd->root_lock); + list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) { + if (td->changed) { + r = td->changed; + break; + } + } + up_read(&pmd->root_lock); + + return r; +} + bool dm_thin_aborted_changes(struct dm_thin_device *td) { bool r; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 9a36856..dd6088a 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -161,6 +161,8 @@ int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block); */ bool dm_thin_changed_this_transaction(struct dm_thin_device *td); +bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd); + bool dm_thin_aborted_changes(struct dm_thin_device *td); int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index faaf944..d501e43 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1357,7 +1357,8 @@ static void process_deferred_bios(struct pool *pool) bio_list_init(&pool->deferred_flush_bios); spin_unlock_irqrestore(&pool->lock, flags); - if (bio_list_empty(&bios) && !need_commit_due_to_time(pool)) + if (bio_list_empty(&bios) && + !(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool))) return; if (commit(pool)) { -- cgit v0.10.2 From d73f9907294c670da14baea3fb31be27879e818e Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 12 Feb 2014 16:37:30 -0500 Subject: dm io: fix I/O to multiple destinations Commit 003b5c5719f159f4f4bf97511c4702a0638313dd ("block: Convert drivers to immutable biovecs") broke dm-mirror due to dm-io breakage. dm-io had three possible iterators (DM_IO_PAGE_LIST, DM_IO_BVEC, DM_IO_VMA) that iterate over pages where the I/O should be performed. The switch to immutable biovecs changed the DM_IO_BVEC iterator to DM_IO_BIO. Before this change the iterator stored the pointer to a bio vector in the dpages structure. The iterator incremented the pointer in the dpages structure as it advanced over the pages. After the immutable biovecs change, the DM_IO_BIO iterator stores a pointer to the bio in the dpages structure and uses bio_advance to change the bio as it advances. The problem is that the function dispatch_io stores the content of the dpages structure into the variable old_pages and restores it before issuing I/O to each of the devices. Before the change, the statement "*dp = old_pages;" restored the iterator to its starting position. After the change, struct dpages holds a pointer to the bio, thus the statement "*dp = old_pages;" doesn't restore the iterator. Consequently, in the context of dm-mirror: only the first mirror leg is written correctly, the kernel locks up when trying to write the other mirror legs because the number of sectors to write in the where->count variable doesn't match the number of sectors returned by the iterator. This patch fixes the bug by partially reverting the original patch - it changes the code so that struct dpages holds a pointer to the bio vector, so that the statement "*dp = old_pages;" restores the iterator correctly. The field "context_u" holds the offset from the beginning of the current bio vector entry, just like the "bio->bi_iter.bi_bvec_done" field. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index b2b8a10..3842ac7 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -201,29 +201,28 @@ static void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offse /* * Functions for getting the pages from a bvec. */ -static void bio_get_page(struct dpages *dp, - struct page **p, unsigned long *len, unsigned *offset) +static void bio_get_page(struct dpages *dp, struct page **p, + unsigned long *len, unsigned *offset) { - struct bio *bio = dp->context_ptr; - struct bio_vec bvec = bio_iovec(bio); - *p = bvec.bv_page; - *len = bvec.bv_len; - *offset = bvec.bv_offset; + struct bio_vec *bvec = dp->context_ptr; + *p = bvec->bv_page; + *len = bvec->bv_len - dp->context_u; + *offset = bvec->bv_offset + dp->context_u; } static void bio_next_page(struct dpages *dp) { - struct bio *bio = dp->context_ptr; - struct bio_vec bvec = bio_iovec(bio); - - bio_advance(bio, bvec.bv_len); + struct bio_vec *bvec = dp->context_ptr; + dp->context_ptr = bvec + 1; + dp->context_u = 0; } static void bio_dp_init(struct dpages *dp, struct bio *bio) { dp->get_page = bio_get_page; dp->next_page = bio_next_page; - dp->context_ptr = bio; + dp->context_ptr = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); + dp->context_u = bio->bi_iter.bi_bvec_done; } /* -- cgit v0.10.2 From 70b0052425ffd549bb27fb08649a4d30daaf40e4 Mon Sep 17 00:00:00 2001 From: Anthony Olech Date: Mon, 17 Feb 2014 11:23:39 -0800 Subject: Input: da9052_onkey - use correct register bit for key status The wrong register bit of the DA9052/3 PMIC registers was used to determine the status on the ONKEY. Also a failure in reading the status register will no longer result in the work queue being rescheduled as that would result in a (potentially) endless retry. Signed-off-by: Anthony Olech Acked-by: David Dajun Chen Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/da9052_onkey.c b/drivers/input/misc/da9052_onkey.c index 1f695f2..184c8f2 100644 --- a/drivers/input/misc/da9052_onkey.c +++ b/drivers/input/misc/da9052_onkey.c @@ -27,29 +27,32 @@ struct da9052_onkey { static void da9052_onkey_query(struct da9052_onkey *onkey) { - int key_stat; + int ret; - key_stat = da9052_reg_read(onkey->da9052, DA9052_EVENT_B_REG); - if (key_stat < 0) { + ret = da9052_reg_read(onkey->da9052, DA9052_STATUS_A_REG); + if (ret < 0) { dev_err(onkey->da9052->dev, - "Failed to read onkey event %d\n", key_stat); + "Failed to read onkey event err=%d\n", ret); } else { /* * Since interrupt for deassertion of ONKEY pin is not * generated, onkey event state determines the onkey * button state. */ - key_stat &= DA9052_EVENTB_ENONKEY; - input_report_key(onkey->input, KEY_POWER, key_stat); + bool pressed = !(ret & DA9052_STATUSA_NONKEY); + + input_report_key(onkey->input, KEY_POWER, pressed); input_sync(onkey->input); - } - /* - * Interrupt is generated only when the ONKEY pin is asserted. - * Hence the deassertion of the pin is simulated through work queue. - */ - if (key_stat) - schedule_delayed_work(&onkey->work, msecs_to_jiffies(50)); + /* + * Interrupt is generated only when the ONKEY pin + * is asserted. Hence the deassertion of the pin + * is simulated through work queue. + */ + if (pressed) + schedule_delayed_work(&onkey->work, + msecs_to_jiffies(50)); + } } static void da9052_onkey_work(struct work_struct *work) -- cgit v0.10.2 From 6e67669678d2d51b2bcf0411aeb629b4353a9880 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 5 Dec 2013 14:56:54 -0800 Subject: documentation: Document call_rcu() safety mechanisms and limitations The call_rcu() family of primitives will take action to accelerate grace periods when the number of callbacks pending on a given CPU becomes excessive. Although this safety mechanism can be useful, it is no substitute for users of call_rcu() having rate-limit controls in place. This commit adds this nuance to the documentation. Reported-by: "Michael S. Tsirkin" Reported-by: Gleb Natapov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 9126619..9d10d1d 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -256,10 +256,10 @@ over a rather long period of time, but improvements are always welcome! variations on this theme. b. Limiting update rate. For example, if updates occur only - once per hour, then no explicit rate limiting is required, - unless your system is already badly broken. The dcache - subsystem takes this approach -- updates are guarded - by a global lock, limiting their rate. + once per hour, then no explicit rate limiting is + required, unless your system is already badly broken. + Older versions of the dcache subsystem take this approach, + guarding updates with a global lock, limiting their rate. c. Trusted update -- if updates can only be done manually by superuser or some other trusted user, then it might not @@ -268,7 +268,8 @@ over a rather long period of time, but improvements are always welcome! the machine. d. Use call_rcu_bh() rather than call_rcu(), in order to take - advantage of call_rcu_bh()'s faster grace periods. + advantage of call_rcu_bh()'s faster grace periods. (This + is only a partial solution, though.) e. Periodically invoke synchronize_rcu(), permitting a limited number of updates per grace period. @@ -276,6 +277,13 @@ over a rather long period of time, but improvements are always welcome! The same cautions apply to call_rcu_bh(), call_rcu_sched(), call_srcu(), and kfree_rcu(). + Note that although these primitives do take action to avoid memory + exhaustion when any given CPU has too many callbacks, a determined + user could still exhaust memory. This is especially the case + if a system with a large number of CPUs has been configured to + offload all of its RCU callbacks onto a single CPU, or if the + system has relatively little free memory. + 9. All RCU list-traversal primitives, which include rcu_dereference(), list_for_each_entry_rcu(), and list_for_each_safe_rcu(), must be either within an RCU read-side -- cgit v0.10.2 From 449f7413c876a229fd95362cc12bc7ade18d0661 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Jan 2014 15:03:50 -0800 Subject: Documentation/memory-barriers.txt: ACCESS_ONCE() provides cache coherence The ACCESS_ONCE() primitive provides cache coherence, but the documentation does not clearly state this. This commit therefore upgrades the documentation. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 102dc19..f9ff060 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1249,6 +1249,23 @@ The ACCESS_ONCE() function can prevent any number of optimizations that, while perfectly safe in single-threaded code, can be fatal in concurrent code. Here are some examples of these sorts of optimizations: + (*) The compiler is within its rights to reorder loads and stores + to the same variable, and in some cases, the CPU is within its + rights to reorder loads to the same variable. This means that + the following code: + + a[0] = x; + a[1] = x; + + Might result in an older value of x stored in a[1] than in a[0]. + Prevent both the compiler and the CPU from doing this as follows: + + a[0] = ACCESS_ONCE(x); + a[1] = ACCESS_ONCE(x); + + In short, ACCESS_ONCE() provides cache coherence for accesses from + multiple CPUs to a single variable. + (*) The compiler is within its rights to merge successive loads from the same variable. Such merging can cause the compiler to "optimize" the following code: -- cgit v0.10.2 From 586dd56a4c17611e3927c9ff02ab8d0a6a545b38 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 11 Feb 2014 12:28:06 -0800 Subject: Documentation/memory-barriers.txt: Conditional must use prior load A control dependency consists of a load, a conditional that depends on that load, and a store. This commit emphasizes this point in the summary. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index f9ff060..6b25efd 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -724,7 +724,8 @@ In summary: later loads, smp_mb(). (*) Control dependencies require at least one run-time conditional - between the prior load and the subsequent store. If the compiler + between the prior load and the subsequent store, and this + conditional must involve the prior load. If the compiler is able to optimize the conditional away, it will have also optimized away the ordering. Careful use of ACCESS_ONCE() can help to preserve the needed conditional. -- cgit v0.10.2 From bbf393b0d5350d68dbcf1f231b8af07b1b31121d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Feb 2014 11:12:37 -0800 Subject: Documentation/kernel-per-CPU-kthreads.txt: Workqueue affinity This commit documents the ability to apply CPU affinity to WQ_SYSFS workqueues, thus offloading them from the desired worker CPUs. Signed-off-by: Paul E. McKenney Reviewed-by: Tejun Heo Acked-by: Frederic Weisbecker Reviewed-by: Lai Jiangshan Reviewed-by: Josh Triplett diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index 827104f..f3cd299 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -162,7 +162,18 @@ Purpose: Execute workqueue requests To reduce its OS jitter, do any of the following: 1. Run your workload at a real-time priority, which will allow preempting the kworker daemons. -2. Do any of the following needed to avoid jitter that your +2. A given workqueue can be made visible in the sysfs filesystem + by passing the WQ_SYSFS to that workqueue's alloc_workqueue(). + Such a workqueue can be confined to a given subset of the + CPUs using the /sys/devices/virtual/workqueue/*/cpumask sysfs + files. The set of WQ_SYSFS workqueues can be displayed using + "ls sys/devices/virtual/workqueue". That said, the workqueues + maintainer would like to caution people against indiscriminately + sprinkling WQ_SYSFS across all the workqueues. The reason for + caution is that it is easy to add WQ_SYSFS, but because sysfs is + part of the formal user/kernel API, it can be nearly impossible + to remove it, even if its addition was a mistake. +3. Do any of the following needed to avoid jitter that your application cannot tolerate: a. Build your kernel with CONFIG_SLUB=y rather than CONFIG_SLAB=y, thus avoiding the slab allocator's periodic -- cgit v0.10.2 From 9b2b3bf53124dca4ac815bd2fca53a31e5e262bd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Feb 2014 20:19:47 -0800 Subject: Documentation/memory-barriers.txt: Need barriers() for some control dependencies Current compilers can "speculate" stores in the case where both legs of the "if" statement start with identical stores. Because the stores are identical, the compiler knows that the store will unconditionally execute regardless of the "if" condition, and so the compiler is within its rights to hoist the store to precede the condition. Such hoisting destroys the control-dependency ordering. This ordering can be restored by placing a barrier() at the beginning of each leg of the "if" statement. This commit adds this requirement to the control-dependencies section. Signed-off-by: Paul E. McKenney diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 6b25efd..9dde54c 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -608,26 +608,30 @@ as follows: b = p; /* BUG: Compiler can reorder!!! */ do_something(); -The solution is again ACCESS_ONCE(), which preserves the ordering between -the load from variable 'a' and the store to variable 'b': +The solution is again ACCESS_ONCE() and barrier(), which preserves the +ordering between the load from variable 'a' and the store to variable 'b': q = ACCESS_ONCE(a); if (q) { + barrier(); ACCESS_ONCE(b) = p; do_something(); } else { + barrier(); ACCESS_ONCE(b) = p; do_something_else(); } -You could also use barrier() to prevent the compiler from moving -the stores to variable 'b', but barrier() would not prevent the -compiler from proving to itself that a==1 always, so ACCESS_ONCE() -is also needed. +The initial ACCESS_ONCE() is required to prevent the compiler from +proving the value of 'a', and the pair of barrier() invocations are +required to prevent the compiler from pulling the two identical stores +to 'b' out from the legs of the "if" statement. It is important to note that control dependencies absolutely require a a conditional. For example, the following "optimized" version of -the above example breaks ordering: +the above example breaks ordering, which is why the barrier() invocations +are absolutely required if you have identical stores in both legs of +the "if" statement: q = ACCESS_ONCE(a); ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */ @@ -643,9 +647,11 @@ It is of course legal for the prior load to be part of the conditional, for example, as follows: if (ACCESS_ONCE(a) > 0) { + barrier(); ACCESS_ONCE(b) = q / 2; do_something(); } else { + barrier(); ACCESS_ONCE(b) = q / 3; do_something_else(); } @@ -659,9 +665,11 @@ the needed conditional. For example: q = ACCESS_ONCE(a); if (q % MAX) { + barrier(); ACCESS_ONCE(b) = p; do_something(); } else { + barrier(); ACCESS_ONCE(b) = p; do_something_else(); } @@ -723,6 +731,10 @@ In summary: use smb_rmb(), smp_wmb(), or, in the case of prior stores and later loads, smp_mb(). + (*) If both legs of the "if" statement begin with identical stores + to the same variable, a barrier() statement is required at the + beginning of each leg of the "if" statement. + (*) Control dependencies require at least one run-time conditional between the prior load and the subsequent store, and this conditional must involve the prior load. If the compiler -- cgit v0.10.2 From e4696a1d3b1125d427de685531a44258ea6263df Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Feb 2014 08:52:32 -0800 Subject: documentation: Fix some inconsistencies in RTFP.txt Some of the early history leaves out some citations and vice versa. This commit fixes these up. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 273e654d..2f0fcb2 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt @@ -31,6 +31,14 @@ has lapsed, so this approach may be used in non-GPL software, if desired. (In contrast, implementation of RCU is permitted only in software licensed under either GPL or LGPL. Sorry!!!) +In 1987, Rashid et al. described lazy TLB-flush [RichardRashid87a]. +At first glance, this has nothing to do with RCU, but nevertheless +this paper helped inspire the update-side batching used in the later +RCU implementation in DYNIX/ptx. In 1988, Barbara Liskov published +a description of Argus that noted that use of out-of-date values can +be tolerated in some situations. Thus, this paper provides some early +theoretical justification for use of stale data. + In 1990, Pugh [Pugh90] noted that explicitly tracking which threads were reading a given data structure permitted deferred free to operate in the presence of non-terminating threads. However, this explicit @@ -41,11 +49,11 @@ providing a fine-grained locking design, however, it would be interesting to see how much of the performance advantage reported in 1990 remains today. -At about this same time, Adams [Adams91] described ``chaotic relaxation'', -where the normal barriers between successive iterations of convergent -numerical algorithms are relaxed, so that iteration $n$ might use -data from iteration $n-1$ or even $n-2$. This introduces error, -which typically slows convergence and thus increases the number of +At about this same time, Andrews [Andrews91textbook] described ``chaotic +relaxation'', where the normal barriers between successive iterations +of convergent numerical algorithms are relaxed, so that iteration $n$ +might use data from iteration $n-1$ or even $n-2$. This introduces +error, which typically slows convergence and thus increases the number of iterations required. However, this increase is sometimes more than made up for by a reduction in the number of expensive barrier operations, which are otherwise required to synchronize the threads at the end @@ -55,7 +63,8 @@ is thus inapplicable to most data structures in operating-system kernels. In 1992, Henry (now Alexia) Massalin completed a dissertation advising parallel programmers to defer processing when feasible to simplify -synchronization. RCU makes extremely heavy use of this advice. +synchronization [HMassalinPhD]. RCU makes extremely heavy use of +this advice. In 1993, Jacobson [Jacobson93] verbally described what is perhaps the simplest deferred-free technique: simply waiting a fixed amount of time @@ -90,27 +99,29 @@ mechanism, which is quite similar to RCU [Gamsa99]. These operating systems made pervasive use of RCU in place of "existence locks", which greatly simplifies locking hierarchies and helps avoid deadlocks. -2001 saw the first RCU presentation involving Linux [McKenney01a] -at OLS. The resulting abundance of RCU patches was presented the -following year [McKenney02a], and use of RCU in dcache was first -described that same year [Linder02a]. +The year 2000 saw an email exchange that would likely have +led to yet another independent invention of something like RCU +[RustyRussell2000a,RustyRussell2000b]. Instead, 2001 saw the first +RCU presentation involving Linux [McKenney01a] at OLS. The resulting +abundance of RCU patches was presented the following year [McKenney02a], +and use of RCU in dcache was first described that same year [Linder02a]. Also in 2002, Michael [Michael02b,Michael02a] presented "hazard-pointer" techniques that defer the destruction of data structures to simplify non-blocking synchronization (wait-free synchronization, lock-free synchronization, and obstruction-free synchronization are all examples of -non-blocking synchronization). In particular, this technique eliminates -locking, reduces contention, reduces memory latency for readers, and -parallelizes pipeline stalls and memory latency for writers. However, -these techniques still impose significant read-side overhead in the -form of memory barriers. Researchers at Sun worked along similar lines -in the same timeframe [HerlihyLM02]. These techniques can be thought -of as inside-out reference counts, where the count is represented by the -number of hazard pointers referencing a given data structure rather than -the more conventional counter field within the data structure itself. -The key advantage of inside-out reference counts is that they can be -stored in immortal variables, thus allowing races between access and -deletion to be avoided. +non-blocking synchronization). The corresponding journal article appeared +in 2004 [MagedMichael04a]. This technique eliminates locking, reduces +contention, reduces memory latency for readers, and parallelizes pipeline +stalls and memory latency for writers. However, these techniques still +impose significant read-side overhead in the form of memory barriers. +Researchers at Sun worked along similar lines in the same timeframe +[HerlihyLM02]. These techniques can be thought of as inside-out reference +counts, where the count is represented by the number of hazard pointers +referencing a given data structure rather than the more conventional +counter field within the data structure itself. The key advantage +of inside-out reference counts is that they can be stored in immortal +variables, thus allowing races between access and deletion to be avoided. By the same token, RCU can be thought of as a "bulk reference count", where some form of reference counter covers all reference by a given CPU @@ -123,8 +134,10 @@ can be thought of in other terms as well. In 2003, the K42 group described how RCU could be used to create hot-pluggable implementations of operating-system functions [Appavoo03a]. -Later that year saw a paper describing an RCU implementation of System -V IPC [Arcangeli03], and an introduction to RCU in Linux Journal +Later that year saw a paper describing an RCU implementation +of System V IPC [Arcangeli03] (following up on a suggestion by +Hugh Dickins [Dickins02a] and an implementation by Mingming Cao +[MingmingCao2002IPCRCU]), and an introduction to RCU in Linux Journal [McKenney03a]. 2004 has seen a Linux-Journal article on use of RCU in dcache @@ -383,6 +396,21 @@ for Programming Languages and Operating Systems}" } } +@phdthesis{HMassalinPhD +,author="H. Massalin" +,title="Synthesis: An Efficient Implementation of Fundamental Operating +System Services" +,school="Columbia University" +,address="New York, NY" +,year="1992" +,annotation={ + Mondo optimizing compiler. + Wait-free stuff. + Good advice: defer work to avoid synchronization. See page 90 + (PDF page 106), Section 5.4, fourth bullet point. +} +} + @unpublished{Jacobson93 ,author="Van Jacobson" ,title="Avoid Read-Side Locking Via Delayed Free" @@ -671,6 +699,20 @@ Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni" [Viewed October 18, 2004]" } +@conference{Michael02b +,author="Maged M. Michael" +,title="High Performance Dynamic Lock-Free Hash Tables and List-Based Sets" +,Year="2002" +,Month="August" +,booktitle="{Proceedings of the 14\textsuperscript{th} Annual ACM +Symposium on Parallel +Algorithms and Architecture}" +,pages="73-82" +,annotation={ +Like the title says... +} +} + @Conference{Linder02a ,Author="Hanna Linder and Dipankar Sarma and Maneesh Soni" ,Title="Scalability of the Directory Entry Cache" @@ -727,6 +769,24 @@ Andrea Arcangeli and Andi Kleen and Orran Krieger and Rusty Russell" } } +@conference{Michael02a +,author="Maged M. Michael" +,title="Safe Memory Reclamation for Dynamic Lock-Free Objects Using Atomic +Reads and Writes" +,Year="2002" +,Month="August" +,booktitle="{Proceedings of the 21\textsuperscript{st} Annual ACM +Symposium on Principles of Distributed Computing}" +,pages="21-30" +,annotation={ + Each thread keeps an array of pointers to items that it is + currently referencing. Sort of an inside-out garbage collection + mechanism, but one that requires the accessing code to explicitly + state its needs. Also requires read-side memory barriers on + most architectures. +} +} + @unpublished{Dickins02a ,author="Hugh Dickins" ,title="Use RCU for System-V IPC" @@ -735,6 +795,17 @@ Andrea Arcangeli and Andi Kleen and Orran Krieger and Rusty Russell" ,note="private communication" } +@InProceedings{HerlihyLM02 +,author={Maurice Herlihy and Victor Luchangco and Mark Moir} +,title="The Repeat Offender Problem: A Mechanism for Supporting Dynamic-Sized, +Lock-Free Data Structures" +,booktitle={Proceedings of 16\textsuperscript{th} International +Symposium on Distributed Computing} +,year=2002 +,month="October" +,pages="339-353" +} + @unpublished{Sarma02b ,Author="Dipankar Sarma" ,Title="Some dcache\_rcu benchmark numbers" @@ -749,6 +820,19 @@ Andrea Arcangeli and Andi Kleen and Orran Krieger and Rusty Russell" } } +@unpublished{MingmingCao2002IPCRCU +,Author="Mingming Cao" +,Title="[PATCH]updated ipc lock patch" +,month="October" +,year="2002" +,note="Available: +\url{https://lkml.org/lkml/2002/10/24/262} +[Viewed February 15, 2014]" +,annotation={ + Mingming Cao's patch to introduce RCU to SysV IPC. +} +} + @unpublished{LinusTorvalds2003a ,Author="Linus Torvalds" ,Title="Re: {[PATCH]} small fixes in brlock.h" @@ -982,6 +1066,23 @@ Realtime Applications" } } +@article{MagedMichael04a +,author="Maged M. Michael" +,title="Hazard Pointers: Safe Memory Reclamation for Lock-Free Objects" +,Year="2004" +,Month="June" +,journal="IEEE Transactions on Parallel and Distributed Systems" +,volume="15" +,number="6" +,pages="491-504" +,url="Available: +\url{http://www.research.ibm.com/people/m/michael/ieeetpds-2004.pdf} +[Viewed March 1, 2005]" +,annotation={ + New canonical hazard-pointer citation. +} +} + @phdthesis{PaulEdwardMcKenneyPhD ,author="Paul E. McKenney" ,title="Exploiting Deferred Destruction: -- cgit v0.10.2 From 3660c2813fb6d0ba48ee44bcbf9feddf7218c11d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Dec 2013 09:24:02 -0800 Subject: rcu: Add ACCESS_ONCE() to ->n_force_qs_lh accesses The ->n_force_qs_lh field is accessed without the benefit of any synchronization, so this commit adds the needed ACCESS_ONCE() wrappers. Yes, increments to ->n_force_qs_lh can be lost, but contention should be low and the field is strictly statistical in nature, so this is not a problem. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b3d116c..e641577 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2304,7 +2304,7 @@ static void force_quiescent_state(struct rcu_state *rsp) if (rnp_old != NULL) raw_spin_unlock(&rnp_old->fqslock); if (ret) { - rsp->n_force_qs_lh++; + ACCESS_ONCE(rsp->n_force_qs_lh)++; return; } rnp_old = rnp; @@ -2316,7 +2316,7 @@ static void force_quiescent_state(struct rcu_state *rsp) smp_mb__after_unlock_lock(); raw_spin_unlock(&rnp_old->fqslock); if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { - rsp->n_force_qs_lh++; + ACCESS_ONCE(rsp->n_force_qs_lh)++; raw_spin_unlock_irqrestore(&rnp_old->lock, flags); return; /* Someone beat us to it. */ } diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index 4def475..d1f1e64 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -273,7 +273,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", rsp->n_force_qs, rsp->n_force_qs_ngp, rsp->n_force_qs - rsp->n_force_qs_ngp, - rsp->n_force_qs_lh, rsp->qlen_lazy, rsp->qlen); + ACCESS_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen); for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { if (rnp->level != level) { seq_puts(m, "\n"); -- cgit v0.10.2 From 87de1cfdc55b16b794e245b07322340725149d62 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 3 Dec 2013 10:02:52 -0800 Subject: rcu: Stop tracking FSF's postal address All of the RCU source files have the usual GPL header, which contains a long-obsolete postal address for FSF. To avoid the need to track the FSF office's movements, this commit substitutes the URL where GPL may be found. Reported-by: Greg KH Reported-by: Steven Rostedt Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 72bf3a0..b200756 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2001 * diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 6f01771..c364e91 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2008 * diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 72137ee..08b0840 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2008 * diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 9b058ee..a2783cb 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright (C) IBM Corporation, 2006 * Copyright (C) Fujitsu, 2012 diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 79c3877..1bd787f 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2011 * diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 3318d82..5db7e92 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright (C) IBM Corporation, 2006 * Copyright (C) Fujitsu, 2012 diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 1254f31..53b95bb 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2008 * diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h index 280d06c..4315285 100644 --- a/kernel/rcu/tiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h @@ -14,8 +14,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright (c) 2010 Linaro * diff --git a/kernel/rcu/torture.c b/kernel/rcu/torture.c index 732f8ae..ab7dd19 100644 --- a/kernel/rcu/torture.c +++ b/kernel/rcu/torture.c @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright (C) IBM Corporation, 2005, 2006 * diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e641577..321feef 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2008 * diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 8c19873..75dc3c3 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -13,8 +13,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2008 * diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 6e2ef4b..f9b9cdd 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -14,8 +14,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright Red Hat, 2009 * Copyright IBM Corporation, 2009 diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c index d1f1e64..5cdc62e 100644 --- a/kernel/rcu/tree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2008 * diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index c54609f..fd0d5b5 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -12,8 +12,8 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. * * Copyright IBM Corporation, 2001 * -- cgit v0.10.2 From cb1e78cfa267453bb19e7edafd214c03834b664c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 4 Dec 2013 18:42:03 -0800 Subject: rcu: Remove ACCESS_ONCE() from jiffies Because jiffies is one of a very few variables marked "volatile", there is no need to use ACCESS_ONCE() when accessing it. This commit therefore removes the redundant ACCESS_ONCE() wrappers. Reported by: Eric Dumazet Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/torture.c b/kernel/rcu/torture.c index ab7dd19..022c531 100644 --- a/kernel/rcu/torture.c +++ b/kernel/rcu/torture.c @@ -1352,7 +1352,7 @@ rcu_torture_shutdown(void *arg) unsigned long jiffies_snap; VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started"); - jiffies_snap = ACCESS_ONCE(jiffies); + jiffies_snap = jiffies; while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && !kthread_should_stop()) { delta = shutdown_time - jiffies_snap; @@ -1361,7 +1361,7 @@ rcu_torture_shutdown(void *arg) "rcu_torture_shutdown task: %lu jiffies remaining\n", torture_type, delta); schedule_timeout_interruptible(delta); - jiffies_snap = ACCESS_ONCE(jiffies); + jiffies_snap = jiffies; } if (kthread_should_stop()) { VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping"); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 321feef..73c3cd2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -837,7 +837,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, * to the next. Only do this for the primary flavor of RCU. */ if (rdp->rsp == rcu_state && - ULONG_CMP_GE(ACCESS_ONCE(jiffies), rdp->rsp->jiffies_resched)) { + ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { rdp->rsp->jiffies_resched += 5; resched_cpu(rdp->cpu); } @@ -847,7 +847,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, static void record_gp_stall_check_time(struct rcu_state *rsp) { - unsigned long j = ACCESS_ONCE(jiffies); + unsigned long j = jiffies; unsigned long j1; rsp->gp_start = j; @@ -1005,7 +1005,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp)) return; - j = ACCESS_ONCE(jiffies); + j = jiffies; /* * Lots of memory barriers to reject false positives. -- cgit v0.10.2 From 41f4abd92a34f9c5110bbb870c04f8854604e28d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 5 Dec 2013 15:10:23 -0800 Subject: rcu: Glue ASCII strings together Split strings make it difficult to find the code that resulted in a given console message, so this commit glues split strings back together despite the resulting long lines. Signed-off-by: Joe Perches Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b200756..d946d36 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -479,11 +479,9 @@ static inline void rcu_preempt_sleep_check(void) do { \ rcu_preempt_sleep_check(); \ rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ - "Illegal context switch in RCU-bh" \ - " read-side critical section"); \ + "Illegal context switch in RCU-bh read-side critical section"); \ rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ - "Illegal context switch in RCU-sched"\ - " read-side critical section"); \ + "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) #else /* #ifdef CONFIG_PROVE_RCU */ @@ -518,16 +516,14 @@ static inline void rcu_preempt_sleep_check(void) #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ - " usage"); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ rcu_dereference_sparse(p, space); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ - " usage"); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_protected() usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -542,8 +538,7 @@ static inline void rcu_preempt_sleep_check(void) ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ rcu_lockdep_assert(c, \ - "suspicious rcu_dereference_index_check()" \ - " usage"); \ + "suspicious rcu_dereference_index_check() usage"); \ smp_read_barrier_depends(); \ (_________p1); \ }) -- cgit v0.10.2 From 0adab9b9aa18d7e90337d43567f1eec3d5401b81 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 5 Dec 2013 16:19:15 -0800 Subject: rcu: Indentation and spacing fixes. This commit outdents expression-statement macros, thus repairing a few line-length complaints. Also fix some spacing errors called out by checkpatch.pl. Signed-off-by: Joe Perches Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/rculist.h b/include/linux/rculist.h index dbaf990..8183b46f 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -247,9 +247,10 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ - container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ - }) +({ \ + typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ +}) /** * Where are list_empty_rcu() and list_first_entry_rcu()? @@ -285,11 +286,11 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_first_or_null_rcu(ptr, type, member) \ - ({struct list_head *__ptr = (ptr); \ - struct list_head *__next = ACCESS_ONCE(__ptr->next); \ - likely(__ptr != __next) ? \ - list_entry_rcu(__next, type, member) : NULL; \ - }) +({ \ + struct list_head *__ptr = (ptr); \ + struct list_head *__next = ACCESS_ONCE(__ptr->next); \ + likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ +}) /** * list_for_each_entry_rcu - iterate over rcu list of given type diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d946d36..278a9da 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -508,40 +508,40 @@ static inline void rcu_preempt_sleep_check(void) #endif /* #else #ifdef __CHECKER__ */ #define __rcu_access_pointer(p, space) \ - ({ \ - typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_dereference_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ - }) +({ \ + typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + rcu_dereference_sparse(p, space); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ +}) #define __rcu_dereference_check(p, c, space) \ - ({ \ - typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ - rcu_dereference_sparse(p, space); \ - smp_read_barrier_depends(); \ - ((typeof(*p) __force __kernel *)(_________p1)); \ - }) +({ \ + typeof(*p) *_________p1 = (typeof(*p) *__force)ACCESS_ONCE(p); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ + rcu_dereference_sparse(p, space); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + ((typeof(*p) __force __kernel *)(_________p1)); \ +}) #define __rcu_dereference_protected(p, c, space) \ - ({ \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_protected() usage"); \ - rcu_dereference_sparse(p, space); \ - ((typeof(*p) __force __kernel *)(p)); \ - }) +({ \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_protected() usage"); \ + rcu_dereference_sparse(p, space); \ + ((typeof(*p) __force __kernel *)(p)); \ +}) #define __rcu_access_index(p, space) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_dereference_sparse(p, space); \ - (_________p1); \ - }) +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + rcu_dereference_sparse(p, space); \ + (_________p1); \ +}) #define __rcu_dereference_index_check(p, c) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_lockdep_assert(c, \ - "suspicious rcu_dereference_index_check() usage"); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + rcu_lockdep_assert(c, \ + "suspicious rcu_dereference_index_check() usage"); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) /** * RCU_INITIALIZER() - statically initialize an RCU-protected global variable -- cgit v0.10.2 From 88c1863066ccfa456797e12c5d8b4631aa1ad0d0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Dec 2013 13:24:32 -0800 Subject: rcu: Define rcu_assign_pointer() in terms of smp_store_release() The new smp_store_release() function provides better guarantees than did rcu_assign_pointer(), and potentially less overhead on some architectures. The guarantee that smp_store_release() provides that rcu_assign_pointer() does that is obscure, but its lack could cause considerable confusion. This guarantee is illustrated by the following code fragment: struct foo { int a; int b; int c; struct foo *next; }; struct foo foo1; struct foo foo2; struct foo __rcu *foop; ... foo2.a = 1; foo2.b = 2; BUG_ON(foo2.c); rcu_assign_pointer(foop, &foo); ... fp = rcu_dereference(foop); fp.c = 3; The current rcu_assign_pointer() semantics permit the BUG_ON() to trigger because rcu_assign_pointer()'s smp_wmb() is not guaranteed to order prior reads against later writes. This commit therefore upgrades rcu_assign_pointer() from smp_wmb() to smp_store_release() to avoid this counter-intuitive outcome. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 278a9da..32decf1 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ @@ -580,12 +581,7 @@ static inline void rcu_preempt_sleep_check(void) * please be careful when making changes to rcu_assign_pointer() and the * other macros that it invokes. */ -#define rcu_assign_pointer(p, v) \ - do { \ - smp_wmb(); \ - ACCESS_ONCE(p) = RCU_INITIALIZER(v); \ - } while (0) - +#define rcu_assign_pointer(p, v) smp_store_release(&p, RCU_INITIALIZER(v)) /** * rcu_access_pointer() - fetch RCU pointer with no dereferencing -- cgit v0.10.2 From 52e2bb958ac4f9b3c4bdd78606d279852fd72922 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 9 Feb 2014 14:35:11 +0100 Subject: rcu: Disambiguate CONFIG_RCU_NOCB_CPUs This commit fixes a grammar issue in the rcu_nohz_full_cpu() comment header, so that it is clear that the plural is CPUs not Kconfig options. Signed-off-by: Paul Bolle Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index f9b9cdd..fffe417 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2893,7 +2893,7 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp) * CPU unless the grace period has extended for too long. * * This code relies on the fact that all NO_HZ_FULL CPUs are also - * CONFIG_RCU_NOCB_CPUs. + * CONFIG_RCU_NOCB_CPU CPUs. */ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) { -- cgit v0.10.2 From ae1670339c95c3ff96ab10582506cf827c5fecc8 Mon Sep 17 00:00:00 2001 From: Shaibal Dutta Date: Fri, 31 Jan 2014 11:53:06 -0800 Subject: rcu: Move SRCU grace period work to power efficient workqueue For better use of CPU idle time, allow the scheduler to select the CPU on which the SRCU grace period work would be scheduled. This improves idle residency time and conserves power. This functionality is enabled when CONFIG_WQ_POWER_EFFICIENT is selected. Cc: Lai Jiangshan Cc: "Paul E. McKenney" Cc: Dipankar Sarma Signed-off-by: Shaibal Dutta [zoran.markovic@linaro.org: Rebased to latest kernel version. Added commit message. Fixed code alignment.] Signed-off-by: Zoran Markovic Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 5db7e92..2359779 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -398,7 +398,7 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, rcu_batch_queue(&sp->batch_queue, head); if (!sp->running) { sp->running = true; - schedule_delayed_work(&sp->work, 0); + queue_delayed_work(system_power_efficient_wq, &sp->work, 0); } spin_unlock_irqrestore(&sp->queue_lock, flags); } @@ -674,7 +674,8 @@ static void srcu_reschedule(struct srcu_struct *sp) } if (pending) - schedule_delayed_work(&sp->work, SRCU_INTERVAL); + queue_delayed_work(system_power_efficient_wq, + &sp->work, SRCU_INTERVAL); } /* -- cgit v0.10.2 From add1f0995454374d90c9d6b2c420d2fba3d0a4e3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Feb 2014 12:51:09 -0800 Subject: fs: Substitute rcu_access_pointer() for rcu_dereference_raw() (Trivial patch.) If the code is looking at the RCU-protected pointer itself, but not dereferencing it, the rcu_dereference() functions can be downgraded to rcu_access_pointer(). This commit makes this downgrade in __alloc_fd(), which simply compares the RCU-protected pointer against NULL with no dereferencing. Signed-off-by: Paul E. McKenney Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Josh Triplett diff --git a/fs/file.c b/fs/file.c index db25c2b..18f7d27 100644 --- a/fs/file.c +++ b/fs/file.c @@ -497,7 +497,7 @@ repeat: error = fd; #if 1 /* Sanity check */ - if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { + if (rcu_access_pointer(fdt->fd[fd]) != NULL) { printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); rcu_assign_pointer(fdt->fd[fd], NULL); } -- cgit v0.10.2 From 0eba801b64cc8284d9024c7ece30415a2b981a72 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 16 Feb 2014 12:15:43 +0100 Subject: netfilter: ctnetlink: force null nat binding on insert Quoting Andrey Vagin: When a conntrack is created by kernel, it is initialized (sets IPS_{DST,SRC}_NAT_DONE_BIT bits in nf_nat_setup_info) and only then it is added in hashes (__nf_conntrack_hash_insert), so one conntract can't be initialized from a few threads concurrently. ctnetlink can add an uninitialized conntrack (w/o IPS_{DST,SRC}_NAT_DONE_BIT) in hashes, then a few threads can look up this conntrack and start initialize it concurrently. It's dangerous, because BUG can be triggered from nf_nat_setup_info. Fix this race by always setting up nat, even if no CTA_NAT_ attribute was requested before inserting the ct into the hash table. In absence of CTA_NAT_ attribute, a null binding is created. This alters current behaviour: Before this patch, the first packet matching the newly injected conntrack would be run through the nat table since nf_nat_initialized() returns false. IOW, this forces ctnetlink users to specify the desired nat transformation on ct creation time. Thanks for Florian Westphal, this patch is based on his original patch to address this problem, including this patch description. Reported-By: Andrey Vagin Signed-off-by: Pablo Neira Ayuso Acked-by: Florian Westphal diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index bb322d0..b9f0e03 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1310,27 +1310,22 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) } static int -ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) +ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { #ifdef CONFIG_NF_NAT_NEEDED int ret; - if (cda[CTA_NAT_DST]) { - ret = ctnetlink_parse_nat_setup(ct, - NF_NAT_MANIP_DST, - cda[CTA_NAT_DST]); - if (ret < 0) - return ret; - } - if (cda[CTA_NAT_SRC]) { - ret = ctnetlink_parse_nat_setup(ct, - NF_NAT_MANIP_SRC, - cda[CTA_NAT_SRC]); - if (ret < 0) - return ret; - } - return 0; + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST, + cda[CTA_NAT_DST]); + if (ret < 0) + return ret; + + ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC, + cda[CTA_NAT_SRC]); + return ret; #else + if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC]) + return 0; return -EOPNOTSUPP; #endif } @@ -1659,11 +1654,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, goto err2; } - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = ctnetlink_change_nat(ct, cda); - if (err < 0) - goto err2; - } + err = ctnetlink_setup_nat(ct, cda); + if (err < 0) + goto err2; nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index d3f5cd6..52ca952 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -432,15 +432,15 @@ nf_nat_setup_info(struct nf_conn *ct, } EXPORT_SYMBOL(nf_nat_setup_info); -unsigned int -nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +static unsigned int +__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip) { /* Force range to this IP; let proto decide mapping for * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). * Use reply in case it's already been mangled (eg local packet). */ union nf_inet_addr ip = - (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + (manip == NF_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); struct nf_nat_range range = { @@ -448,7 +448,13 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) .min_addr = ip, .max_addr = ip, }; - return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); + return nf_nat_setup_info(ct, &range, manip); +} + +unsigned int +nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum)); } EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); @@ -702,9 +708,9 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { static int nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_range *range) + const struct nf_conn *ct, struct nf_nat_range *range, + const struct nf_nat_l3proto *l3proto) { - const struct nf_nat_l3proto *l3proto; struct nlattr *tb[CTA_NAT_MAX+1]; int err; @@ -714,38 +720,46 @@ nfnetlink_parse_nat(const struct nlattr *nat, if (err < 0) return err; - rcu_read_lock(); - l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); - if (l3proto == NULL) { - err = -EAGAIN; - goto out; - } err = l3proto->nlattr_to_range(tb, range); if (err < 0) - goto out; + return err; if (!tb[CTA_NAT_PROTO]) - goto out; + return 0; - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); -out: - rcu_read_unlock(); - return err; + return nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); } +/* This function is called under rcu_read_lock() */ static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) { struct nf_nat_range range; + const struct nf_nat_l3proto *l3proto; int err; - err = nfnetlink_parse_nat(attr, ct, &range); + /* Should not happen, restricted to creating new conntracks + * via ctnetlink. + */ + if (WARN_ON_ONCE(nf_nat_initialized(ct, manip))) + return -EEXIST; + + /* Make sure that L3 NAT is there by when we call nf_nat_setup_info to + * attach the null binding, otherwise this may oops. + */ + l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct)); + if (l3proto == NULL) + return -EAGAIN; + + /* No NAT information has been passed, allocate the null-binding */ + if (attr == NULL) + return __nf_nat_alloc_null_binding(ct, manip); + + err = nfnetlink_parse_nat(attr, ct, &range, l3proto); if (err < 0) return err; - if (nf_nat_initialized(ct, manip)) - return -EEXIST; return nf_nat_setup_info(ct, &range, manip); } -- cgit v0.10.2 From 2f33b512a5460578f6cf11d7b7867bed53157c7c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Nov 2013 18:25:48 -0800 Subject: rcu: Optimize rcu_is_nocb_cpu() for RCU_NOCB_CPU_ALL If CONFIG_RCU_NOCB_CPU_ALL=y, then rcu_is_nocb_cpu() will always return true, however, the current version nevertheless checks rcu_nocb_mask. This commit therefore creates a static inline implementation of rcu_is_nocb_cpu() that unconditionally returns true when CONFIG_RCU_NOCB_CPU_ALL=y. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 72bf3a0..281c90f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1015,11 +1015,13 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) -#ifdef CONFIG_RCU_NOCB_CPU +#if defined(CONFIG_RCU_NOCB_CPU_ALL) +static inline bool rcu_is_nocb_cpu(int cpu) { return true; } +#elif defined(CONFIG_RCU_NOCB_CPU) bool rcu_is_nocb_cpu(int cpu); #else static inline bool rcu_is_nocb_cpu(int cpu) { return false; } -#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ +#endif /* Only for use by adaptive-ticks code. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 6e2ef4b..39a50b9 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -2101,6 +2101,7 @@ static void rcu_init_one_nocb(struct rcu_node *rnp) init_waitqueue_head(&rnp->nocb_gp_wq[1]); } +#ifndef CONFIG_RCU_NOCB_CPU_ALL /* Is the specified CPU a no-CPUs CPU? */ bool rcu_is_nocb_cpu(int cpu) { @@ -2108,6 +2109,7 @@ bool rcu_is_nocb_cpu(int cpu) return cpumask_test_cpu(cpu, rcu_nocb_mask); return false; } +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Enqueue the specified string of rcu_head structures onto the specified -- cgit v0.10.2 From ffa83fb565fbc397cbafb4b71fd1cce276d4c3b6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Nov 2013 19:27:16 -0800 Subject: rcu: Optimize rcu_needs_cpu() for RCU_NOCB_CPU_ALL If CONFIG_RCU_NOCB_CPU_ALL=y, then rcu_needs_cpu() will always return false, however, the current version nevertheless checks for RCU callbacks. This commit therefore creates a static inline implementation of rcu_needs_cpu() that unconditionally returns false when CONFIG_RCU_NOCB_CPU_ALL=y. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 281c90f..5f7d5f4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1015,6 +1015,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define kfree_rcu(ptr, rcu_head) \ __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +{ + *delta_jiffies = ULONG_MAX; + return 0; +} +#endif /* #if defined(CONFIG_TINY_RCU) || defined(CONFIG_RCU_NOCB_CPU_ALL) */ + #if defined(CONFIG_RCU_NOCB_CPU_ALL) static inline bool rcu_is_nocb_cpu(int cpu) { return true; } #elif defined(CONFIG_RCU_NOCB_CPU) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 6f01771..9524903 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -68,12 +68,6 @@ static inline void kfree_call_rcu(struct rcu_head *head, call_rcu(head, func); } -static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) -{ - *delta_jiffies = ULONG_MAX; - return 0; -} - static inline void rcu_note_context_switch(int cpu) { rcu_sched_qs(cpu); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 72137ee..81198c8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -31,7 +31,9 @@ #define __LINUX_RCUTREE_H void rcu_note_context_switch(int cpu); +#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ void rcu_cpu_stall_reset(void); /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index b3d116c..c2c8234 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2880,7 +2880,7 @@ static int rcu_pending(int cpu) * non-NULL, store an indication of whether all callbacks are lazy. * (If there are no callbacks, all of them are deemed to be lazy.) */ -static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) +static int __maybe_unused rcu_cpu_has_callbacks(int cpu, bool *all_lazy) { bool al = true; bool hc = false; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 39a50b9..820b06a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1586,11 +1586,13 @@ static void rcu_prepare_kthreads(int cpu) * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ +#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; return rcu_cpu_has_callbacks(cpu, NULL); } +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up @@ -1696,6 +1698,7 @@ static bool rcu_try_advance_all_cbs(void) * * The caller must have disabled interrupts. */ +#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(int cpu, unsigned long *dj) { struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); @@ -1726,6 +1729,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) } return 0; } +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Prepare a CPU for idle from an RCU perspective. The first major task -- cgit v0.10.2 From f1f399d1281ea339a08469f7e58193624992f620 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Nov 2013 21:08:07 -0800 Subject: rcu: Optimize RCU_FAST_NO_HZ for RCU_NOCB_CPU_ALL If CONFIG_RCU_NOCB_CPU_ALL=y, then no CPU will ever have RCU callbacks because these callbacks will instead be handled by the rcuo kthreads. However, the current version of RCU_FAST_NO_HZ nevertheless checks for RCU callbacks. This commit therefore creates static inline implementations of rcu_prepare_for_idle() and rcu_cleanup_after_idle() that are no-ops when CONFIG_RCU_NOCB_CPU_ALL=y. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 820b06a..41afc3f 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1658,7 +1658,7 @@ extern int tick_nohz_active; * only if it has been awhile since the last time we did so. Afterwards, * if there are any callbacks ready for immediate invocation, return true. */ -static bool rcu_try_advance_all_cbs(void) +static bool __maybe_unused rcu_try_advance_all_cbs(void) { bool cbs_ready = false; struct rcu_data *rdp; @@ -1743,6 +1743,7 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) */ static void rcu_prepare_for_idle(int cpu) { +#ifndef CONFIG_RCU_NOCB_CPU_ALL struct rcu_data *rdp; struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); struct rcu_node *rnp; @@ -1794,6 +1795,7 @@ static void rcu_prepare_for_idle(int cpu) rcu_accelerate_cbs(rsp, rnp, rdp); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -1803,11 +1805,12 @@ static void rcu_prepare_for_idle(int cpu) */ static void rcu_cleanup_after_idle(int cpu) { - +#ifndef CONFIG_RCU_NOCB_CPU_ALL if (rcu_is_nocb_cpu(cpu)) return; if (rcu_try_advance_all_cbs()) invoke_rcu_core(); +#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* -- cgit v0.10.2 From 45a22f4c11fef4ecd5c61c0a299cd3f23d77be8e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 Feb 2014 13:09:50 +0100 Subject: inotify: Fix reporting of cookies for inotify events My rework of handling of notification events (namely commit 7053aee26a35 "fsnotify: do not share events between notification groups") broke sending of cookies with inotify events. We didn't propagate the value passed to fsnotify() properly and passed 4 uninitialized bytes to userspace instead (so it is also an information leak). Sadly I didn't notice this during my testing because inotify cookies aren't used very much and LTP inotify tests ignore them. Fix the problem by passing the cookie value properly. Fixes: 7053aee26a3548ebaba046ae2e52396ccf56ac6c Reported-by: Vegard Nossum Signed-off-by: Jan Kara diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index 0b9ff43..abc8cbc 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -86,7 +86,7 @@ static int dnotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { struct dnotify_mark *dn_mark; struct dnotify_struct *dn; diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 0e792f5..205dc21 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -147,7 +147,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *fanotify_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { int ret = 0; struct fanotify_event_info *event; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 1d4e1ea..9d3e9c5 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -179,7 +179,7 @@ static int send_to_group(struct inode *to_tell, return group->ops->handle_event(group, to_tell, inode_mark, vfsmount_mark, mask, data, data_is, - file_name); + file_name, cookie); } /* diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h index 485eef3..ed855ef 100644 --- a/fs/notify/inotify/inotify.h +++ b/fs/notify/inotify/inotify.h @@ -27,6 +27,6 @@ extern int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name); + const unsigned char *file_name, u32 cookie); extern const struct fsnotify_ops inotify_fsnotify_ops; diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index d5ee563..43ab1e1 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -67,7 +67,7 @@ int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; @@ -103,6 +103,7 @@ int inotify_handle_event(struct fsnotify_group *group, fsn_event = &event->fse; fsnotify_init_event(fsn_event, inode, mask); event->wd = i_mark->wd; + event->sync_cookie = cookie; event->name_len = len; if (len) strcpy(event->name, file_name); diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 497395c..6528b5a 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -495,7 +495,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, /* Queue ignore event for the watch */ inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, - NULL, FSNOTIFY_EVENT_NONE, NULL); + NULL, FSNOTIFY_EVENT_NONE, NULL, 0); i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); /* remove this mark from the idr */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 3d286ff..c84bc7c 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -99,7 +99,7 @@ struct fsnotify_ops { struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name); + const unsigned char *file_name, u32 cookie); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 67ccf0e..135944a 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -916,7 +916,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *file_name) + const unsigned char *file_name, u32 cookie) { return 0; } diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 2596fac..70b4554 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -471,7 +471,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, void *data, int data_type, - const unsigned char *dname) + const unsigned char *dname, u32 cookie) { struct inode *inode; struct audit_parent *parent; -- cgit v0.10.2 From 26c17a1c56f957560cb96d219c13d21dfb605819 Mon Sep 17 00:00:00 2001 From: Beomho Seo Date: Fri, 14 Feb 2014 10:04:00 +0000 Subject: iio: cm36651: Fix read/write integration time function. This patch is fixed [read/write] integration time function. cm36651 have integration time from 1 to 640 milliseconds. But, print more then the thousand second. when call *_integration_time attribute. Because read_integration_time function return IIO_VAL_INT. read integration time function is changed return IIO_VAL_INT_PLUS_MICRO; And then .write_raw_get_fmt callback function for parse a fixed-point number from a string. Some description is revised milliseconds unit. v2: cm36651_write_int_time function fixed as it was. Signed-off-by: Beomho Seo Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/light/cm36651.c b/drivers/iio/light/cm36651.c index 0a142af..a45e074 100644 --- a/drivers/iio/light/cm36651.c +++ b/drivers/iio/light/cm36651.c @@ -50,10 +50,10 @@ #define CM36651_CS_CONF2_DEFAULT_BIT 0x08 /* CS_CONF3 channel integration time */ -#define CM36651_CS_IT1 0x00 /* Integration time 80000 usec */ -#define CM36651_CS_IT2 0x40 /* Integration time 160000 usec */ -#define CM36651_CS_IT3 0x80 /* Integration time 320000 usec */ -#define CM36651_CS_IT4 0xC0 /* Integration time 640000 usec */ +#define CM36651_CS_IT1 0x00 /* Integration time 80 msec */ +#define CM36651_CS_IT2 0x40 /* Integration time 160 msec */ +#define CM36651_CS_IT3 0x80 /* Integration time 320 msec */ +#define CM36651_CS_IT4 0xC0 /* Integration time 640 msec */ /* PS_CONF1 command code */ #define CM36651_PS_ENABLE 0x00 @@ -64,10 +64,10 @@ #define CM36651_PS_PERS4 0x0C /* PS_CONF1 command code: integration time */ -#define CM36651_PS_IT1 0x00 /* Integration time 320 usec */ -#define CM36651_PS_IT2 0x10 /* Integration time 420 usec */ -#define CM36651_PS_IT3 0x20 /* Integration time 520 usec */ -#define CM36651_PS_IT4 0x30 /* Integration time 640 usec */ +#define CM36651_PS_IT1 0x00 /* Integration time 0.32 msec */ +#define CM36651_PS_IT2 0x10 /* Integration time 0.42 msec */ +#define CM36651_PS_IT3 0x20 /* Integration time 0.52 msec */ +#define CM36651_PS_IT4 0x30 /* Integration time 0.64 msec */ /* PS_CONF1 command code: duty ratio */ #define CM36651_PS_DR1 0x00 /* Duty ratio 1/80 */ @@ -93,8 +93,8 @@ #define CM36651_CLOSE_PROXIMITY 0x32 #define CM36651_FAR_PROXIMITY 0x33 -#define CM36651_CS_INT_TIME_AVAIL "80000 160000 320000 640000" -#define CM36651_PS_INT_TIME_AVAIL "320 420 520 640" +#define CM36651_CS_INT_TIME_AVAIL "0.08 0.16 0.32 0.64" +#define CM36651_PS_INT_TIME_AVAIL "0.000320 0.000420 0.000520 0.000640" enum cm36651_operation_mode { CM36651_LIGHT_EN, @@ -356,30 +356,30 @@ static int cm36651_read_channel(struct cm36651_data *cm36651, } static int cm36651_read_int_time(struct cm36651_data *cm36651, - struct iio_chan_spec const *chan, int *val) + struct iio_chan_spec const *chan, int *val2) { switch (chan->type) { case IIO_LIGHT: if (cm36651->cs_int_time[chan->address] == CM36651_CS_IT1) - *val = 80000; + *val2 = 80000; else if (cm36651->cs_int_time[chan->address] == CM36651_CS_IT2) - *val = 160000; + *val2 = 160000; else if (cm36651->cs_int_time[chan->address] == CM36651_CS_IT3) - *val = 320000; + *val2 = 320000; else if (cm36651->cs_int_time[chan->address] == CM36651_CS_IT4) - *val = 640000; + *val2 = 640000; else return -EINVAL; break; case IIO_PROXIMITY: if (cm36651->ps_int_time == CM36651_PS_IT1) - *val = 320; + *val2 = 320; else if (cm36651->ps_int_time == CM36651_PS_IT2) - *val = 420; + *val2 = 420; else if (cm36651->ps_int_time == CM36651_PS_IT3) - *val = 520; + *val2 = 520; else if (cm36651->ps_int_time == CM36651_PS_IT4) - *val = 640; + *val2 = 640; else return -EINVAL; break; @@ -387,7 +387,7 @@ static int cm36651_read_int_time(struct cm36651_data *cm36651, return -EINVAL; } - return IIO_VAL_INT; + return IIO_VAL_INT_PLUS_MICRO; } static int cm36651_write_int_time(struct cm36651_data *cm36651, @@ -459,7 +459,8 @@ static int cm36651_read_raw(struct iio_dev *indio_dev, ret = cm36651_read_channel(cm36651, chan, val); break; case IIO_CHAN_INFO_INT_TIME: - ret = cm36651_read_int_time(cm36651, chan, val); + *val = 0; + ret = cm36651_read_int_time(cm36651, chan, val2); break; default: ret = -EINVAL; @@ -479,7 +480,7 @@ static int cm36651_write_raw(struct iio_dev *indio_dev, int ret = -EINVAL; if (mask == IIO_CHAN_INFO_INT_TIME) { - ret = cm36651_write_int_time(cm36651, chan, val); + ret = cm36651_write_int_time(cm36651, chan, val2); if (ret < 0) dev_err(&client->dev, "Integration time write failed\n"); } -- cgit v0.10.2 From 1463a166b4eafc654874528ba5e2f66dbdfb8048 Mon Sep 17 00:00:00 2001 From: Beomho Seo Date: Fri, 14 Feb 2014 10:04:00 +0000 Subject: iio: cm32181: Change cm32181 ambient light sensor driver Integration time of cm32181 is guessed about milliseconds. But cm32181_read_als_it function return IIO_VAL_INT. So fixed to return IIO_VAL_INT_PLUS_MICRO. Next, add .write_raw_get_fmt callback function for call iio_str_to_fixpoint. v2: cm32181_write_als_id function fixed as it was. Cc: Kevin Tsai Signed-off-by: Beomho Seo Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/light/cm32181.c b/drivers/iio/light/cm32181.c index f17b4e6..47a6dba 100644 --- a/drivers/iio/light/cm32181.c +++ b/drivers/iio/light/cm32181.c @@ -103,13 +103,13 @@ static int cm32181_reg_init(struct cm32181_chip *cm32181) /** * cm32181_read_als_it() - Get sensor integration time (ms) * @cm32181: pointer of struct cm32181 - * @val: pointer of int to load the als_it value. + * @val2: pointer of int to load the als_it value. * * Report the current integartion time by millisecond. * - * Return: IIO_VAL_INT for success, otherwise -EINVAL. + * Return: IIO_VAL_INT_PLUS_MICRO for success, otherwise -EINVAL. */ -static int cm32181_read_als_it(struct cm32181_chip *cm32181, int *val) +static int cm32181_read_als_it(struct cm32181_chip *cm32181, int *val2) { u16 als_it; int i; @@ -119,8 +119,8 @@ static int cm32181_read_als_it(struct cm32181_chip *cm32181, int *val) als_it >>= CM32181_CMD_ALS_IT_SHIFT; for (i = 0; i < ARRAY_SIZE(als_it_bits); i++) { if (als_it == als_it_bits[i]) { - *val = als_it_value[i]; - return IIO_VAL_INT; + *val2 = als_it_value[i]; + return IIO_VAL_INT_PLUS_MICRO; } } @@ -221,7 +221,7 @@ static int cm32181_read_raw(struct iio_dev *indio_dev, *val = cm32181->calibscale; return IIO_VAL_INT; case IIO_CHAN_INFO_INT_TIME: - ret = cm32181_read_als_it(cm32181, val); + ret = cm32181_read_als_it(cm32181, val2); return ret; } @@ -240,7 +240,7 @@ static int cm32181_write_raw(struct iio_dev *indio_dev, cm32181->calibscale = val; return val; case IIO_CHAN_INFO_INT_TIME: - ret = cm32181_write_als_it(cm32181, val); + ret = cm32181_write_als_it(cm32181, val2); return ret; } @@ -264,7 +264,7 @@ static ssize_t cm32181_get_it_available(struct device *dev, n = ARRAY_SIZE(als_it_value); for (i = 0, len = 0; i < n; i++) - len += sprintf(buf + len, "%d ", als_it_value[i]); + len += sprintf(buf + len, "0.%06u ", als_it_value[i]); return len + sprintf(buf + len, "\n"); } -- cgit v0.10.2 From a0657716416f834ef7710a9044614d50a36c3bdc Mon Sep 17 00:00:00 2001 From: Denis CIOCCA Date: Fri, 14 Feb 2014 14:15:00 +0000 Subject: iio:gyro: bug on L3GD20H gyroscope support The driver was not able to manage the sensor: during probe function and wai check, the driver stops and writes: "device name and WhoAmI mismatch." The correct value of L3GD20H wai is 0xd7 instead of 0xd4. Dropped support for the sensor. Signed-off-by: Denis Ciocca Cc: stable@vger.kernel.org Signed-off-by: Jonathan Cameron diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig index 41c64a4..ac2d69e 100644 --- a/drivers/iio/gyro/Kconfig +++ b/drivers/iio/gyro/Kconfig @@ -70,7 +70,7 @@ config IIO_ST_GYRO_3AXIS select IIO_TRIGGERED_BUFFER if (IIO_BUFFER) help Say yes here to build support for STMicroelectronics gyroscopes: - L3G4200D, LSM330DL, L3GD20, L3GD20H, LSM330DLC, L3G4IS, LSM330. + L3G4200D, LSM330DL, L3GD20, LSM330DLC, L3G4IS, LSM330. This driver can also be built as a module. If so, these modules will be created: diff --git a/drivers/iio/gyro/st_gyro.h b/drivers/iio/gyro/st_gyro.h index f8f2bf8..c197360 100644 --- a/drivers/iio/gyro/st_gyro.h +++ b/drivers/iio/gyro/st_gyro.h @@ -19,7 +19,6 @@ #define LSM330DL_GYRO_DEV_NAME "lsm330dl_gyro" #define LSM330DLC_GYRO_DEV_NAME "lsm330dlc_gyro" #define L3GD20_GYRO_DEV_NAME "l3gd20" -#define L3GD20H_GYRO_DEV_NAME "l3gd20h" #define L3G4IS_GYRO_DEV_NAME "l3g4is_ui" #define LSM330_GYRO_DEV_NAME "lsm330_gyro" diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index d53d91a..a8e174a 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -167,11 +167,10 @@ static const struct st_sensors st_gyro_sensors[] = { .wai = ST_GYRO_2_WAI_EXP, .sensors_supported = { [0] = L3GD20_GYRO_DEV_NAME, - [1] = L3GD20H_GYRO_DEV_NAME, - [2] = LSM330D_GYRO_DEV_NAME, - [3] = LSM330DLC_GYRO_DEV_NAME, - [4] = L3G4IS_GYRO_DEV_NAME, - [5] = LSM330_GYRO_DEV_NAME, + [1] = LSM330D_GYRO_DEV_NAME, + [2] = LSM330DLC_GYRO_DEV_NAME, + [3] = L3G4IS_GYRO_DEV_NAME, + [4] = LSM330_GYRO_DEV_NAME, }, .ch = (struct iio_chan_spec *)st_gyro_16bit_channels, .odr = { diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c index 16b8b8d..23c12f3 100644 --- a/drivers/iio/gyro/st_gyro_i2c.c +++ b/drivers/iio/gyro/st_gyro_i2c.c @@ -55,7 +55,6 @@ static const struct i2c_device_id st_gyro_id_table[] = { { LSM330DL_GYRO_DEV_NAME }, { LSM330DLC_GYRO_DEV_NAME }, { L3GD20_GYRO_DEV_NAME }, - { L3GD20H_GYRO_DEV_NAME }, { L3G4IS_GYRO_DEV_NAME }, { LSM330_GYRO_DEV_NAME }, {}, diff --git a/drivers/iio/gyro/st_gyro_spi.c b/drivers/iio/gyro/st_gyro_spi.c index 94763e2..b4ad3be 100644 --- a/drivers/iio/gyro/st_gyro_spi.c +++ b/drivers/iio/gyro/st_gyro_spi.c @@ -54,7 +54,6 @@ static const struct spi_device_id st_gyro_id_table[] = { { LSM330DL_GYRO_DEV_NAME }, { LSM330DLC_GYRO_DEV_NAME }, { L3GD20_GYRO_DEV_NAME }, - { L3GD20H_GYRO_DEV_NAME }, { L3G4IS_GYRO_DEV_NAME }, { LSM330_GYRO_DEV_NAME }, {}, -- cgit v0.10.2 From 63edea16b7fea3e422e63e218c9e7349b2d0a63b Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Sun, 16 Feb 2014 20:10:55 +0100 Subject: dma: sdma: Add imx25 compatible imx25 did not work without a firmware previously. This patch adds a DT compatible to pass the correct data with the default script addresses for imx25. Add imx25 compatible to the list of compatibles in the binding documentation. Signed-off-by: Markus Pargmann Acked-by: Shawn Guo Signed-off-by: Vinod Koul diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt index 68b83ec..ee9be99 100644 --- a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt +++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt @@ -1,12 +1,16 @@ * Freescale Smart Direct Memory Access (SDMA) Controller for i.MX Required properties: -- compatible : Should be "fsl,imx31-sdma", "fsl,imx31-to1-sdma", - "fsl,imx31-to2-sdma", "fsl,imx35-sdma", "fsl,imx35-to1-sdma", - "fsl,imx35-to2-sdma", "fsl,imx51-sdma", "fsl,imx53-sdma" or - "fsl,imx6q-sdma". The -to variants should be preferred since they - allow to determnine the correct ROM script addresses needed for - the driver to work without additional firmware. +- compatible : Should be one of + "fsl,imx25-sdma" + "fsl,imx31-sdma", "fsl,imx31-to1-sdma", "fsl,imx31-to2-sdma" + "fsl,imx35-sdma", "fsl,imx35-to1-sdma", "fsl,imx35-to2-sdma" + "fsl,imx51-sdma" + "fsl,imx53-sdma" + "fsl,imx6q-sdma" + The -to variants should be preferred since they allow to determnine the + correct ROM script addresses needed for the driver to work without additional + firmware. - reg : Should contain SDMA registers location and length - interrupts : Should contain SDMA interrupt - #dma-cells : Must be <3>. diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 4e79183..19041ce 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -449,6 +449,7 @@ static const struct of_device_id sdma_dt_ids[] = { { .compatible = "fsl,imx51-sdma", .data = &sdma_imx51, }, { .compatible = "fsl,imx35-sdma", .data = &sdma_imx35, }, { .compatible = "fsl,imx31-sdma", .data = &sdma_imx31, }, + { .compatible = "fsl,imx25-sdma", .data = &sdma_imx25, }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, sdma_dt_ids); -- cgit v0.10.2 From 17f22a3fbc9d81c1d8f9f853dd23d0e5e8f4c994 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Jan 2014 12:55:32 -0300 Subject: perf report: Use al->cpumode where applicable We don't need to recalculate cpumode from the perf_event->header field, as this is already available in the struct addr_location->cpumode field. Remove the function signature of functions that receive both perf_event and addr_location parameters but use perf_event just to extract the cpumode. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-tmct07y7mka54allj82trlnx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3c53ec2..8f9d6ed 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -76,12 +76,10 @@ static int report__config(const char *var, const char *value, void *cb) } static int report__add_mem_hist_entry(struct perf_tool *tool, struct addr_location *al, - struct perf_sample *sample, struct perf_evsel *evsel, - union perf_event *event) + struct perf_sample *sample, struct perf_evsel *evsel) { struct report *rep = container_of(tool, struct report, tool); struct symbol *parent = NULL; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct hist_entry *he; struct mem_info *mi, *mx; uint64_t cost; @@ -90,7 +88,7 @@ static int report__add_mem_hist_entry(struct perf_tool *tool, struct addr_locati if (err) return err; - mi = machine__resolve_mem(al->machine, al->thread, sample, cpumode); + mi = machine__resolve_mem(al->machine, al->thread, sample, al->cpumode); if (!mi) return -ENOMEM; @@ -240,7 +238,7 @@ static int process_sample_event(struct perf_tool *tool, if (ret < 0) pr_debug("problem adding lbr entry, skipping event\n"); } else if (rep->mem_mode == 1) { - ret = report__add_mem_hist_entry(tool, &al, sample, evsel, event); + ret = report__add_mem_hist_entry(tool, &al, sample, evsel); if (ret < 0) pr_debug("problem adding mem entry, skipping event\n"); } else { -- cgit v0.10.2 From e80faac0460f178a5be576b4260897f997109e73 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Jan 2014 13:05:06 -0300 Subject: perf tools: Shorten sample symbol resolving function signature Since three of the parameters come from the same 'struct addr_location', rename machine__resolve_mem() to sample__resolve_mem() and pass the that addr_location instead. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-3f5otpssefh9l5hi1t259h8n@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8f9d6ed..0d381dc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -88,7 +88,7 @@ static int report__add_mem_hist_entry(struct perf_tool *tool, struct addr_locati if (err) return err; - mi = machine__resolve_mem(al->machine, al->thread, sample, al->cpumode); + mi = sample__resolve_mem(sample, al); if (!mi) return -ENOMEM; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index e4e6249..6240ca42 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -369,7 +369,7 @@ static struct hist_entry *add_hist_entry(struct hists *hists, he_stat__add_period(&he->stat, period, weight); /* - * This mem info was allocated from machine__resolve_mem + * This mem info was allocated from sample__resolve_mem * and will not be used anymore. */ zfree(&entry->mem_info); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c872991..0d304d8 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1238,18 +1238,17 @@ static void ip__resolve_data(struct machine *machine, struct thread *thread, ams->map = al.map; } -struct mem_info *machine__resolve_mem(struct machine *machine, - struct thread *thr, - struct perf_sample *sample, - u8 cpumode) +struct mem_info *sample__resolve_mem(struct perf_sample *sample, + struct addr_location *al) { struct mem_info *mi = zalloc(sizeof(*mi)); if (!mi) return NULL; - ip__resolve_ams(machine, thr, &mi->iaddr, sample->ip); - ip__resolve_data(machine, thr, cpumode, &mi->daddr, sample->addr); + ip__resolve_ams(al->machine, al->thread, &mi->iaddr, sample->ip); + ip__resolve_data(al->machine, al->thread, al->cpumode, + &mi->daddr, sample->addr); mi->data_src.val = sample->data_src; return mi; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index f77e91e..9ddacd9 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -94,9 +94,8 @@ void machine__delete(struct machine *machine); struct branch_info *machine__resolve_bstack(struct machine *machine, struct thread *thread, struct branch_stack *bs); -struct mem_info *machine__resolve_mem(struct machine *machine, - struct thread *thread, - struct perf_sample *sample, u8 cpumode); +struct mem_info *sample__resolve_mem(struct perf_sample *sample, + struct addr_location *al); int machine__resolve_callchain(struct machine *machine, struct perf_evsel *evsel, struct thread *thread, -- cgit v0.10.2 From 644f2df29faf66f408fea2e50f16d3b5302403da Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Jan 2014 13:15:36 -0300 Subject: perf tools: Shorten sample symbol resolving function signature Since two of the parameters come from the same 'struct addr_location', rename machine__resolve_bstack() to sample__resolve_bstack() and pass the that addr_location instead. This is also for consistency with the same change that resulted in the sample__resolve_mem() function. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-99ecqt8jiyyksiyx3se7l5ia@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0d381dc..6b7a0a0 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -140,8 +140,7 @@ static int report__add_branch_hist_entry(struct perf_tool *tool, struct addr_loc if (err) return err; - bi = machine__resolve_bstack(al->machine, al->thread, - sample->branch_stack); + bi = sample__resolve_bstack(sample, al); if (!bi) return -ENOMEM; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6240ca42..0466efa 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -290,7 +290,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template) if (he->branch_info) { /* * This branch info is (a part of) allocated from - * machine__resolve_bstack() and will be freed after + * sample__resolve_bstack() and will be freed after * adding new entries. So we need to save a copy. */ he->branch_info = malloc(sizeof(*he->branch_info)); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 0d304d8..6c08ab0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1254,20 +1254,19 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, return mi; } -struct branch_info *machine__resolve_bstack(struct machine *machine, - struct thread *thr, - struct branch_stack *bs) +struct branch_info *sample__resolve_bstack(struct perf_sample *sample, + struct addr_location *al) { - struct branch_info *bi; unsigned int i; + const struct branch_stack *bs = sample->branch_stack; + struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); - bi = calloc(bs->nr, sizeof(struct branch_info)); if (!bi) return NULL; for (i = 0; i < bs->nr; i++) { - ip__resolve_ams(machine, thr, &bi[i].to, bs->entries[i].to); - ip__resolve_ams(machine, thr, &bi[i].from, bs->entries[i].from); + ip__resolve_ams(al->machine, al->thread, &bi[i].to, bs->entries[i].to); + ip__resolve_ams(al->machine, al->thread, &bi[i].from, bs->entries[i].from); bi[i].flags = bs->entries[i].flags; } return bi; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 9ddacd9..2e6c248 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -91,9 +91,8 @@ void machine__delete_dead_threads(struct machine *machine); void machine__delete_threads(struct machine *machine); void machine__delete(struct machine *machine); -struct branch_info *machine__resolve_bstack(struct machine *machine, - struct thread *thread, - struct branch_stack *bs); +struct branch_info *sample__resolve_bstack(struct perf_sample *sample, + struct addr_location *al); struct mem_info *sample__resolve_mem(struct perf_sample *sample, struct addr_location *al); int machine__resolve_callchain(struct machine *machine, -- cgit v0.10.2 From 106395dfda0397a6a88cc309bb7beb1fdf2ed798 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 22 Jan 2014 13:21:32 -0300 Subject: perf report: Remove some needless container_of usage Since all it wants is to get the 'struct record' from the received 'struct perf_tool', and this is already done at the callers of these functions, short circuit it. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-xz8p659sjpad396vye5t24gx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 6b7a0a0..d882b6f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -75,10 +75,9 @@ static int report__config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } -static int report__add_mem_hist_entry(struct perf_tool *tool, struct addr_location *al, +static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al, struct perf_sample *sample, struct perf_evsel *evsel) { - struct report *rep = container_of(tool, struct report, tool); struct symbol *parent = NULL; struct hist_entry *he; struct mem_info *mi, *mx; @@ -127,10 +126,9 @@ out: return err; } -static int report__add_branch_hist_entry(struct perf_tool *tool, struct addr_location *al, +static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al, struct perf_sample *sample, struct perf_evsel *evsel) { - struct report *rep = container_of(tool, struct report, tool); struct symbol *parent = NULL; unsigned i; struct hist_entry *he; @@ -181,10 +179,9 @@ out: return err; } -static int report__add_hist_entry(struct perf_tool *tool, struct perf_evsel *evsel, +static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel, struct addr_location *al, struct perf_sample *sample) { - struct report *rep = container_of(tool, struct report, tool); struct symbol *parent = NULL; struct hist_entry *he; int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); @@ -233,18 +230,18 @@ static int process_sample_event(struct perf_tool *tool, return 0; if (sort__mode == SORT_MODE__BRANCH) { - ret = report__add_branch_hist_entry(tool, &al, sample, evsel); + ret = report__add_branch_hist_entry(rep, &al, sample, evsel); if (ret < 0) pr_debug("problem adding lbr entry, skipping event\n"); } else if (rep->mem_mode == 1) { - ret = report__add_mem_hist_entry(tool, &al, sample, evsel); + ret = report__add_mem_hist_entry(rep, &al, sample, evsel); if (ret < 0) pr_debug("problem adding mem entry, skipping event\n"); } else { if (al.map != NULL) al.map->dso->hit = 1; - ret = report__add_hist_entry(tool, evsel, &al, sample); + ret = report__add_hist_entry(rep, evsel, &al, sample); if (ret < 0) pr_debug("problem incrementing symbol period, skipping event\n"); } -- cgit v0.10.2 From bc5290869d0a7f7abbde76ac95a7f7b6f5d7bb7b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Feb 2014 12:44:41 +0100 Subject: perf tools: Put proper period for for samples without PERIOD sample_type We use PERF_SAMPLE_PERIOD sample type only for frequency setup -F (default) option. The -c does not need store period, because it's always the same. In -c case the report code uses '1' as period. Fixing it to perf_event_attr::sample_period. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1391427883-13443-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 55407c5..c6f8ce9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1220,7 +1220,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, memset(data, 0, sizeof(*data)); data->cpu = data->pid = data->tid = -1; data->stream_id = data->id = data->time = -1ULL; - data->period = 1; + data->period = evsel->attr.sample_period; data->weight = 0; if (event->header.type != PERF_RECORD_SAMPLE) { -- cgit v0.10.2 From eb853e80324fa87faf7ae7e1a763ad643f908f2d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Feb 2014 12:44:42 +0100 Subject: perf tools: Add call-graph option support into .perfconfig Adding call-graph option support into .perfconfig file, so it's now possible use call-graph option like: [top] call-graph = fp [record] call-graph = dwarf,8192 Above options ONLY setup the unwind method. To enable perf record/top to actually use it the command line option -g/-G must be specified. The --call-graph option overloads .perfconfig setup. Assuming above configuration: $ perf record -g ls - enables dwarf unwind with user stack size dump 8192 bytes $ perf top -G - enables frame pointer unwind $ perf record --call-graph=fp ls - enables frame pointer unwind $ perf top --call-graph=dwarf,4096 ls - enables dwarf unwind with user stack size dump 4096 bytes Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1391427883-13443-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index af47531..be9e8bc 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -749,6 +749,8 @@ int record_parse_callchain_opt(const struct option *opt, struct record_opts *opts = opt->value; int ret; + opts->call_graph_enabled = !unset; + /* --no-call-graph */ if (unset) { opts->call_graph = CALLCHAIN_NONE; @@ -769,6 +771,8 @@ int record_callchain_opt(const struct option *opt, { struct record_opts *opts = opt->value; + opts->call_graph_enabled = !unset; + if (opts->call_graph == CALLCHAIN_NONE) opts->call_graph = CALLCHAIN_FP; @@ -776,6 +780,16 @@ int record_callchain_opt(const struct option *opt, return 0; } +static int perf_record_config(const char *var, const char *value, void *cb) +{ + struct record *rec = cb; + + if (!strcmp(var, "record.call-graph")) + return record_parse_callchain(value, &rec->opts); + + return perf_default_config(var, value, cb); +} + static const char * const record_usage[] = { "perf record [] []", "perf record [] -- []", @@ -907,6 +921,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (rec->evlist == NULL) return -ENOMEM; + perf_config(perf_record_config, rec); + argc = parse_options(argc, argv, record_options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && target__none(&rec->opts.target)) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 76cd510..ed99ec4 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -991,6 +991,16 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) return record_parse_callchain_opt(opt, arg, unset); } +static int perf_top_config(const char *var, const char *value, void *cb) +{ + struct perf_top *top = cb; + + if (!strcmp(var, "top.call-graph")) + return record_parse_callchain(value, &top->record_opts); + + return perf_default_config(var, value, cb); +} + static int parse_percent_limit(const struct option *opt, const char *arg, int unset __maybe_unused) @@ -1115,6 +1125,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (top.evlist == NULL) return -ENOMEM; + perf_config(perf_top_config, &top); + argc = parse_options(argc, argv, options, top_usage, 0); if (argc) usage_with_options(top_usage, options); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index e84fa26..2078f33 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -257,6 +257,7 @@ enum perf_call_graph_mode { struct record_opts { struct target target; int call_graph; + bool call_graph_enabled; bool group; bool inherit_stat; bool no_buffering; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c6f8ce9..8201abe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -595,7 +595,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->mmap_data = track; } - if (opts->call_graph) { + if (opts->call_graph_enabled) { perf_evsel__set_sample_bit(evsel, CALLCHAIN); if (opts->call_graph == CALLCHAIN_DWARF) { -- cgit v0.10.2 From a601fdff1af20ea0208e918f5e97a247a3c37a40 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 3 Feb 2014 12:44:43 +0100 Subject: perf record: Add readable output for callchain debug Adding people readable output for callchain debug, to get following '-v' output: $ perf record -v -g ls callchain: type DWARF callchain: stack dump size 4096 ... Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1391427883-13443-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index be9e8bc..7b8f0e6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -735,7 +735,9 @@ int record_parse_callchain(const char *arg, struct record_opts *opts) static void callchain_debug(struct record_opts *opts) { - pr_debug("callchain: type %d\n", opts->call_graph); + static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" }; + + pr_debug("callchain: type %s\n", str[opts->call_graph]); if (opts->call_graph == CALLCHAIN_DWARF) pr_debug("callchain: stack dump size %d\n", diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 2078f33..6898ad0 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -251,7 +251,8 @@ void pthread__unblock_sigwinch(void); enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, - CALLCHAIN_DWARF + CALLCHAIN_DWARF, + CALLCHAIN_MAX }; struct record_opts { -- cgit v0.10.2 From b58f608e31010cb76ee953a6919f9d96b4eb58d9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 2 Feb 2014 22:38:49 +0100 Subject: perf tools: Fix memory leak in event_format__print function Properly destroying trace_seq object. Signed-off-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1391377150-23920-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index e0d6d07f..c36636f 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -126,6 +126,7 @@ void event_format__print(struct event_format *event, trace_seq_init(&s); pevent_event_info(&s, event, &record); trace_seq_do_printf(&s); + trace_seq_destroy(&s); } void parse_proc_kallsyms(struct pevent *pevent, -- cgit v0.10.2 From 3c8b06f981091f91ee603768855e9739a8938296 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:21 +0100 Subject: perf tests x86: Introduce perf_regs_load function Introducing perf_regs_load function, which is going to be used for dwarf unwind test in following patches. It takes single argument as a pointer to the regs dump buffer and populates it with current registers values. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-5-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 8801fe0..1cbef73 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -4,6 +4,7 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif ifndef NO_LIBUNWIND LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o endif LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index e84ca76..f3435d62 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -5,6 +5,8 @@ #include "../../util/types.h" #include +void perf_regs_load(u64 *regs); + #ifndef HAVE_ARCH_X86_64_SUPPORT #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) #else diff --git a/tools/perf/arch/x86/tests/regs_load.S b/tools/perf/arch/x86/tests/regs_load.S new file mode 100644 index 0000000..99167bf --- /dev/null +++ b/tools/perf/arch/x86/tests/regs_load.S @@ -0,0 +1,92 @@ + +#include + +#define AX 0 +#define BX 1 * 8 +#define CX 2 * 8 +#define DX 3 * 8 +#define SI 4 * 8 +#define DI 5 * 8 +#define BP 6 * 8 +#define SP 7 * 8 +#define IP 8 * 8 +#define FLAGS 9 * 8 +#define CS 10 * 8 +#define SS 11 * 8 +#define DS 12 * 8 +#define ES 13 * 8 +#define FS 14 * 8 +#define GS 15 * 8 +#define R8 16 * 8 +#define R9 17 * 8 +#define R10 18 * 8 +#define R11 19 * 8 +#define R12 20 * 8 +#define R13 21 * 8 +#define R14 22 * 8 +#define R15 23 * 8 + +.text +#ifdef HAVE_ARCH_X86_64_SUPPORT +ENTRY(perf_regs_load) + movq %rax, AX(%rdi) + movq %rbx, BX(%rdi) + movq %rcx, CX(%rdi) + movq %rdx, DX(%rdi) + movq %rsi, SI(%rdi) + movq %rdi, DI(%rdi) + movq %rbp, BP(%rdi) + + leaq 8(%rsp), %rax /* exclude this call. */ + movq %rax, SP(%rdi) + + movq 0(%rsp), %rax + movq %rax, IP(%rdi) + + movq $0, FLAGS(%rdi) + movq $0, CS(%rdi) + movq $0, SS(%rdi) + movq $0, DS(%rdi) + movq $0, ES(%rdi) + movq $0, FS(%rdi) + movq $0, GS(%rdi) + + movq %r8, R8(%rdi) + movq %r9, R9(%rdi) + movq %r10, R10(%rdi) + movq %r11, R11(%rdi) + movq %r12, R12(%rdi) + movq %r13, R13(%rdi) + movq %r14, R14(%rdi) + movq %r15, R15(%rdi) + ret +ENDPROC(perf_regs_load) +#else +ENTRY(perf_regs_load) + push %edi + movl 8(%esp), %edi + movl %eax, AX(%edi) + movl %ebx, BX(%edi) + movl %ecx, CX(%edi) + movl %edx, DX(%edi) + movl %esi, SI(%edi) + pop %eax + movl %eax, DI(%edi) + movl %ebp, BP(%edi) + + leal 4(%esp), %eax /* exclude this call. */ + movl %eax, SP(%edi) + + movl 0(%esp), %eax + movl %eax, IP(%edi) + + movl $0, FLAGS(%edi) + movl $0, CS(%edi) + movl $0, SS(%edi) + movl $0, DS(%edi) + movl $0, ES(%edi) + movl $0, FS(%edi) + movl $0, GS(%edi) + ret +ENDPROC(perf_regs_load) +#endif -- cgit v0.10.2 From aa16b81fe916378ef6474530c59f719c36cd6ec4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:22 +0100 Subject: perf tests x86: Add dwarf unwind test Adding dwarf unwind test, that setups live machine data over the perf test thread and does the remote unwind. At this moment this test fails due to bug in the max_stack processing in unwind__get_entries function. This is fixed in following patch. Need to use -fno-optimize-sibling-calls for test compilation, otherwise 'krava_*' function calls are optimized into jumps and ommited from the stack unwind. So far it's enabled only for x86. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-6-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 7257e7e..10fcf33 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -408,6 +408,11 @@ endif LIB_OBJS += $(OUTPUT)tests/code-reading.o LIB_OBJS += $(OUTPUT)tests/sample-parsing.o LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o +ifndef NO_LIBUNWIND +ifeq ($(ARCH),x86) +LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o +endif +endif BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o BUILTIN_OBJS += $(OUTPUT)builtin-bench.o @@ -655,6 +660,9 @@ $(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS -DPYTHON='"$(PYTHON_WORD)"' \ $< +$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c + $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $< + $(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 1cbef73..948ea6c 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -5,6 +5,7 @@ endif ifndef NO_LIBUNWIND LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index f3435d62..fc819ca 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -9,12 +9,16 @@ void perf_regs_load(u64 *regs); #ifndef HAVE_ARCH_X86_64_SUPPORT #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) +#define PERF_REGS_MAX PERF_REG_X86_32_MAX +#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 #else #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ (1ULL << PERF_REG_X86_ES) | \ (1ULL << PERF_REG_X86_FS) | \ (1ULL << PERF_REG_X86_GS)) #define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT) +#define PERF_REGS_MAX PERF_REG_X86_64_MAX +#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 #endif #define PERF_REG_IP PERF_REG_X86_IP #define PERF_REG_SP PERF_REG_X86_SP diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c new file mode 100644 index 0000000..371f849 --- /dev/null +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -0,0 +1,58 @@ +#include +#include "perf_regs.h" +#include "thread.h" +#include "map.h" +#include "event.h" +#include "tests/tests.h" + +#define STACK_SIZE 8192 + +static int sample_ustack(struct perf_sample *sample, + struct thread *thread, u64 *regs) +{ + struct stack_dump *stack = &sample->user_stack; + struct map *map; + unsigned long sp; + u64 stack_size, *buf; + + buf = malloc(STACK_SIZE); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + sp = (unsigned long) regs[PERF_REG_X86_SP]; + + map = map_groups__find(&thread->mg, MAP__FUNCTION, (u64) sp); + if (!map) { + pr_debug("failed to get stack map\n"); + return -1; + } + + stack_size = map->end - sp; + stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size; + + memcpy(buf, (void *) sp, stack_size); + stack->data = (char *) buf; + stack->size = stack_size; + return 0; +} + +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread) +{ + struct regs_dump *regs = &sample->user_regs; + u64 *buf; + + buf = malloc(sizeof(u64) * PERF_REGS_MAX); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + perf_regs_load(buf); + regs->abi = PERF_SAMPLE_REGS_ABI; + regs->regs = buf; + + return sample_ustack(sample, thread, buf); +} diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 1e67437..f5a6ffb 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -115,6 +115,14 @@ static struct test { .desc = "Test parsing with no sample_id_all bit set", .func = test__parse_no_sample_id_all, }, +#if defined(__x86_64__) || defined(__i386__) +#ifdef HAVE_LIBUNWIND_SUPPORT + { + .desc = "Test dwarf unwind", + .func = test__dwarf_unwind, + }, +#endif +#endif { .func = NULL, }, diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c new file mode 100644 index 0000000..a203c0c --- /dev/null +++ b/tools/perf/tests/dwarf-unwind.c @@ -0,0 +1,144 @@ +#include +#include +#include +#include "tests.h" +#include "debug.h" +#include "machine.h" +#include "event.h" +#include "unwind.h" +#include "perf_regs.h" +#include "map.h" +#include "thread.h" + +static int mmap_handler(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_mmap_event(machine, event, NULL); +} + +static int init_live_machine(struct machine *machine) +{ + union perf_event event; + pid_t pid = getpid(); + + return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, + mmap_handler, machine, true); +} + +#define MAX_STACK 6 + +static int unwind_entry(struct unwind_entry *entry, void *arg) +{ + unsigned long *cnt = (unsigned long *) arg; + char *symbol = entry->sym ? entry->sym->name : NULL; + static const char *funcs[MAX_STACK] = { + "test__arch_unwind_sample", + "unwind_thread", + "krava_3", + "krava_2", + "krava_1", + "test__dwarf_unwind" + }; + + if (*cnt >= MAX_STACK) { + pr_debug("failed: crossed the max stack value %d\n", MAX_STACK); + return -1; + } + + if (!symbol) { + pr_debug("failed: got unresolved address 0x%" PRIx64 "\n", + entry->ip); + return -1; + } + + pr_debug("got: %s 0x%" PRIx64 "\n", symbol, entry->ip); + return strcmp((const char *) symbol, funcs[(*cnt)++]); +} + +__attribute__ ((noinline)) +static int unwind_thread(struct thread *thread, struct machine *machine) +{ + struct perf_sample sample; + unsigned long cnt = 0; + int err = -1; + + memset(&sample, 0, sizeof(sample)); + + if (test__arch_unwind_sample(&sample, thread)) { + pr_debug("failed to get unwind sample\n"); + goto out; + } + + err = unwind__get_entries(unwind_entry, &cnt, machine, thread, + PERF_REGS_MASK, &sample, MAX_STACK); + if (err) + pr_debug("unwind failed\n"); + else if (cnt != MAX_STACK) { + pr_debug("got wrong number of stack entries %lu != %d\n", + cnt, MAX_STACK); + err = -1; + } + + out: + free(sample.user_stack.data); + free(sample.user_regs.regs); + return err; +} + +__attribute__ ((noinline)) +static int krava_3(struct thread *thread, struct machine *machine) +{ + return unwind_thread(thread, machine); +} + +__attribute__ ((noinline)) +static int krava_2(struct thread *thread, struct machine *machine) +{ + return krava_3(thread, machine); +} + +__attribute__ ((noinline)) +static int krava_1(struct thread *thread, struct machine *machine) +{ + return krava_2(thread, machine); +} + +int test__dwarf_unwind(void) +{ + struct machines machines; + struct machine *machine; + struct thread *thread; + int err = -1; + + machines__init(&machines); + + machine = machines__find(&machines, HOST_KERNEL_ID); + if (!machine) { + pr_err("Could not get machine\n"); + return -1; + } + + if (init_live_machine(machine)) { + pr_err("Could not init machine\n"); + goto out; + } + + if (verbose > 1) + machine__fprintf(machine, stderr); + + thread = machine__find_thread(machine, getpid()); + if (!thread) { + pr_err("Could not get thread\n"); + goto out; + } + + err = krava_1(thread, machine); + + out: + machine__delete_threads(machine); + machine__exit(machine); + machines__exit(&machines); + return err; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index e0ac713..8979309 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -40,5 +40,14 @@ int test__code_reading(void); int test__sample_parsing(void); int test__keep_tracking(void); int test__parse_no_sample_id_all(void); +int test__dwarf_unwind(void); +#if defined(__x86_64__) || defined(__i386__) +#ifdef HAVE_LIBUNWIND_SUPPORT +struct thread; +struct perf_sample; +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread); +#endif +#endif #endif /* TESTS_H */ -- cgit v0.10.2 From b42dc32d4f91e4c0f34b628fdb012eb423da9e69 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:23 +0100 Subject: perf tools: Fix dwarf unwind max_stack processing The 'unwind__get_entries' function currently returns 'max_stack + 1' entries (instead of exact max_stack entries), because max_stack value does not get decremented for the first entry. This fix makes dwarf-unwind test pass. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-7-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c index 742f23b..bff3209 100644 --- a/tools/perf/util/unwind.c +++ b/tools/perf/util/unwind.c @@ -595,5 +595,5 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (ret) return -ENOMEM; - return get_entries(&ui, cb, arg, max_stack); + return --max_stack > 0 ? get_entries(&ui, cb, arg, max_stack) : 0; } -- cgit v0.10.2 From 1cf0382af98f6365b01b59453fe18dffe3c73d2f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:24 +0100 Subject: perf callchain: Do not report zero address in unwind We are not interested in zero addresses in callchain, do not report them. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-8-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c index bff3209..3b70181 100644 --- a/tools/perf/util/unwind.c +++ b/tools/perf/util/unwind.c @@ -563,7 +563,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, unw_word_t ip; unw_get_reg(&c, UNW_REG_IP, &ip); - ret = entry(ip, ui->thread, ui->machine, cb, arg); + ret = ip ? entry(ip, ui->thread, ui->machine, cb, arg) : 0; } unw_destroy_addr_space(addr_space); -- cgit v0.10.2 From 352ea45a7229df8f5ae83c0757f6d426ba0f41b5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:25 +0100 Subject: perf callchain: Add mask into struct regs_dump Adding mask info into struct regs_dump to make the registers information compact. The mask was always passed along, so logically the mask info fits more into the struct regs_dump. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-9-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 371f849..b602ad9 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -53,6 +53,7 @@ int test__arch_unwind_sample(struct perf_sample *sample, perf_regs_load(buf); regs->abi = PERF_SAMPLE_REGS_ABI; regs->regs = buf; + regs->mask = PERF_REGS_MASK; return sample_ustack(sample, thread, buf); } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index b346601..3a73875 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -312,7 +312,6 @@ found: sample_sw.period = sample->period; sample_sw.time = sample->time; perf_event__synthesize_sample(event_sw, evsel->attr.sample_type, - evsel->attr.sample_regs_user, evsel->attr.read_format, &sample_sw, false); build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index a203c0c..f16ea28 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -72,7 +72,7 @@ static int unwind_thread(struct thread *thread, struct machine *machine) } err = unwind__get_entries(unwind_entry, &cnt, machine, thread, - PERF_REGS_MASK, &sample, MAX_STACK); + &sample, MAX_STACK); if (err) pr_debug("unwind failed\n"); else if (cnt != MAX_STACK) { diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 1b67720..0014d3c 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -22,8 +22,8 @@ } while (0) static bool samples_same(const struct perf_sample *s1, - const struct perf_sample *s2, u64 type, u64 regs_user, - u64 read_format) + const struct perf_sample *s2, + u64 type, u64 read_format) { size_t i; @@ -95,8 +95,9 @@ static bool samples_same(const struct perf_sample *s1, } if (type & PERF_SAMPLE_REGS_USER) { - size_t sz = hweight_long(regs_user) * sizeof(u64); + size_t sz = hweight_long(s1->user_regs.mask) * sizeof(u64); + COMP(user_regs.mask); COMP(user_regs.abi); if (s1->user_regs.abi && (!s1->user_regs.regs || !s2->user_regs.regs || @@ -174,6 +175,7 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format) .branch_stack = &branch_stack.branch_stack, .user_regs = { .abi = PERF_SAMPLE_REGS_ABI_64, + .mask = sample_regs_user, .regs = user_regs, }, .user_stack = { @@ -201,8 +203,7 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format) sample.read.one.id = 99; } - sz = perf_event__sample_event_size(&sample, sample_type, - sample_regs_user, read_format); + sz = perf_event__sample_event_size(&sample, sample_type, read_format); bufsz = sz + 4096; /* Add a bit for overrun checking */ event = malloc(bufsz); if (!event) { @@ -215,8 +216,7 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format) event->header.misc = 0; event->header.size = sz; - err = perf_event__synthesize_sample(event, sample_type, - sample_regs_user, read_format, + err = perf_event__synthesize_sample(event, sample_type, read_format, &sample, false); if (err) { pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", @@ -244,8 +244,7 @@ static int do_test(u64 sample_type, u64 sample_regs_user, u64 read_format) goto out_free; } - if (!samples_same(&sample, &sample_out, sample_type, - sample_regs_user, read_format)) { + if (!samples_same(&sample, &sample_out, sample_type, read_format)) { pr_debug("parsing failed for sample_type %#"PRIx64"\n", sample_type); goto out_free; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 851fa06..38457d4 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -85,6 +85,7 @@ struct sample_event { struct regs_dump { u64 abi; + u64 mask; u64 *regs; }; @@ -259,9 +260,9 @@ int perf_event__preprocess_sample(const union perf_event *event, const char *perf_event__name(unsigned int id); size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, - u64 sample_regs_user, u64 read_format); + u64 read_format); int perf_event__synthesize_sample(union perf_event *event, u64 type, - u64 sample_regs_user, u64 read_format, + u64 read_format, const struct perf_sample *sample, bool swapped); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8201abe..adc94dd 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1396,10 +1396,11 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; if (data->user_regs.abi) { - u64 regs_user = evsel->attr.sample_regs_user; + u64 mask = evsel->attr.sample_regs_user; - sz = hweight_long(regs_user) * sizeof(u64); + sz = hweight_long(mask) * sizeof(u64); OVERFLOW_CHECK(array, sz, max_size); + data->user_regs.mask = mask; data->user_regs.regs = (u64 *)array; array = (void *)array + sz; } @@ -1451,7 +1452,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, } size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, - u64 sample_regs_user, u64 read_format) + u64 read_format) { size_t sz, result = sizeof(struct sample_event); @@ -1517,7 +1518,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_REGS_USER) { if (sample->user_regs.abi) { result += sizeof(u64); - sz = hweight_long(sample_regs_user) * sizeof(u64); + sz = hweight_long(sample->user_regs.mask) * sizeof(u64); result += sz; } else { result += sizeof(u64); @@ -1546,7 +1547,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, } int perf_event__synthesize_sample(union perf_event *event, u64 type, - u64 sample_regs_user, u64 read_format, + u64 read_format, const struct perf_sample *sample, bool swapped) { @@ -1687,7 +1688,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_REGS_USER) { if (sample->user_regs.abi) { *array++ = sample->user_regs.abi; - sz = hweight_long(sample_regs_user) * sizeof(u64); + sz = hweight_long(sample->user_regs.mask) * sizeof(u64); memcpy(array, sample->user_regs.regs, sz); array = (void *)array + sz; } else { diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6c08ab0..ac37d78 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1383,8 +1383,7 @@ int machine__resolve_callchain(struct machine *machine, return 0; return unwind__get_entries(unwind_entry, &callchain_cursor, machine, - thread, evsel->attr.sample_regs_user, - sample, max_stack); + thread, sample, max_stack); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5da6ce7..1d555d6 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -702,11 +702,12 @@ static void regs_dump__printf(u64 mask, u64 *regs) } } -static void regs_user__printf(struct perf_sample *sample, u64 mask) +static void regs_user__printf(struct perf_sample *sample) { struct regs_dump *user_regs = &sample->user_regs; if (user_regs->regs) { + u64 mask = user_regs->mask; printf("... user regs: mask 0x%" PRIx64 "\n", mask); regs_dump__printf(mask, user_regs->regs); } @@ -806,7 +807,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, branch_stack__printf(sample); if (sample_type & PERF_SAMPLE_REGS_USER) - regs_user__printf(sample, evsel->attr.sample_regs_user); + regs_user__printf(sample); if (sample_type & PERF_SAMPLE_STACK_USER) stack_user__printf(&sample->user_stack); diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c index 3b70181..720a4ca 100644 --- a/tools/perf/util/unwind.c +++ b/tools/perf/util/unwind.c @@ -86,7 +86,6 @@ struct unwind_info { struct perf_sample *sample; struct machine *machine; struct thread *thread; - u64 sample_uregs; }; #define dw_read(ptr, type, end) ({ \ @@ -391,16 +390,16 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, return !(size == sizeof(*data)); } -static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id, - u64 sample_regs) +static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id) { int i, idx = 0; + u64 mask = regs->mask; - if (!(sample_regs & (1 << id))) + if (!(mask & (1 << id))) return -EINVAL; for (i = 0; i < id; i++) { - if (sample_regs & (1 << i)) + if (mask & (1 << i)) idx++; } @@ -424,8 +423,7 @@ static int access_mem(unw_addr_space_t __maybe_unused as, return 0; } - ret = reg_value(&start, &ui->sample->user_regs, PERF_REG_SP, - ui->sample_uregs); + ret = reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); if (ret) return ret; @@ -475,7 +473,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as, if (id < 0) return -EINVAL; - ret = reg_value(valp, &ui->sample->user_regs, id, ui->sample_uregs); + ret = reg_value(valp, &ui->sample->user_regs, id); if (ret) { pr_err("unwind: can't read reg %d\n", regnum); return ret; @@ -572,13 +570,11 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, - u64 sample_uregs, struct perf_sample *data, - int max_stack) + struct perf_sample *data, int max_stack) { unw_word_t ip; struct unwind_info ui = { .sample = data, - .sample_uregs = sample_uregs, .thread = thread, .machine = machine, }; @@ -587,7 +583,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (!data->user_regs.regs) return -EINVAL; - ret = reg_value(&ip, &data->user_regs, PERF_REG_IP, sample_uregs); + ret = reg_value(&ip, &data->user_regs, PERF_REG_IP); if (ret) return ret; diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index d5966f49..356e1d6 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -17,7 +17,6 @@ typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, - u64 sample_uregs, struct perf_sample *data, int max_stack); int unwind__arch_reg_id(int regnum); #else @@ -26,7 +25,6 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, void *arg __maybe_unused, struct machine *machine __maybe_unused, struct thread *thread __maybe_unused, - u64 sample_uregs __maybe_unused, struct perf_sample *data __maybe_unused, int max_stack __maybe_unused) { -- cgit v0.10.2 From 436aa749bb193385f6a3b741ddc6bfba61770a6e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:26 +0100 Subject: perf callchain: Separate libunwind code to special object We are going to add libdw library support to do dwarf post unwind. Making the code ready by moving libunwind dwarf post unwind stuff into separate object. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-10-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 10fcf33..9ef6b33 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -481,7 +481,7 @@ endif # NO_DWARF endif # NO_LIBELF ifndef NO_LIBUNWIND - LIB_OBJS += $(OUTPUT)util/unwind.o + LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o endif LIB_OBJS += $(OUTPUT)tests/keep-tracking.o diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index fe9b61e..67e9b3d 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -3,5 +3,5 @@ PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o endif diff --git a/tools/perf/arch/arm/util/unwind-libunwind.c b/tools/perf/arch/arm/util/unwind-libunwind.c new file mode 100644 index 0000000..da3dc95 --- /dev/null +++ b/tools/perf/arch/arm/util/unwind-libunwind.c @@ -0,0 +1,48 @@ + +#include +#include +#include "perf_regs.h" +#include "../../util/unwind.h" + +int unwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_ARM_R0: + return PERF_REG_ARM_R0; + case UNW_ARM_R1: + return PERF_REG_ARM_R1; + case UNW_ARM_R2: + return PERF_REG_ARM_R2; + case UNW_ARM_R3: + return PERF_REG_ARM_R3; + case UNW_ARM_R4: + return PERF_REG_ARM_R4; + case UNW_ARM_R5: + return PERF_REG_ARM_R5; + case UNW_ARM_R6: + return PERF_REG_ARM_R6; + case UNW_ARM_R7: + return PERF_REG_ARM_R7; + case UNW_ARM_R8: + return PERF_REG_ARM_R8; + case UNW_ARM_R9: + return PERF_REG_ARM_R9; + case UNW_ARM_R10: + return PERF_REG_ARM_R10; + case UNW_ARM_R11: + return PERF_REG_ARM_FP; + case UNW_ARM_R12: + return PERF_REG_ARM_IP; + case UNW_ARM_R13: + return PERF_REG_ARM_SP; + case UNW_ARM_R14: + return PERF_REG_ARM_LR; + case UNW_ARM_R15: + return PERF_REG_ARM_PC; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return -EINVAL; +} diff --git a/tools/perf/arch/arm/util/unwind.c b/tools/perf/arch/arm/util/unwind.c deleted file mode 100644 index da3dc95..0000000 --- a/tools/perf/arch/arm/util/unwind.c +++ /dev/null @@ -1,48 +0,0 @@ - -#include -#include -#include "perf_regs.h" -#include "../../util/unwind.h" - -int unwind__arch_reg_id(int regnum) -{ - switch (regnum) { - case UNW_ARM_R0: - return PERF_REG_ARM_R0; - case UNW_ARM_R1: - return PERF_REG_ARM_R1; - case UNW_ARM_R2: - return PERF_REG_ARM_R2; - case UNW_ARM_R3: - return PERF_REG_ARM_R3; - case UNW_ARM_R4: - return PERF_REG_ARM_R4; - case UNW_ARM_R5: - return PERF_REG_ARM_R5; - case UNW_ARM_R6: - return PERF_REG_ARM_R6; - case UNW_ARM_R7: - return PERF_REG_ARM_R7; - case UNW_ARM_R8: - return PERF_REG_ARM_R8; - case UNW_ARM_R9: - return PERF_REG_ARM_R9; - case UNW_ARM_R10: - return PERF_REG_ARM_R10; - case UNW_ARM_R11: - return PERF_REG_ARM_FP; - case UNW_ARM_R12: - return PERF_REG_ARM_IP; - case UNW_ARM_R13: - return PERF_REG_ARM_SP; - case UNW_ARM_R14: - return PERF_REG_ARM_LR; - case UNW_ARM_R15: - return PERF_REG_ARM_PC; - default: - pr_err("unwind: invalid reg id %d\n", regnum); - return -EINVAL; - } - - return -EINVAL; -} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 948ea6c..4fa9be9 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -3,7 +3,7 @@ PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c new file mode 100644 index 0000000..456a88c --- /dev/null +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -0,0 +1,111 @@ + +#include +#include +#include "perf_regs.h" +#include "../../util/unwind.h" + +#ifdef HAVE_ARCH_X86_64_SUPPORT +int unwind__arch_reg_id(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_64_RAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_64_RDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_64_RCX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_64_RBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_64_RSI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_64_RDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_64_RBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_64_RSP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_64_R8: + id = PERF_REG_X86_R8; + break; + case UNW_X86_64_R9: + id = PERF_REG_X86_R9; + break; + case UNW_X86_64_R10: + id = PERF_REG_X86_R10; + break; + case UNW_X86_64_R11: + id = PERF_REG_X86_R11; + break; + case UNW_X86_64_R12: + id = PERF_REG_X86_R12; + break; + case UNW_X86_64_R13: + id = PERF_REG_X86_R13; + break; + case UNW_X86_64_R14: + id = PERF_REG_X86_R14; + break; + case UNW_X86_64_R15: + id = PERF_REG_X86_R15; + break; + case UNW_X86_64_RIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#else +int unwind__arch_reg_id(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_EAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_EDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_ECX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_EBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_ESI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_EDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_EBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_ESP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_EIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#endif /* HAVE_ARCH_X86_64_SUPPORT */ diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind.c deleted file mode 100644 index 456a88c..0000000 --- a/tools/perf/arch/x86/util/unwind.c +++ /dev/null @@ -1,111 +0,0 @@ - -#include -#include -#include "perf_regs.h" -#include "../../util/unwind.h" - -#ifdef HAVE_ARCH_X86_64_SUPPORT -int unwind__arch_reg_id(int regnum) -{ - int id; - - switch (regnum) { - case UNW_X86_64_RAX: - id = PERF_REG_X86_AX; - break; - case UNW_X86_64_RDX: - id = PERF_REG_X86_DX; - break; - case UNW_X86_64_RCX: - id = PERF_REG_X86_CX; - break; - case UNW_X86_64_RBX: - id = PERF_REG_X86_BX; - break; - case UNW_X86_64_RSI: - id = PERF_REG_X86_SI; - break; - case UNW_X86_64_RDI: - id = PERF_REG_X86_DI; - break; - case UNW_X86_64_RBP: - id = PERF_REG_X86_BP; - break; - case UNW_X86_64_RSP: - id = PERF_REG_X86_SP; - break; - case UNW_X86_64_R8: - id = PERF_REG_X86_R8; - break; - case UNW_X86_64_R9: - id = PERF_REG_X86_R9; - break; - case UNW_X86_64_R10: - id = PERF_REG_X86_R10; - break; - case UNW_X86_64_R11: - id = PERF_REG_X86_R11; - break; - case UNW_X86_64_R12: - id = PERF_REG_X86_R12; - break; - case UNW_X86_64_R13: - id = PERF_REG_X86_R13; - break; - case UNW_X86_64_R14: - id = PERF_REG_X86_R14; - break; - case UNW_X86_64_R15: - id = PERF_REG_X86_R15; - break; - case UNW_X86_64_RIP: - id = PERF_REG_X86_IP; - break; - default: - pr_err("unwind: invalid reg id %d\n", regnum); - return -EINVAL; - } - - return id; -} -#else -int unwind__arch_reg_id(int regnum) -{ - int id; - - switch (regnum) { - case UNW_X86_EAX: - id = PERF_REG_X86_AX; - break; - case UNW_X86_EDX: - id = PERF_REG_X86_DX; - break; - case UNW_X86_ECX: - id = PERF_REG_X86_CX; - break; - case UNW_X86_EBX: - id = PERF_REG_X86_BX; - break; - case UNW_X86_ESI: - id = PERF_REG_X86_SI; - break; - case UNW_X86_EDI: - id = PERF_REG_X86_DI; - break; - case UNW_X86_EBP: - id = PERF_REG_X86_BP; - break; - case UNW_X86_ESP: - id = PERF_REG_X86_SP; - break; - case UNW_X86_EIP: - id = PERF_REG_X86_IP; - break; - default: - pr_err("unwind: invalid reg id %d\n", regnum); - return -EINVAL; - } - - return id; -} -#endif /* HAVE_ARCH_X86_64_SUPPORT */ diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c new file mode 100644 index 0000000..720a4ca --- /dev/null +++ b/tools/perf/util/unwind-libunwind.c @@ -0,0 +1,595 @@ +/* + * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. + * + * Lots of this code have been borrowed or heavily inspired from parts of + * the libunwind 0.99 code which are (amongst other contributors I may have + * forgotten): + * + * Copyright (C) 2002-2007 Hewlett-Packard Co + * Contributed by David Mosberger-Tang + * + * And the bugs have been added by: + * + * Copyright (C) 2010, Frederic Weisbecker + * Copyright (C) 2012, Jiri Olsa + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "thread.h" +#include "session.h" +#include "perf_regs.h" +#include "unwind.h" +#include "symbol.h" +#include "util.h" + +extern int +UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +extern int +UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, + unw_word_t ip, + unw_word_t segbase, + const char *obj_name, unw_word_t start, + unw_word_t end); + +#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) + +#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ +#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ + +/* Pointer-encoding formats: */ +#define DW_EH_PE_omit 0xff +#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ +#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ +#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ +#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ +#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ + +/* Pointer-encoding application: */ +#define DW_EH_PE_absptr 0x00 /* absolute value */ +#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */ + +/* + * The following are not documented by LSB v1.3, yet they are used by + * GCC, presumably they aren't documented by LSB since they aren't + * used on Linux: + */ +#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ +#define DW_EH_PE_aligned 0x50 /* aligned pointer */ + +/* Flags intentionaly not handled, since they're not needed: + * #define DW_EH_PE_indirect 0x80 + * #define DW_EH_PE_uleb128 0x01 + * #define DW_EH_PE_udata2 0x02 + * #define DW_EH_PE_sleb128 0x09 + * #define DW_EH_PE_sdata2 0x0a + * #define DW_EH_PE_textrel 0x20 + * #define DW_EH_PE_datarel 0x30 + */ + +struct unwind_info { + struct perf_sample *sample; + struct machine *machine; + struct thread *thread; +}; + +#define dw_read(ptr, type, end) ({ \ + type *__p = (type *) ptr; \ + type __v; \ + if ((__p + 1) > (type *) end) \ + return -EINVAL; \ + __v = *__p++; \ + ptr = (typeof(ptr)) __p; \ + __v; \ + }) + +static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, + u8 encoding) +{ + u8 *cur = *p; + *val = 0; + + switch (encoding) { + case DW_EH_PE_omit: + *val = 0; + goto out; + case DW_EH_PE_ptr: + *val = dw_read(cur, unsigned long, end); + goto out; + default: + break; + } + + switch (encoding & DW_EH_PE_APPL_MASK) { + case DW_EH_PE_absptr: + break; + case DW_EH_PE_pcrel: + *val = (unsigned long) cur; + break; + default: + return -EINVAL; + } + + if ((encoding & 0x07) == 0x00) + encoding |= DW_EH_PE_udata4; + + switch (encoding & DW_EH_PE_FORMAT_MASK) { + case DW_EH_PE_sdata4: + *val += dw_read(cur, s32, end); + break; + case DW_EH_PE_udata4: + *val += dw_read(cur, u32, end); + break; + case DW_EH_PE_sdata8: + *val += dw_read(cur, s64, end); + break; + case DW_EH_PE_udata8: + *val += dw_read(cur, u64, end); + break; + default: + return -EINVAL; + } + + out: + *p = cur; + return 0; +} + +#define dw_read_encoded_value(ptr, end, enc) ({ \ + u64 __v; \ + if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ + return -EINVAL; \ + } \ + __v; \ + }) + +static u64 elf_section_offset(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + u64 offset = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + + do { + if (gelf_getehdr(elf, &ehdr) == NULL) + break; + + if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL)) + break; + + offset = shdr.sh_offset; + } while (0); + + elf_end(elf); + return offset; +} + +struct table_entry { + u32 start_ip_offset; + u32 fde_offset; +}; + +struct eh_frame_hdr { + unsigned char version; + unsigned char eh_frame_ptr_enc; + unsigned char fde_count_enc; + unsigned char table_enc; + + /* + * The rest of the header is variable-length and consists of the + * following members: + * + * encoded_t eh_frame_ptr; + * encoded_t fde_count; + */ + + /* A single encoded pointer should not be more than 8 bytes. */ + u64 enc[2]; + + /* + * struct { + * encoded_t start_ip; + * encoded_t fde_addr; + * } binary_search_table[fde_count]; + */ + char data[0]; +} __packed; + +static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, + u64 offset, u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + struct eh_frame_hdr hdr; + u8 *enc = (u8 *) &hdr.enc; + u8 *end = (u8 *) &hdr.data; + ssize_t r; + + r = dso__data_read_offset(dso, machine, offset, + (u8 *) &hdr, sizeof(hdr)); + if (r != sizeof(hdr)) + return -EINVAL; + + /* We dont need eh_frame_ptr, just skip it. */ + dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); + + *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); + *segbase = offset; + *table_data = (enc - (u8 *) &hdr) + offset; + return 0; +} + +static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, + u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + int ret = -EINVAL, fd; + u64 offset; + + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; + + /* Check the .eh_frame section for unwinding info */ + offset = elf_section_offset(fd, ".eh_frame_hdr"); + close(fd); + + if (offset) + ret = unwind_spec_ehframe(dso, machine, offset, + table_data, segbase, + fde_count); + + return ret; +} + +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static int read_unwind_spec_debug_frame(struct dso *dso, + struct machine *machine, u64 *offset) +{ + int fd = dso__data_fd(dso, machine); + + if (fd < 0) + return -EINVAL; + + /* Check the .debug_frame section for unwinding info */ + *offset = elf_section_offset(fd, ".debug_frame"); + close(fd); + + if (*offset) + return 0; + + return -EINVAL; +} +#endif + +static struct map *find_map(unw_word_t ip, struct unwind_info *ui) +{ + struct addr_location al; + + thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + return al.map; +} + +static int +find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, + int need_unwind_info, void *arg) +{ + struct unwind_info *ui = arg; + struct map *map; + unw_dyn_info_t di; + u64 table_data, segbase, fde_count; + + map = find_map(ip, ui); + if (!map || !map->dso) + return -EINVAL; + + pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); + + /* Check the .eh_frame section for unwinding info */ + if (!read_unwind_spec_eh_frame(map->dso, ui->machine, + &table_data, &segbase, &fde_count)) { + memset(&di, 0, sizeof(di)); + di.format = UNW_INFO_FORMAT_REMOTE_TABLE; + di.start_ip = map->start; + di.end_ip = map->end; + di.u.rti.segbase = map->start + segbase; + di.u.rti.table_data = map->start + table_data; + di.u.rti.table_len = fde_count * sizeof(struct table_entry) + / sizeof(unw_word_t); + return dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); + } + +#ifndef NO_LIBUNWIND_DEBUG_FRAME + /* Check the .debug_frame section for unwinding info */ + if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { + memset(&di, 0, sizeof(di)); + if (dwarf_find_debug_frame(0, &di, ip, 0, map->dso->name, + map->start, map->end)) + return dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); + } +#endif + + return -EINVAL; +} + +static int access_fpreg(unw_addr_space_t __maybe_unused as, + unw_regnum_t __maybe_unused num, + unw_fpreg_t __maybe_unused *val, + int __maybe_unused __write, + void __maybe_unused *arg) +{ + pr_err("unwind: access_fpreg unsupported\n"); + return -UNW_EINVAL; +} + +static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused *dil_addr, + void __maybe_unused *arg) +{ + return -UNW_ENOINFO; +} + +static int resume(unw_addr_space_t __maybe_unused as, + unw_cursor_t __maybe_unused *cu, + void __maybe_unused *arg) +{ + pr_err("unwind: resume unsupported\n"); + return -UNW_EINVAL; +} + +static int +get_proc_name(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused addr, + char __maybe_unused *bufp, size_t __maybe_unused buf_len, + unw_word_t __maybe_unused *offp, void __maybe_unused *arg) +{ + pr_err("unwind: get_proc_name unsupported\n"); + return -UNW_EINVAL; +} + +static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, + unw_word_t *data) +{ + struct addr_location al; + ssize_t size; + + thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, addr, &al); + if (!al.map) { + pr_debug("unwind: no map for %lx\n", (unsigned long)addr); + return -1; + } + + if (!al.map->dso) + return -1; + + size = dso__data_read_addr(al.map->dso, al.map, ui->machine, + addr, (u8 *) data, sizeof(*data)); + + return !(size == sizeof(*data)); +} + +static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id) +{ + int i, idx = 0; + u64 mask = regs->mask; + + if (!(mask & (1 << id))) + return -EINVAL; + + for (i = 0; i < id; i++) { + if (mask & (1 << i)) + idx++; + } + + *valp = regs->regs[idx]; + return 0; +} + +static int access_mem(unw_addr_space_t __maybe_unused as, + unw_word_t addr, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + struct stack_dump *stack = &ui->sample->user_stack; + unw_word_t start, end; + int offset; + int ret; + + /* Don't support write, probably not needed. */ + if (__write || !stack || !ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + ret = reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); + if (ret) + return ret; + + end = start + stack->size; + + /* Check overflow. */ + if (addr + sizeof(unw_word_t) < addr) + return -EINVAL; + + if (addr < start || addr + sizeof(unw_word_t) >= end) { + ret = access_dso_mem(ui, addr, valp); + if (ret) { + pr_debug("unwind: access_mem %p not inside range %p-%p\n", + (void *)addr, (void *)start, (void *)end); + *valp = 0; + return ret; + } + return 0; + } + + offset = addr - start; + *valp = *(unw_word_t *)&stack->data[offset]; + pr_debug("unwind: access_mem addr %p, val %lx, offset %d\n", + (void *)addr, (unsigned long)*valp, offset); + return 0; +} + +static int access_reg(unw_addr_space_t __maybe_unused as, + unw_regnum_t regnum, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + int id, ret; + + /* Don't support write, I suspect we don't need it. */ + if (__write) { + pr_err("unwind: access_reg w %d\n", regnum); + return 0; + } + + if (!ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + id = unwind__arch_reg_id(regnum); + if (id < 0) + return -EINVAL; + + ret = reg_value(valp, &ui->sample->user_regs, id); + if (ret) { + pr_err("unwind: can't read reg %d\n", regnum); + return ret; + } + + pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); + return 0; +} + +static void put_unwind_info(unw_addr_space_t __maybe_unused as, + unw_proc_info_t *pi __maybe_unused, + void *arg __maybe_unused) +{ + pr_debug("unwind: put_unwind_info called\n"); +} + +static int entry(u64 ip, struct thread *thread, struct machine *machine, + unwind_entry_cb_t cb, void *arg) +{ + struct unwind_entry e; + struct addr_location al; + + thread__find_addr_location(thread, machine, + PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + + e.ip = ip; + e.map = al.map; + e.sym = al.sym; + + pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", + al.sym ? al.sym->name : "''", + ip, + al.map ? al.map->map_ip(al.map, ip) : (u64) 0); + + return cb(&e, arg); +} + +static void display_error(int err) +{ + switch (err) { + case UNW_EINVAL: + pr_err("unwind: Only supports local.\n"); + break; + case UNW_EUNSPEC: + pr_err("unwind: Unspecified error.\n"); + break; + case UNW_EBADREG: + pr_err("unwind: Register unavailable.\n"); + break; + default: + break; + } +} + +static unw_accessors_t accessors = { + .find_proc_info = find_proc_info, + .put_unwind_info = put_unwind_info, + .get_dyn_info_list_addr = get_dyn_info_list_addr, + .access_mem = access_mem, + .access_reg = access_reg, + .access_fpreg = access_fpreg, + .resume = resume, + .get_proc_name = get_proc_name, +}; + +static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, + void *arg, int max_stack) +{ + unw_addr_space_t addr_space; + unw_cursor_t c; + int ret; + + addr_space = unw_create_addr_space(&accessors, 0); + if (!addr_space) { + pr_err("unwind: Can't create unwind address space.\n"); + return -ENOMEM; + } + + ret = unw_init_remote(&c, addr_space, ui); + if (ret) + display_error(ret); + + while (!ret && (unw_step(&c) > 0) && max_stack--) { + unw_word_t ip; + + unw_get_reg(&c, UNW_REG_IP, &ip); + ret = ip ? entry(ip, ui->thread, ui->machine, cb, arg) : 0; + } + + unw_destroy_addr_space(addr_space); + return ret; +} + +int unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct machine *machine, struct thread *thread, + struct perf_sample *data, int max_stack) +{ + unw_word_t ip; + struct unwind_info ui = { + .sample = data, + .thread = thread, + .machine = machine, + }; + int ret; + + if (!data->user_regs.regs) + return -EINVAL; + + ret = reg_value(&ip, &data->user_regs, PERF_REG_IP); + if (ret) + return ret; + + ret = entry(ip, thread, machine, cb, arg); + if (ret) + return -ENOMEM; + + return --max_stack > 0 ? get_entries(&ui, cb, arg, max_stack) : 0; +} diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c deleted file mode 100644 index 720a4ca..0000000 --- a/tools/perf/util/unwind.c +++ /dev/null @@ -1,595 +0,0 @@ -/* - * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. - * - * Lots of this code have been borrowed or heavily inspired from parts of - * the libunwind 0.99 code which are (amongst other contributors I may have - * forgotten): - * - * Copyright (C) 2002-2007 Hewlett-Packard Co - * Contributed by David Mosberger-Tang - * - * And the bugs have been added by: - * - * Copyright (C) 2010, Frederic Weisbecker - * Copyright (C) 2012, Jiri Olsa - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "thread.h" -#include "session.h" -#include "perf_regs.h" -#include "unwind.h" -#include "symbol.h" -#include "util.h" - -extern int -UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, - unw_word_t ip, - unw_dyn_info_t *di, - unw_proc_info_t *pi, - int need_unwind_info, void *arg); - -#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) - -extern int -UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, - unw_word_t ip, - unw_word_t segbase, - const char *obj_name, unw_word_t start, - unw_word_t end); - -#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) - -#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ -#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ - -/* Pointer-encoding formats: */ -#define DW_EH_PE_omit 0xff -#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ -#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ -#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ -#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ -#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ - -/* Pointer-encoding application: */ -#define DW_EH_PE_absptr 0x00 /* absolute value */ -#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */ - -/* - * The following are not documented by LSB v1.3, yet they are used by - * GCC, presumably they aren't documented by LSB since they aren't - * used on Linux: - */ -#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ -#define DW_EH_PE_aligned 0x50 /* aligned pointer */ - -/* Flags intentionaly not handled, since they're not needed: - * #define DW_EH_PE_indirect 0x80 - * #define DW_EH_PE_uleb128 0x01 - * #define DW_EH_PE_udata2 0x02 - * #define DW_EH_PE_sleb128 0x09 - * #define DW_EH_PE_sdata2 0x0a - * #define DW_EH_PE_textrel 0x20 - * #define DW_EH_PE_datarel 0x30 - */ - -struct unwind_info { - struct perf_sample *sample; - struct machine *machine; - struct thread *thread; -}; - -#define dw_read(ptr, type, end) ({ \ - type *__p = (type *) ptr; \ - type __v; \ - if ((__p + 1) > (type *) end) \ - return -EINVAL; \ - __v = *__p++; \ - ptr = (typeof(ptr)) __p; \ - __v; \ - }) - -static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, - u8 encoding) -{ - u8 *cur = *p; - *val = 0; - - switch (encoding) { - case DW_EH_PE_omit: - *val = 0; - goto out; - case DW_EH_PE_ptr: - *val = dw_read(cur, unsigned long, end); - goto out; - default: - break; - } - - switch (encoding & DW_EH_PE_APPL_MASK) { - case DW_EH_PE_absptr: - break; - case DW_EH_PE_pcrel: - *val = (unsigned long) cur; - break; - default: - return -EINVAL; - } - - if ((encoding & 0x07) == 0x00) - encoding |= DW_EH_PE_udata4; - - switch (encoding & DW_EH_PE_FORMAT_MASK) { - case DW_EH_PE_sdata4: - *val += dw_read(cur, s32, end); - break; - case DW_EH_PE_udata4: - *val += dw_read(cur, u32, end); - break; - case DW_EH_PE_sdata8: - *val += dw_read(cur, s64, end); - break; - case DW_EH_PE_udata8: - *val += dw_read(cur, u64, end); - break; - default: - return -EINVAL; - } - - out: - *p = cur; - return 0; -} - -#define dw_read_encoded_value(ptr, end, enc) ({ \ - u64 __v; \ - if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ - return -EINVAL; \ - } \ - __v; \ - }) - -static u64 elf_section_offset(int fd, const char *name) -{ - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - u64 offset = 0; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - return 0; - - do { - if (gelf_getehdr(elf, &ehdr) == NULL) - break; - - if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL)) - break; - - offset = shdr.sh_offset; - } while (0); - - elf_end(elf); - return offset; -} - -struct table_entry { - u32 start_ip_offset; - u32 fde_offset; -}; - -struct eh_frame_hdr { - unsigned char version; - unsigned char eh_frame_ptr_enc; - unsigned char fde_count_enc; - unsigned char table_enc; - - /* - * The rest of the header is variable-length and consists of the - * following members: - * - * encoded_t eh_frame_ptr; - * encoded_t fde_count; - */ - - /* A single encoded pointer should not be more than 8 bytes. */ - u64 enc[2]; - - /* - * struct { - * encoded_t start_ip; - * encoded_t fde_addr; - * } binary_search_table[fde_count]; - */ - char data[0]; -} __packed; - -static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, - u64 offset, u64 *table_data, u64 *segbase, - u64 *fde_count) -{ - struct eh_frame_hdr hdr; - u8 *enc = (u8 *) &hdr.enc; - u8 *end = (u8 *) &hdr.data; - ssize_t r; - - r = dso__data_read_offset(dso, machine, offset, - (u8 *) &hdr, sizeof(hdr)); - if (r != sizeof(hdr)) - return -EINVAL; - - /* We dont need eh_frame_ptr, just skip it. */ - dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); - - *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); - *segbase = offset; - *table_data = (enc - (u8 *) &hdr) + offset; - return 0; -} - -static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, - u64 *table_data, u64 *segbase, - u64 *fde_count) -{ - int ret = -EINVAL, fd; - u64 offset; - - fd = dso__data_fd(dso, machine); - if (fd < 0) - return -EINVAL; - - /* Check the .eh_frame section for unwinding info */ - offset = elf_section_offset(fd, ".eh_frame_hdr"); - close(fd); - - if (offset) - ret = unwind_spec_ehframe(dso, machine, offset, - table_data, segbase, - fde_count); - - return ret; -} - -#ifndef NO_LIBUNWIND_DEBUG_FRAME -static int read_unwind_spec_debug_frame(struct dso *dso, - struct machine *machine, u64 *offset) -{ - int fd = dso__data_fd(dso, machine); - - if (fd < 0) - return -EINVAL; - - /* Check the .debug_frame section for unwinding info */ - *offset = elf_section_offset(fd, ".debug_frame"); - close(fd); - - if (*offset) - return 0; - - return -EINVAL; -} -#endif - -static struct map *find_map(unw_word_t ip, struct unwind_info *ui) -{ - struct addr_location al; - - thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - return al.map; -} - -static int -find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, - int need_unwind_info, void *arg) -{ - struct unwind_info *ui = arg; - struct map *map; - unw_dyn_info_t di; - u64 table_data, segbase, fde_count; - - map = find_map(ip, ui); - if (!map || !map->dso) - return -EINVAL; - - pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); - - /* Check the .eh_frame section for unwinding info */ - if (!read_unwind_spec_eh_frame(map->dso, ui->machine, - &table_data, &segbase, &fde_count)) { - memset(&di, 0, sizeof(di)); - di.format = UNW_INFO_FORMAT_REMOTE_TABLE; - di.start_ip = map->start; - di.end_ip = map->end; - di.u.rti.segbase = map->start + segbase; - di.u.rti.table_data = map->start + table_data; - di.u.rti.table_len = fde_count * sizeof(struct table_entry) - / sizeof(unw_word_t); - return dwarf_search_unwind_table(as, ip, &di, pi, - need_unwind_info, arg); - } - -#ifndef NO_LIBUNWIND_DEBUG_FRAME - /* Check the .debug_frame section for unwinding info */ - if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { - memset(&di, 0, sizeof(di)); - if (dwarf_find_debug_frame(0, &di, ip, 0, map->dso->name, - map->start, map->end)) - return dwarf_search_unwind_table(as, ip, &di, pi, - need_unwind_info, arg); - } -#endif - - return -EINVAL; -} - -static int access_fpreg(unw_addr_space_t __maybe_unused as, - unw_regnum_t __maybe_unused num, - unw_fpreg_t __maybe_unused *val, - int __maybe_unused __write, - void __maybe_unused *arg) -{ - pr_err("unwind: access_fpreg unsupported\n"); - return -UNW_EINVAL; -} - -static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as, - unw_word_t __maybe_unused *dil_addr, - void __maybe_unused *arg) -{ - return -UNW_ENOINFO; -} - -static int resume(unw_addr_space_t __maybe_unused as, - unw_cursor_t __maybe_unused *cu, - void __maybe_unused *arg) -{ - pr_err("unwind: resume unsupported\n"); - return -UNW_EINVAL; -} - -static int -get_proc_name(unw_addr_space_t __maybe_unused as, - unw_word_t __maybe_unused addr, - char __maybe_unused *bufp, size_t __maybe_unused buf_len, - unw_word_t __maybe_unused *offp, void __maybe_unused *arg) -{ - pr_err("unwind: get_proc_name unsupported\n"); - return -UNW_EINVAL; -} - -static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, - unw_word_t *data) -{ - struct addr_location al; - ssize_t size; - - thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, - MAP__FUNCTION, addr, &al); - if (!al.map) { - pr_debug("unwind: no map for %lx\n", (unsigned long)addr); - return -1; - } - - if (!al.map->dso) - return -1; - - size = dso__data_read_addr(al.map->dso, al.map, ui->machine, - addr, (u8 *) data, sizeof(*data)); - - return !(size == sizeof(*data)); -} - -static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id) -{ - int i, idx = 0; - u64 mask = regs->mask; - - if (!(mask & (1 << id))) - return -EINVAL; - - for (i = 0; i < id; i++) { - if (mask & (1 << i)) - idx++; - } - - *valp = regs->regs[idx]; - return 0; -} - -static int access_mem(unw_addr_space_t __maybe_unused as, - unw_word_t addr, unw_word_t *valp, - int __write, void *arg) -{ - struct unwind_info *ui = arg; - struct stack_dump *stack = &ui->sample->user_stack; - unw_word_t start, end; - int offset; - int ret; - - /* Don't support write, probably not needed. */ - if (__write || !stack || !ui->sample->user_regs.regs) { - *valp = 0; - return 0; - } - - ret = reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); - if (ret) - return ret; - - end = start + stack->size; - - /* Check overflow. */ - if (addr + sizeof(unw_word_t) < addr) - return -EINVAL; - - if (addr < start || addr + sizeof(unw_word_t) >= end) { - ret = access_dso_mem(ui, addr, valp); - if (ret) { - pr_debug("unwind: access_mem %p not inside range %p-%p\n", - (void *)addr, (void *)start, (void *)end); - *valp = 0; - return ret; - } - return 0; - } - - offset = addr - start; - *valp = *(unw_word_t *)&stack->data[offset]; - pr_debug("unwind: access_mem addr %p, val %lx, offset %d\n", - (void *)addr, (unsigned long)*valp, offset); - return 0; -} - -static int access_reg(unw_addr_space_t __maybe_unused as, - unw_regnum_t regnum, unw_word_t *valp, - int __write, void *arg) -{ - struct unwind_info *ui = arg; - int id, ret; - - /* Don't support write, I suspect we don't need it. */ - if (__write) { - pr_err("unwind: access_reg w %d\n", regnum); - return 0; - } - - if (!ui->sample->user_regs.regs) { - *valp = 0; - return 0; - } - - id = unwind__arch_reg_id(regnum); - if (id < 0) - return -EINVAL; - - ret = reg_value(valp, &ui->sample->user_regs, id); - if (ret) { - pr_err("unwind: can't read reg %d\n", regnum); - return ret; - } - - pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); - return 0; -} - -static void put_unwind_info(unw_addr_space_t __maybe_unused as, - unw_proc_info_t *pi __maybe_unused, - void *arg __maybe_unused) -{ - pr_debug("unwind: put_unwind_info called\n"); -} - -static int entry(u64 ip, struct thread *thread, struct machine *machine, - unwind_entry_cb_t cb, void *arg) -{ - struct unwind_entry e; - struct addr_location al; - - thread__find_addr_location(thread, machine, - PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - - e.ip = ip; - e.map = al.map; - e.sym = al.sym; - - pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", - al.sym ? al.sym->name : "''", - ip, - al.map ? al.map->map_ip(al.map, ip) : (u64) 0); - - return cb(&e, arg); -} - -static void display_error(int err) -{ - switch (err) { - case UNW_EINVAL: - pr_err("unwind: Only supports local.\n"); - break; - case UNW_EUNSPEC: - pr_err("unwind: Unspecified error.\n"); - break; - case UNW_EBADREG: - pr_err("unwind: Register unavailable.\n"); - break; - default: - break; - } -} - -static unw_accessors_t accessors = { - .find_proc_info = find_proc_info, - .put_unwind_info = put_unwind_info, - .get_dyn_info_list_addr = get_dyn_info_list_addr, - .access_mem = access_mem, - .access_reg = access_reg, - .access_fpreg = access_fpreg, - .resume = resume, - .get_proc_name = get_proc_name, -}; - -static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, - void *arg, int max_stack) -{ - unw_addr_space_t addr_space; - unw_cursor_t c; - int ret; - - addr_space = unw_create_addr_space(&accessors, 0); - if (!addr_space) { - pr_err("unwind: Can't create unwind address space.\n"); - return -ENOMEM; - } - - ret = unw_init_remote(&c, addr_space, ui); - if (ret) - display_error(ret); - - while (!ret && (unw_step(&c) > 0) && max_stack--) { - unw_word_t ip; - - unw_get_reg(&c, UNW_REG_IP, &ip); - ret = ip ? entry(ip, ui->thread, ui->machine, cb, arg) : 0; - } - - unw_destroy_addr_space(addr_space); - return ret; -} - -int unwind__get_entries(unwind_entry_cb_t cb, void *arg, - struct machine *machine, struct thread *thread, - struct perf_sample *data, int max_stack) -{ - unw_word_t ip; - struct unwind_info ui = { - .sample = data, - .thread = thread, - .machine = machine, - }; - int ret; - - if (!data->user_regs.regs) - return -EINVAL; - - ret = reg_value(&ip, &data->user_regs, PERF_REG_IP); - if (ret) - return ret; - - ret = entry(ip, thread, machine, cb, arg); - if (ret) - return -ENOMEM; - - return --max_stack > 0 ? get_entries(&ui, cb, arg, max_stack) : 0; -} -- cgit v0.10.2 From ea3da69df959dfa1c807586c061bfcc4ec7e5e17 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:27 +0100 Subject: perf callchain: Rename unwind__arch_reg_id into libunwind__arch_reg_id Renaming unwind__arch_reg_id into libunwind__arch_reg_id, so it's clear it's specific to libunwind. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-11-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/arch/arm/util/unwind-libunwind.c b/tools/perf/arch/arm/util/unwind-libunwind.c index da3dc95..729ed69 100644 --- a/tools/perf/arch/arm/util/unwind-libunwind.c +++ b/tools/perf/arch/arm/util/unwind-libunwind.c @@ -4,7 +4,7 @@ #include "perf_regs.h" #include "../../util/unwind.h" -int unwind__arch_reg_id(int regnum) +int libunwind__arch_reg_id(int regnum) { switch (regnum) { case UNW_ARM_R0: diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index 456a88c..3261f68 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -5,7 +5,7 @@ #include "../../util/unwind.h" #ifdef HAVE_ARCH_X86_64_SUPPORT -int unwind__arch_reg_id(int regnum) +int libunwind__arch_reg_id(int regnum) { int id; @@ -69,7 +69,7 @@ int unwind__arch_reg_id(int regnum) return id; } #else -int unwind__arch_reg_id(int regnum) +int libunwind__arch_reg_id(int regnum) { int id; diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 720a4ca..79dbfbb 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -469,7 +469,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as, return 0; } - id = unwind__arch_reg_id(regnum); + id = libunwind__arch_reg_id(regnum); if (id < 0) return -EINVAL; diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index 356e1d6..18f33b4 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -18,7 +18,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, struct perf_sample *data, int max_stack); -int unwind__arch_reg_id(int regnum); +int libunwind__arch_reg_id(int regnum); #else static inline int unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, -- cgit v0.10.2 From 9ff125d132001c02d32a193a9423be0690526e11 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:28 +0100 Subject: perf callchain: Introduce HAVE_DWARF_UNWIND_SUPPORT macro Introducing global macro HAVE_DWARF_UNWIND_SUPPORT to indicate we have dwarf unwind support. Any library providing the dwarf post unwind support will enable this macro. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-12-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 7b8f0e6..eb524f9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -649,7 +649,7 @@ error: return ret; } -#ifdef HAVE_LIBUNWIND_SUPPORT +#ifdef HAVE_DWARF_UNWIND_SUPPORT static int get_stack_size(char *str, unsigned long *_size) { char *endptr; @@ -675,7 +675,7 @@ static int get_stack_size(char *str, unsigned long *_size) max_size, str); return -1; } -#endif /* HAVE_LIBUNWIND_SUPPORT */ +#endif /* HAVE_DWARF_UNWIND_SUPPORT */ int record_parse_callchain(const char *arg, struct record_opts *opts) { @@ -704,7 +704,7 @@ int record_parse_callchain(const char *arg, struct record_opts *opts) "needed for -g fp\n"); break; -#ifdef HAVE_LIBUNWIND_SUPPORT +#ifdef HAVE_DWARF_UNWIND_SUPPORT /* Dwarf style */ } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { const unsigned long default_stack_dump_size = 8192; @@ -720,7 +720,7 @@ int record_parse_callchain(const char *arg, struct record_opts *opts) ret = get_stack_size(tok, &size); opts->stack_dump_size = size; } -#endif /* HAVE_LIBUNWIND_SUPPORT */ +#endif /* HAVE_DWARF_UNWIND_SUPPORT */ } else { pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); @@ -823,7 +823,7 @@ static struct record record = { #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " -#ifdef HAVE_LIBUNWIND_SUPPORT +#ifdef HAVE_DWARF_UNWIND_SUPPORT const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; #else const char record_callchain_help[] = CALLCHAIN_HELP "fp"; diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index c48d449..1686583 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -338,7 +338,7 @@ ifndef NO_LIBUNWIND CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME endif - CFLAGS += -DHAVE_LIBUNWIND_SUPPORT + CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT -DHAVE_LIBUNWIND_SUPPORT EXTLIBS += $(LIBUNWIND_LIBS) CFLAGS += $(LIBUNWIND_CFLAGS) LDFLAGS += $(LIBUNWIND_LDFLAGS) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index f5a6ffb..b11bf8a 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -116,7 +116,7 @@ static struct test { .func = test__parse_no_sample_id_all, }, #if defined(__x86_64__) || defined(__i386__) -#ifdef HAVE_LIBUNWIND_SUPPORT +#ifdef HAVE_DWARF_UNWIND_SUPPORT { .desc = "Test dwarf unwind", .func = test__dwarf_unwind, diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 8979309..a24795c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -43,7 +43,7 @@ int test__parse_no_sample_id_all(void); int test__dwarf_unwind(void); #if defined(__x86_64__) || defined(__i386__) -#ifdef HAVE_LIBUNWIND_SUPPORT +#ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; struct perf_sample; int test__arch_unwind_sample(struct perf_sample *sample, diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index 18f33b4..b031316 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -13,12 +13,15 @@ struct unwind_entry { typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); -#ifdef HAVE_LIBUNWIND_SUPPORT +#ifdef HAVE_DWARF_UNWIND_SUPPORT int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, struct perf_sample *data, int max_stack); +/* libunwind specific */ +#ifdef HAVE_LIBUNWIND_SUPPORT int libunwind__arch_reg_id(int regnum); +#endif #else static inline int unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, @@ -30,5 +33,5 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, { return 0; } -#endif /* HAVE_LIBUNWIND_SUPPORT */ +#endif /* HAVE_DWARF_UNWIND_SUPPORT */ #endif /* __UNWIND_H */ -- cgit v0.10.2 From c9b951c4d12f0b2e9a07dd459c554bc05628d092 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jan 2014 13:47:29 +0100 Subject: perf callchain: Separate perf_reg_value function in perf_regs object Making perf_reg_value function global (formely reg_value), because it's going to be used globaly across all code providing the dwarf post unwind feature. Changing its prototype to be generic: -int reg_value(unw_word_t *valp, struct regs_dump *regs, int id) +int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); Changing the valp type from libunwind specific 'unw_word_t' to u64. Signed-off-by: Jiri Olsa Acked-by: Jean Pihet Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1389098853-14466-13-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9ef6b33..f99a392 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -538,6 +538,7 @@ ifeq ($(NO_PERF_REGS),0) ifeq ($(ARCH),x86) LIB_H += arch/x86/include/perf_regs.h endif + LIB_OBJS += $(OUTPUT)util/perf_regs.o endif ifndef NO_LIBNUMA diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c new file mode 100644 index 0000000..a3539ef --- /dev/null +++ b/tools/perf/util/perf_regs.c @@ -0,0 +1,19 @@ +#include +#include "perf_regs.h" + +int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) +{ + int i, idx = 0; + u64 mask = regs->mask; + + if (!(mask & (1 << id))) + return -EINVAL; + + for (i = 0; i < id; i++) { + if (mask & (1 << i)) + idx++; + } + + *valp = regs->regs[idx]; + return 0; +} diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index a3d42cd..d6e8b6a 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -1,8 +1,14 @@ #ifndef __PERF_REGS_H #define __PERF_REGS_H +#include "types.h" +#include "event.h" + #ifdef HAVE_PERF_REGS_SUPPORT #include + +int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); + #else #define PERF_REGS_MASK 0 @@ -10,5 +16,12 @@ static inline const char *perf_reg_name(int id __maybe_unused) { return NULL; } + +static inline int perf_reg_value(u64 *valp __maybe_unused, + struct regs_dump *regs __maybe_unused, + int id __maybe_unused) +{ + return 0; +} #endif /* HAVE_PERF_REGS_SUPPORT */ #endif /* __PERF_REGS_H */ diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 79dbfbb..bd5768d 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -390,30 +390,13 @@ static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, return !(size == sizeof(*data)); } -static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id) -{ - int i, idx = 0; - u64 mask = regs->mask; - - if (!(mask & (1 << id))) - return -EINVAL; - - for (i = 0; i < id; i++) { - if (mask & (1 << i)) - idx++; - } - - *valp = regs->regs[idx]; - return 0; -} - static int access_mem(unw_addr_space_t __maybe_unused as, unw_word_t addr, unw_word_t *valp, int __write, void *arg) { struct unwind_info *ui = arg; struct stack_dump *stack = &ui->sample->user_stack; - unw_word_t start, end; + u64 start, end; int offset; int ret; @@ -423,7 +406,7 @@ static int access_mem(unw_addr_space_t __maybe_unused as, return 0; } - ret = reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); + ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); if (ret) return ret; @@ -436,8 +419,9 @@ static int access_mem(unw_addr_space_t __maybe_unused as, if (addr < start || addr + sizeof(unw_word_t) >= end) { ret = access_dso_mem(ui, addr, valp); if (ret) { - pr_debug("unwind: access_mem %p not inside range %p-%p\n", - (void *)addr, (void *)start, (void *)end); + pr_debug("unwind: access_mem %p not inside range" + " 0x%" PRIx64 "-0x%" PRIx64 "\n", + (void *) addr, start, end); *valp = 0; return ret; } @@ -446,8 +430,8 @@ static int access_mem(unw_addr_space_t __maybe_unused as, offset = addr - start; *valp = *(unw_word_t *)&stack->data[offset]; - pr_debug("unwind: access_mem addr %p, val %lx, offset %d\n", - (void *)addr, (unsigned long)*valp, offset); + pr_debug("unwind: access_mem addr %p val %lx, offset %d\n", + (void *) addr, (unsigned long)*valp, offset); return 0; } @@ -457,6 +441,7 @@ static int access_reg(unw_addr_space_t __maybe_unused as, { struct unwind_info *ui = arg; int id, ret; + u64 val; /* Don't support write, I suspect we don't need it. */ if (__write) { @@ -473,12 +458,13 @@ static int access_reg(unw_addr_space_t __maybe_unused as, if (id < 0) return -EINVAL; - ret = reg_value(valp, &ui->sample->user_regs, id); + ret = perf_reg_value(&val, &ui->sample->user_regs, id); if (ret) { pr_err("unwind: can't read reg %d\n", regnum); return ret; } + *valp = (unw_word_t) val; pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); return 0; } @@ -572,7 +558,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct machine *machine, struct thread *thread, struct perf_sample *data, int max_stack) { - unw_word_t ip; + u64 ip; struct unwind_info ui = { .sample = data, .thread = thread, @@ -583,7 +569,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, if (!data->user_regs.regs) return -EINVAL; - ret = reg_value(&ip, &data->user_regs, PERF_REG_IP); + ret = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP); if (ret) return ret; -- cgit v0.10.2 From cd0cfad74eb88e54ba9d205da3ed376e48981448 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 9 Dec 2013 17:14:24 +0100 Subject: perf tools: Move fs.* to lib/api/fs/ Move to generic library and kill magic.h as it is needed only in fs.h. Signed-off-by: Borislav Petkov Cc: Adrian Hunter Cc: Andi Kleen Cc: Arjan van de Ven Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Robert Richter Cc: Stanislav Fomichev Cc: Stephane Eranian Cc: Steven Rostedt Link: http://lkml.kernel.org/r/1386605664-24041-3-git-send-email-bp@alien8.de Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index ed2f51e..ce00f7e 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -9,8 +9,10 @@ LIB_H= LIB_OBJS= LIB_H += fs/debugfs.h +LIB_H += fs/fs.h LIB_OBJS += $(OUTPUT)fs/debugfs.o +LIB_OBJS += $(OUTPUT)fs/fs.o LIBFILE = libapikfs.a diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c new file mode 100644 index 0000000..5b5eb78 --- /dev/null +++ b/tools/lib/api/fs/fs.c @@ -0,0 +1,124 @@ +/* TODO merge/factor in debugfs.c here */ + +#include +#include +#include +#include +#include + +#include "debugfs.h" +#include "fs.h" + +static const char * const sysfs__fs_known_mountpoints[] = { + "/sys", + 0, +}; + +static const char * const procfs__known_mountpoints[] = { + "/proc", + 0, +}; + +struct fs { + const char *name; + const char * const *mounts; + char path[PATH_MAX + 1]; + bool found; + long magic; +}; + +enum { + FS__SYSFS = 0, + FS__PROCFS = 1, +}; + +static struct fs fs__entries[] = { + [FS__SYSFS] = { + .name = "sysfs", + .mounts = sysfs__fs_known_mountpoints, + .magic = SYSFS_MAGIC, + }, + [FS__PROCFS] = { + .name = "proc", + .mounts = procfs__known_mountpoints, + .magic = PROC_SUPER_MAGIC, + }, +}; + +static bool fs__read_mounts(struct fs *fs) +{ + bool found = false; + char type[100]; + FILE *fp; + + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + while (!found && + fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", + fs->path, type) == 2) { + + if (strcmp(type, fs->name) == 0) + found = true; + } + + fclose(fp); + return fs->found = found; +} + +static int fs__valid_mount(const char *fs, long magic) +{ + struct statfs st_fs; + + if (statfs(fs, &st_fs) < 0) + return -ENOENT; + else if (st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +static bool fs__check_mounts(struct fs *fs) +{ + const char * const *ptr; + + ptr = fs->mounts; + while (*ptr) { + if (fs__valid_mount(*ptr, fs->magic) == 0) { + fs->found = true; + strcpy(fs->path, *ptr); + return true; + } + ptr++; + } + + return false; +} + +static const char *fs__get_mountpoint(struct fs *fs) +{ + if (fs__check_mounts(fs)) + return fs->path; + + return fs__read_mounts(fs) ? fs->path : NULL; +} + +static const char *fs__mountpoint(int idx) +{ + struct fs *fs = &fs__entries[idx]; + + if (fs->found) + return (const char *)fs->path; + + return fs__get_mountpoint(fs); +} + +#define FS__MOUNTPOINT(name, idx) \ +const char *name##__mountpoint(void) \ +{ \ + return fs__mountpoint(idx); \ +} + +FS__MOUNTPOINT(sysfs, FS__SYSFS); +FS__MOUNTPOINT(procfs, FS__PROCFS); diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h new file mode 100644 index 0000000..cb70495 --- /dev/null +++ b/tools/lib/api/fs/fs.h @@ -0,0 +1,14 @@ +#ifndef __API_FS__ +#define __API_FS__ + +#ifndef SYSFS_MAGIC +#define SYSFS_MAGIC 0x62656572 +#endif + +#ifndef PROC_SUPER_MAGIC +#define PROC_SUPER_MAGIC 0x9fa0 +#endif + +const char *sysfs__mountpoint(void); +const char *procfs__mountpoint(void); +#endif /* __API_FS__ */ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f99a392..869b34a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -218,7 +218,6 @@ LIB_H += util/include/linux/ctype.h LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h LIB_H += util/include/linux/export.h -LIB_H += util/include/linux/magic.h LIB_H += util/include/linux/poison.h LIB_H += util/include/linux/prefetch.h LIB_H += util/include/linux/rbtree.h @@ -244,7 +243,6 @@ LIB_H += util/cache.h LIB_H += util/callchain.h LIB_H += util/build-id.h LIB_H += util/debug.h -LIB_H += util/fs.h LIB_H += util/pmu.h LIB_H += util/event.h LIB_H += util/evsel.h @@ -306,7 +304,6 @@ LIB_OBJS += $(OUTPUT)util/annotate.o LIB_OBJS += $(OUTPUT)util/build-id.o LIB_OBJS += $(OUTPUT)util/config.o LIB_OBJS += $(OUTPUT)util/ctype.o -LIB_OBJS += $(OUTPUT)util/fs.o LIB_OBJS += $(OUTPUT)util/pmu.o LIB_OBJS += $(OUTPUT)util/environment.o LIB_OBJS += $(OUTPUT)util/event.o diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 4db0ae6..8605ff5 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2,7 +2,7 @@ #include "parse-events.h" #include "evsel.h" #include "evlist.h" -#include "fs.h" +#include #include #include "tests.h" #include diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index a9b48c4..7fe4994 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -1,5 +1,5 @@ #include "util.h" -#include "fs.h" +#include #include "../perf.h" #include "cpumap.h" #include diff --git a/tools/perf/util/fs.c b/tools/perf/util/fs.c deleted file mode 100644 index f5be1f2..0000000 --- a/tools/perf/util/fs.c +++ /dev/null @@ -1,119 +0,0 @@ - -/* TODO merge/factor into tools/lib/lk/debugfs.c */ - -#include "util.h" -#include "util/fs.h" - -static const char * const sysfs__fs_known_mountpoints[] = { - "/sys", - 0, -}; - -static const char * const procfs__known_mountpoints[] = { - "/proc", - 0, -}; - -struct fs { - const char *name; - const char * const *mounts; - char path[PATH_MAX + 1]; - bool found; - long magic; -}; - -enum { - FS__SYSFS = 0, - FS__PROCFS = 1, -}; - -static struct fs fs__entries[] = { - [FS__SYSFS] = { - .name = "sysfs", - .mounts = sysfs__fs_known_mountpoints, - .magic = SYSFS_MAGIC, - }, - [FS__PROCFS] = { - .name = "proc", - .mounts = procfs__known_mountpoints, - .magic = PROC_SUPER_MAGIC, - }, -}; - -static bool fs__read_mounts(struct fs *fs) -{ - bool found = false; - char type[100]; - FILE *fp; - - fp = fopen("/proc/mounts", "r"); - if (fp == NULL) - return NULL; - - while (!found && - fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", - fs->path, type) == 2) { - - if (strcmp(type, fs->name) == 0) - found = true; - } - - fclose(fp); - return fs->found = found; -} - -static int fs__valid_mount(const char *fs, long magic) -{ - struct statfs st_fs; - - if (statfs(fs, &st_fs) < 0) - return -ENOENT; - else if (st_fs.f_type != magic) - return -ENOENT; - - return 0; -} - -static bool fs__check_mounts(struct fs *fs) -{ - const char * const *ptr; - - ptr = fs->mounts; - while (*ptr) { - if (fs__valid_mount(*ptr, fs->magic) == 0) { - fs->found = true; - strcpy(fs->path, *ptr); - return true; - } - ptr++; - } - - return false; -} - -static const char *fs__get_mountpoint(struct fs *fs) -{ - if (fs__check_mounts(fs)) - return fs->path; - - return fs__read_mounts(fs) ? fs->path : NULL; -} - -static const char *fs__mountpoint(int idx) -{ - struct fs *fs = &fs__entries[idx]; - - if (fs->found) - return (const char *)fs->path; - - return fs__get_mountpoint(fs); -} - -#define FS__MOUNTPOINT(name, idx) \ -const char *name##__mountpoint(void) \ -{ \ - return fs__mountpoint(idx); \ -} - -FS__MOUNTPOINT(sysfs, FS__SYSFS); -FS__MOUNTPOINT(procfs, FS__PROCFS); diff --git a/tools/perf/util/fs.h b/tools/perf/util/fs.h deleted file mode 100644 index 5e09ce1..0000000 --- a/tools/perf/util/fs.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __PERF_FS -#define __PERF_FS - -const char *sysfs__mountpoint(void); -const char *procfs__mountpoint(void); - -#endif /* __PERF_FS */ diff --git a/tools/perf/util/include/linux/magic.h b/tools/perf/util/include/linux/magic.h deleted file mode 100644 index 07d63cf..0000000 --- a/tools/perf/util/include/linux/magic.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _PERF_LINUX_MAGIC_H_ -#define _PERF_LINUX_MAGIC_H_ - -#ifndef DEBUGFS_MAGIC -#define DEBUGFS_MAGIC 0x64626720 -#endif - -#ifndef SYSFS_MAGIC -#define SYSFS_MAGIC 0x62656572 -#endif - -#ifndef PROC_SUPER_MAGIC -#define PROC_SUPER_MAGIC 0x9fa0 -#endif - -#endif diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b752ecb..00a7dcb 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -3,7 +3,7 @@ #include #include #include -#include "fs.h" +#include #include #include "util.h" #include "pmu.h" diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 595bfc7..16a475a 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -17,6 +17,6 @@ util/xyarray.c util/cgroup.c util/rblist.c util/strlist.c -util/fs.c +../lib/api/fs/fs.c util/trace-event.c ../../lib/rbtree.c diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 3737625..049e0a0 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -2,7 +2,7 @@ #include "evsel.h" #include "cpumap.h" #include "parse-events.h" -#include "fs.h" +#include #include "util.h" typedef void (*setup_probe_fn_t)(struct perf_evsel *evsel); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 42ad667..9f66549 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,6 +1,6 @@ #include "../perf.h" #include "util.h" -#include "fs.h" +#include #include #ifdef HAVE_BACKTRACE_SUPPORT #include -- cgit v0.10.2 From 0e55fa1131dcacfc8f18fabecc93643a105bdc14 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 5 Feb 2014 15:51:53 +0100 Subject: perf tools: Move hash.h header Put it into tools/include/ for general usage. Signed-off-by: Borislav Petkov Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Richter Link: http://lkml.kernel.org/r/1391611914-26054-3-git-send-email-bp@alien8.de Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h new file mode 100644 index 0000000..d026c65 --- /dev/null +++ b/tools/include/linux/hash.h @@ -0,0 +1,5 @@ +#include "../../../include/linux/hash.h" + +#ifndef _TOOLS_LINUX_HASH_H +#define _TOOLS_LINUX_HASH_H +#endif diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f41572d..c0c87c8 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -6,6 +6,7 @@ tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/include/asm/bug.h tools/include/linux/compiler.h +tools/include/linux/hash.h include/linux/const.h include/linux/perf_event.h include/linux/rbtree.h diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 869b34a..496871a 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -208,7 +208,7 @@ LIB_H += ../../include/uapi/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h LIB_H += ../../include/uapi/linux/const.h -LIB_H += ../../include/linux/hash.h +LIB_H += ../include/linux/hash.h LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/bitmap.h LIB_H += util/include/linux/bitops.h diff --git a/tools/perf/util/include/linux/hash.h b/tools/perf/util/include/linux/hash.h deleted file mode 100644 index 201f573..0000000 --- a/tools/perf/util/include/linux/hash.h +++ /dev/null @@ -1,5 +0,0 @@ -#include "../../../../include/linux/hash.h" - -#ifndef PERF_HASH_H -#define PERF_HASH_H -#endif -- cgit v0.10.2 From 5072f2733afe26c354aa9d277cb1544e4b0b3846 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 5 Feb 2014 15:51:54 +0100 Subject: perf tools: Drop prefetch.h This was needed at the time before e66eed651fd1 ("list: remove prefetching from regular list iterators") where the list iterators did prefetch elements. This turned out to be counter-productive and hurt performance and they were removed. Which makes the prefetch.h header unused so drop it. Signed-off-by: Borislav Petkov Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Richter Link: http://lkml.kernel.org/r/1391611914-26054-4-git-send-email-bp@alien8.de Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 496871a..77b153f 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -219,7 +219,6 @@ LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h LIB_H += util/include/linux/export.h LIB_H += util/include/linux/poison.h -LIB_H += util/include/linux/prefetch.h LIB_H += util/include/linux/rbtree.h LIB_H += util/include/linux/rbtree_augmented.h LIB_H += util/include/linux/string.h diff --git a/tools/perf/util/include/linux/list.h b/tools/perf/util/include/linux/list.h index 1d928a0..bfe0a2a 100644 --- a/tools/perf/util/include/linux/list.h +++ b/tools/perf/util/include/linux/list.h @@ -1,5 +1,4 @@ #include -#include #include "../../../../include/linux/list.h" diff --git a/tools/perf/util/include/linux/prefetch.h b/tools/perf/util/include/linux/prefetch.h deleted file mode 100644 index 7841e48..0000000 --- a/tools/perf/util/include/linux/prefetch.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef PERF_LINUX_PREFETCH_H -#define PERF_LINUX_PREFETCH_H - -static inline void prefetch(void *a __attribute__((unused))) { } - -#endif -- cgit v0.10.2 From c96626b1da589075b1b3e815239ceace11350662 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Feb 2014 17:09:10 -0300 Subject: perf symbols: No need to export dso__first_symbol There are no users outside the file that defines it. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-sybihqycxrmssa4df9516jib@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a9d758a..46e2ede 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -410,7 +410,7 @@ struct symbol *dso__find_symbol(struct dso *dso, return symbols__find(&dso->symbols[type], addr); } -struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) +static struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) { return symbols__first(&dso->symbols[type]); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fffe288..538d484 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -223,7 +223,6 @@ struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, const char *name); -struct symbol *dso__first_symbol(struct dso *dso, enum map_type type); int filename__read_build_id(const char *filename, void *bf, size_t size); int sysfs__read_build_id(const char *filename, void *bf, size_t size); -- cgit v0.10.2 From ee45b6c2c52d4217aae82eb2e8136fa2f8b93303 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:04 +0000 Subject: perf probe: Fix to do exit call for symbol maps Some perf-probe commands do symbol_init() but doesn't do exit call. This fixes that to call symbol_exit() and releases machine if needed. This also merges init_vmlinux() and init_user_exec() because both of them are doing similar things. (init_user_exec() just skips init vmlinux related symbol maps) Changes from v2: - Not to set symbol_conf.try_vmlinux_path in init_symbol_maps() (Thanks to Namhyung Kim!) Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053204.29635.28334.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d8b048c..9aa7783 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -73,31 +73,31 @@ static char *synthesize_perf_probe_point(struct perf_probe_point *pp); static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec); static void clear_probe_trace_event(struct probe_trace_event *tev); -static struct machine machine; +static struct machine *host_machine; /* Initialize symbol maps and path of vmlinux/modules */ -static int init_vmlinux(void) +static int init_symbol_maps(bool user_only) { int ret; symbol_conf.sort_by_name = true; - if (symbol_conf.vmlinux_name == NULL) - symbol_conf.try_vmlinux_path = true; - else - pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); ret = symbol__init(); if (ret < 0) { pr_debug("Failed to init symbol map.\n"); goto out; } - ret = machine__init(&machine, "", HOST_KERNEL_ID); - if (ret < 0) - goto out; + if (host_machine || user_only) /* already initialized */ + return 0; - if (machine__create_kernel_maps(&machine) < 0) { - pr_debug("machine__create_kernel_maps() failed.\n"); - goto out; + if (symbol_conf.vmlinux_name) + pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name); + + host_machine = machine__new_host(); + if (!host_machine) { + pr_debug("machine__new_host() failed.\n"); + symbol__exit(); + ret = -1; } out: if (ret < 0) @@ -105,21 +105,30 @@ out: return ret; } +static void exit_symbol_maps(void) +{ + if (host_machine) { + machine__delete(host_machine); + host_machine = NULL; + } + symbol__exit(); +} + static struct symbol *__find_kernel_function_by_name(const char *name, struct map **mapp) { - return machine__find_kernel_function_by_name(&machine, name, mapp, + return machine__find_kernel_function_by_name(host_machine, name, mapp, NULL); } static struct map *kernel_get_module_map(const char *module) { struct rb_node *nd; - struct map_groups *grp = &machine.kmaps; + struct map_groups *grp = &host_machine->kmaps; /* A file path -- this is an offline module */ if (module && strchr(module, '/')) - return machine__new_module(&machine, 0, module); + return machine__new_module(host_machine, 0, module); if (!module) module = "kernel"; @@ -141,7 +150,7 @@ static struct dso *kernel_get_module_dso(const char *module) const char *vmlinux_name; if (module) { - list_for_each_entry(dso, &machine.kernel_dsos, node) { + list_for_each_entry(dso, &host_machine->kernel_dsos, node) { if (strncmp(dso->short_name + 1, module, dso->short_name_len - 2) == 0) goto found; @@ -150,7 +159,7 @@ static struct dso *kernel_get_module_dso(const char *module) return NULL; } - map = machine.vmlinux_maps[MAP__FUNCTION]; + map = host_machine->vmlinux_maps[MAP__FUNCTION]; dso = map->dso; vmlinux_name = symbol_conf.vmlinux_name; @@ -173,20 +182,6 @@ const char *kernel_get_module_path(const char *module) return (dso) ? dso->long_name : NULL; } -static int init_user_exec(void) -{ - int ret = 0; - - symbol_conf.try_vmlinux_path = false; - symbol_conf.sort_by_name = true; - ret = symbol__init(); - - if (ret < 0) - pr_debug("Failed to init symbol map.\n"); - - return ret; -} - static int convert_exec_to_group(const char *exec, char **result) { char *ptr1, *ptr2, *exec_copy; @@ -563,7 +558,7 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) * Show line-range always requires debuginfo to find source file and * line number. */ -int show_line_range(struct line_range *lr, const char *module) +static int __show_line_range(struct line_range *lr, const char *module) { int l = 1; struct line_node *ln; @@ -573,10 +568,6 @@ int show_line_range(struct line_range *lr, const char *module) char *tmp; /* Search a line range */ - ret = init_vmlinux(); - if (ret < 0) - return ret; - dinfo = open_debuginfo(module); if (!dinfo) { pr_warning("Failed to open debuginfo file.\n"); @@ -646,6 +637,19 @@ end: return ret; } +int show_line_range(struct line_range *lr, const char *module) +{ + int ret; + + ret = init_symbol_maps(false); + if (ret < 0) + return ret; + ret = __show_line_range(lr, module); + exit_symbol_maps(); + + return ret; +} + static int show_available_vars_at(struct debuginfo *dinfo, struct perf_probe_event *pev, int max_vls, struct strfilter *_filter, @@ -707,14 +711,15 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, int i, ret = 0; struct debuginfo *dinfo; - ret = init_vmlinux(); + ret = init_symbol_maps(false); if (ret < 0) return ret; dinfo = open_debuginfo(module); if (!dinfo) { pr_warning("Failed to open debuginfo file.\n"); - return -ENOENT; + ret = -ENOENT; + goto out; } setup_pager(); @@ -724,6 +729,8 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, externs); debuginfo__delete(dinfo); +out: + exit_symbol_maps(); return ret; } @@ -1807,7 +1814,7 @@ int show_perf_probe_events(void) if (fd < 0) return fd; - ret = init_vmlinux(); + ret = init_symbol_maps(false); if (ret < 0) return ret; @@ -1820,6 +1827,7 @@ int show_perf_probe_events(void) close(fd); } + exit_symbol_maps(); return ret; } @@ -2135,12 +2143,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, if (pkgs == NULL) return -ENOMEM; - if (!pevs->uprobes) - /* Init vmlinux path */ - ret = init_vmlinux(); - else - ret = init_user_exec(); - + ret = init_symbol_maps(pevs->uprobes); if (ret < 0) { free(pkgs); return ret; @@ -2174,6 +2177,7 @@ end: zfree(&pkgs[i].tevs); } free(pkgs); + exit_symbol_maps(); return ret; } @@ -2347,7 +2351,7 @@ static int available_kernel_funcs(const char *module) struct map *map; int ret; - ret = init_vmlinux(); + ret = init_symbol_maps(false); if (ret < 0) return ret; @@ -2356,7 +2360,10 @@ static int available_kernel_funcs(const char *module) pr_err("Failed to find %s map.\n", (module) ? : "kernel"); return -EINVAL; } - return __show_available_funcs(map); + ret = __show_available_funcs(map); + exit_symbol_maps(); + + return ret; } static int available_user_funcs(const char *target) @@ -2364,7 +2371,7 @@ static int available_user_funcs(const char *target) struct map *map; int ret; - ret = init_user_exec(); + ret = init_symbol_maps(true); if (ret < 0) return ret; @@ -2372,6 +2379,7 @@ static int available_user_funcs(const char *target) ret = __show_available_funcs(map); dso__delete(map->dso); map__delete(map); + exit_symbol_maps(); return ret; } -- cgit v0.10.2 From f49540b17c1c6fa5a0734cc1d8b57614fd2036be Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:06 +0000 Subject: perf probe: Remove incorrect symbol check for --list Remove unneeded symbol check for --list option. This code actually checks whether the given symbol exists in the kernel. But this is incorrect for online kernel/module and offline module too: - For online kernel/module, the kprobes itself already ensured the symbol exist in the kernel. - For offline module, this code can't access the offlined modules. Ignore it. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053206.29635.7453.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 9aa7783..a4649e7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -739,14 +739,6 @@ out: static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, struct perf_probe_point *pp) { - struct symbol *sym; - - sym = __find_kernel_function_by_name(tp->symbol, NULL); - if (!sym) { - pr_err("Failed to find symbol %s in kernel.\n", tp->symbol); - return -ENOENT; - } - return convert_to_perf_probe_point(tp, pp); } -- cgit v0.10.2 From 5a62257a3ddd1a09cf278eae0697fcbe20897447 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:09 +0000 Subject: perf probe: Replace line_list with intlist Replace line_list (struct line_node) with intlist for reducing similar codes. Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053209.29635.81043.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 7894888..cdcd4eb 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -268,9 +268,9 @@ static int opt_set_filter(const struct option *opt __maybe_unused, return 0; } -static void init_params(void) +static int init_params(void) { - line_range__init(¶ms.line_range); + return line_range__init(¶ms.line_range); } static void cleanup_params(void) @@ -515,9 +515,11 @@ int cmd_probe(int argc, const char **argv, const char *prefix) { int ret; - init_params(); - ret = __cmd_probe(argc, argv, prefix); - cleanup_params(); + ret = init_params(); + if (!ret) { + ret = __cmd_probe(argc, argv, prefix); + cleanup_params(); + } return ret; } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a4649e7..f70fd08 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -561,7 +561,7 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) static int __show_line_range(struct line_range *lr, const char *module) { int l = 1; - struct line_node *ln; + struct int_node *ln; struct debuginfo *dinfo; FILE *fp; int ret; @@ -614,8 +614,8 @@ static int __show_line_range(struct line_range *lr, const char *module) goto end; } - list_for_each_entry(ln, &lr->line_list, list) { - for (; ln->line > l; l++) { + intlist__for_each(ln, lr->line_list) { + for (; ln->i > l; l++) { ret = show_one_line(fp, l - lr->offset); if (ret < 0) goto end; @@ -775,24 +775,22 @@ int show_available_vars(struct perf_probe_event *pevs __maybe_unused, void line_range__clear(struct line_range *lr) { - struct line_node *ln; - free(lr->function); free(lr->file); free(lr->path); free(lr->comp_dir); - while (!list_empty(&lr->line_list)) { - ln = list_first_entry(&lr->line_list, struct line_node, list); - list_del(&ln->list); - free(ln); - } + intlist__delete(lr->line_list); memset(lr, 0, sizeof(*lr)); } -void line_range__init(struct line_range *lr) +int line_range__init(struct line_range *lr) { memset(lr, 0, sizeof(*lr)); - INIT_LIST_HEAD(&lr->line_list); + lr->line_list = intlist__new(NULL); + if (!lr->line_list) + return -ENOMEM; + else + return 0; } static int parse_line_num(char **ptr, int *val, const char *what) diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index fcaf727..776c934 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -2,6 +2,7 @@ #define _PROBE_EVENT_H #include +#include "intlist.h" #include "strlist.h" #include "strfilter.h" @@ -76,13 +77,6 @@ struct perf_probe_event { struct perf_probe_arg *args; /* Arguments */ }; - -/* Line number container */ -struct line_node { - struct list_head list; - int line; -}; - /* Line range */ struct line_range { char *file; /* File name */ @@ -92,7 +86,7 @@ struct line_range { int offset; /* Start line offset */ char *path; /* Real path name */ char *comp_dir; /* Compile directory */ - struct list_head line_list; /* Visible lines */ + struct intlist *line_list; /* Visible lines */ }; /* List of variables */ @@ -124,7 +118,7 @@ extern int parse_line_range_desc(const char *cmd, struct line_range *lr); extern void line_range__clear(struct line_range *lr); /* Initialize line range */ -extern void line_range__init(struct line_range *lr); +extern int line_range__init(struct line_range *lr); /* Internal use: Return kernel/module path */ extern const char *kernel_get_module_path(const char *module); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 061edb1..e5e589f 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -35,6 +35,7 @@ #include #include "event.h" #include "debug.h" +#include "intlist.h" #include "util.h" #include "symbol.h" #include "probe-finder.h" @@ -42,65 +43,6 @@ /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 -/* Line number list operations */ - -/* Add a line to line number list */ -static int line_list__add_line(struct list_head *head, int line) -{ - struct line_node *ln; - struct list_head *p; - - /* Reverse search, because new line will be the last one */ - list_for_each_entry_reverse(ln, head, list) { - if (ln->line < line) { - p = &ln->list; - goto found; - } else if (ln->line == line) /* Already exist */ - return 1; - } - /* List is empty, or the smallest entry */ - p = head; -found: - pr_debug("line list: add a line %u\n", line); - ln = zalloc(sizeof(struct line_node)); - if (ln == NULL) - return -ENOMEM; - ln->line = line; - INIT_LIST_HEAD(&ln->list); - list_add(&ln->list, p); - return 0; -} - -/* Check if the line in line number list */ -static int line_list__has_line(struct list_head *head, int line) -{ - struct line_node *ln; - - /* Reverse search, because new line will be the last one */ - list_for_each_entry(ln, head, list) - if (ln->line == line) - return 1; - - return 0; -} - -/* Init line number list */ -static void line_list__init(struct list_head *head) -{ - INIT_LIST_HEAD(head); -} - -/* Free line number list */ -static void line_list__free(struct list_head *head) -{ - struct line_node *ln; - while (!list_empty(head)) { - ln = list_first_entry(head, struct line_node, list); - list_del(&ln->list); - free(ln); - } -} - /* Dwarf FL wrappers */ static char *debuginfo_path; /* Currently dummy */ @@ -880,7 +822,7 @@ static int find_probe_point_by_line(struct probe_finder *pf) } /* Find lines which match lazy pattern */ -static int find_lazy_match_lines(struct list_head *head, +static int find_lazy_match_lines(struct intlist *list, const char *fname, const char *pat) { FILE *fp; @@ -901,7 +843,7 @@ static int find_lazy_match_lines(struct list_head *head, line[len - 1] = '\0'; if (strlazymatch(line, pat)) { - line_list__add_line(head, linenum); + intlist__add(list, linenum); count++; } linenum++; @@ -924,7 +866,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno, Dwarf_Die *sc_die, die_mem; int ret; - if (!line_list__has_line(&pf->lcache, lineno) || + if (!intlist__has_entry(pf->lcache, lineno) || strtailcmp(fname, pf->fname) != 0) return 0; @@ -952,9 +894,9 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { int ret = 0; - if (list_empty(&pf->lcache)) { + if (intlist__empty(pf->lcache)) { /* Matching lazy line pattern */ - ret = find_lazy_match_lines(&pf->lcache, pf->fname, + ret = find_lazy_match_lines(pf->lcache, pf->fname, pf->pev->point.lazy_line); if (ret <= 0) return ret; @@ -1096,7 +1038,9 @@ static int debuginfo__find_probes(struct debuginfo *dbg, #endif off = 0; - line_list__init(&pf->lcache); + pf->lcache = intlist__new(NULL); + if (!pf->lcache) + return -ENOMEM; /* Fastpath: lookup by function name from .debug_pubnames section */ if (pp->function) { @@ -1149,7 +1093,8 @@ static int debuginfo__find_probes(struct debuginfo *dbg, } found: - line_list__free(&pf->lcache); + intlist__delete(pf->lcache); + pf->lcache = NULL; return ret; } @@ -1537,7 +1482,7 @@ static int line_range_add_line(const char *src, unsigned int lineno, if (lr->path == NULL) return -ENOMEM; } - return line_list__add_line(&lr->line_list, lineno); + return intlist__add(lr->line_list, lineno); } static int line_range_walk_cb(const char *fname, int lineno, @@ -1565,7 +1510,7 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) /* Update status */ if (ret >= 0) - if (!list_empty(&lf->lr->line_list)) + if (!intlist__empty(lf->lr->line_list)) ret = lf->found = 1; else ret = 0; /* Lines are not found */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index ffc33cd..592c4da 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -3,6 +3,7 @@ #include #include "util.h" +#include "intlist.h" #include "probe-event.h" #define MAX_PROBE_BUFFER 1024 @@ -66,7 +67,7 @@ struct probe_finder { const char *fname; /* Real file name */ Dwarf_Die cu_die; /* Current CU */ Dwarf_Die sp_die; - struct list_head lcache; /* Line cache for lazy match */ + struct intlist *lcache; /* Line cache for lazy match */ /* For variable searching */ #if _ELFUTILS_PREREQ(0, 142) -- cgit v0.10.2 From 2df58634cd2ad33d7c7ca2e02e1a44db6c8cf68d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:11 +0000 Subject: perf probe: Unify show_available_functions for uprobes/kprobes Unify show_available_functions for uprobes/kprobes to cleanup and reduce the code. This also improves error messages. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053211.29635.20563.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index f70fd08..ace3ba3 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2323,66 +2323,46 @@ static int filter_available_functions(struct map *map __maybe_unused, return 1; } -static int __show_available_funcs(struct map *map) -{ - if (map__load(map, filter_available_functions)) { - pr_err("Failed to load map.\n"); - return -EINVAL; - } - if (!dso__sorted_by_name(map->dso, map->type)) - dso__sort_by_name(map->dso, map->type); - - dso__fprintf_symbols_by_name(map->dso, map->type, stdout); - return 0; -} - -static int available_kernel_funcs(const char *module) +int show_available_funcs(const char *target, struct strfilter *_filter, + bool user) { struct map *map; int ret; - ret = init_symbol_maps(false); + ret = init_symbol_maps(user); if (ret < 0) return ret; - map = kernel_get_module_map(module); + /* Get a symbol map */ + if (user) + map = dso__new_map(target); + else + map = kernel_get_module_map(target); if (!map) { - pr_err("Failed to find %s map.\n", (module) ? : "kernel"); + pr_err("Failed to get a map for %s\n", (target) ? : "kernel"); return -EINVAL; } - ret = __show_available_funcs(map); - exit_symbol_maps(); - - return ret; -} - -static int available_user_funcs(const char *target) -{ - struct map *map; - int ret; - ret = init_symbol_maps(true); - if (ret < 0) - return ret; - - map = dso__new_map(target); - ret = __show_available_funcs(map); - dso__delete(map->dso); - map__delete(map); - exit_symbol_maps(); - return ret; -} - -int show_available_funcs(const char *target, struct strfilter *_filter, - bool user) -{ - setup_pager(); + /* Load symbols with given filter */ available_func_filter = _filter; + if (map__load(map, filter_available_functions)) { + pr_err("Failed to load symbols in %s\n", (target) ? : "kernel"); + goto end; + } + if (!dso__sorted_by_name(map->dso, map->type)) + dso__sort_by_name(map->dso, map->type); - if (!user) - return available_kernel_funcs(target); + /* Show all (filtered) symbols */ + setup_pager(); + dso__fprintf_symbols_by_name(map->dso, map->type, stdout); +end: + if (user) { + dso__delete(map->dso); + map__delete(map); + } + exit_symbol_maps(); - return available_user_funcs(target); + return ret; } /* -- cgit v0.10.2 From fb226ccd2a6f77be13009edc196da2077800066b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:13 +0000 Subject: perf probe: Show in what binaries/modules probes are set Show the name of binary file or modules in which the probes are set with --list option. Without this change; # ./perf probe -m drm drm_av_sync_delay # ./perf probe -x perf dso__load_vmlinux # ./perf probe -l probe:drm_av_sync_delay (on drm_av_sync_delay) probe_perf:dso__load_vmlinux (on 0x000000000006d110) With this change; # ./perf probe -l probe:drm_av_sync_delay (on drm_av_sync_delay in drm) probe_perf:dso__load_vmlinux (on 0x000000000006d110 in /kbuild/ksrc/linux-3/tools/perf/perf) Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053213.29635.69948.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index ace3ba3..de9fe90 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1728,7 +1728,8 @@ static struct strlist *get_probe_trace_command_rawlist(int fd) } /* Show an event */ -static int show_perf_probe_event(struct perf_probe_event *pev) +static int show_perf_probe_event(struct perf_probe_event *pev, + const char *module) { int i, ret; char buf[128]; @@ -1744,6 +1745,8 @@ static int show_perf_probe_event(struct perf_probe_event *pev) return ret; printf(" %-20s (on %s", buf, place); + if (module) + printf(" in %s", module); if (pev->nargs > 0) { printf(" with"); @@ -1781,7 +1784,8 @@ static int __show_perf_probe_events(int fd, bool is_kprobe) ret = convert_to_perf_probe_event(&tev, &pev, is_kprobe); if (ret >= 0) - ret = show_perf_probe_event(&pev); + ret = show_perf_probe_event(&pev, + tev.point.module); } clear_perf_probe_event(&pev); clear_probe_trace_event(&tev); @@ -1980,7 +1984,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, group = pev->group; pev->event = tev->event; pev->group = tev->group; - show_perf_probe_event(pev); + show_perf_probe_event(pev, tev->point.module); /* Trick here - restore current event/group */ pev->event = (char *)event; pev->group = (char *)group; -- cgit v0.10.2 From dfef99cd0b2c8abafb571e5992ce954135be5f40 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:16 +0000 Subject: perf probe: Use ref_reloc_sym based address instead of the symbol name Since several local symbols can have same name (e.g. t_show), we need to use the relative address from the symbol referred by kmap->ref_reloc_sym instead of the target symbol name itself. Because the kernel address space layout randomize (kASLR) changes the absolute address of kernel symbols, we can't rely on the absolute address. Note that this works only with debuginfo. E.g. without this change; ---- # ./perf probe -a "t_show \$vars" Added new events: probe:t_show (on t_show with $vars) probe:t_show_1 (on t_show with $vars) probe:t_show_2 (on t_show with $vars) probe:t_show_3 (on t_show with $vars) You can now use it in all perf tools, such as: perf record -e probe:t_show_3 -aR sleep 1 ---- OK, we have 4 different t_show()s. All functions have different arguments as below; ---- # cat /sys/kernel/debug/tracing/kprobe_events p:probe/t_show t_show m=%di:u64 v=%si:u64 p:probe/t_show_1 t_show m=%di:u64 v=%si:u64 t=%si:u64 p:probe/t_show_2 t_show m=%di:u64 v=%si:u64 fmt=%si:u64 p:probe/t_show_3 t_show m=%di:u64 v=%si:u64 file=%si:u64 ---- However, all of them have been put on the *same* address. ---- # cat /sys/kernel/debug/kprobes/list ffffffff810d9720 k t_show+0x0 [DISABLED] ffffffff810d9720 k t_show+0x0 [DISABLED] ffffffff810d9720 k t_show+0x0 [DISABLED] ffffffff810d9720 k t_show+0x0 [DISABLED] ---- With this change; ---- # ./perf probe -a "t_show \$vars" Added new events: probe:t_show (on t_show with $vars) probe:t_show_1 (on t_show with $vars) probe:t_show_2 (on t_show with $vars) probe:t_show_3 (on t_show with $vars) You can now use it in all perf tools, such as: perf record -e probe:t_show_3 -aR sleep 1 # cat /sys/kernel/debug/tracing/kprobe_events p:probe/t_show _stext+889880 m=%di:u64 v=%si:u64 p:probe/t_show_1 _stext+928568 m=%di:u64 v=%si:u64 t=%si:u64 p:probe/t_show_2 _stext+969512 m=%di:u64 v=%si:u64 fmt=%si:u64 p:probe/t_show_3 _stext+1001416 m=%di:u64 v=%si:u64 file=%si:u64 # cat /sys/kernel/debug/kprobes/list ffffffffb50d95e0 k t_show+0x0 [DISABLED] ffffffffb50e2d00 k t_show+0x0 [DISABLED] ffffffffb50f4990 k t_show+0x0 [DISABLED] ffffffffb50eccf0 k t_show+0x0 [DISABLED] ---- This time, each event is put in different address correctly. Note that currently this doesn't support address-based probe on modules (thus the probes on modules are symbol based), since it requires relative address probe syntax for kprobe-tracer, and it isn't implemented yet. One more note, this allows us to put events on correct address, but --list option should be updated to show correct corresponding source code. Changes from v2: - Refer kmap->ref_reloc_sym instead of "_stext". - Refer map->reloc to catch up the kASLR perf fix. Changes from v1: - Use _stext relative address instead of actual absolute address recorded in debuginfo. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053216.29635.22584.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index de9fe90..1ce2cb9 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -383,6 +383,51 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, return ret; } +static struct ref_reloc_sym *__kernel_get_ref_reloc_sym(void) +{ + /* kmap->ref_reloc_sym should be set if host_machine is initialized */ + struct kmap *kmap; + + kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); + return kmap->ref_reloc_sym; +} + +/* Post processing the probe events */ +static int post_process_probe_trace_events(struct probe_trace_event *tevs, + int ntevs, const char *module, + bool uprobe) +{ + struct ref_reloc_sym *reloc_sym; + char *tmp; + int i; + + if (uprobe) + return add_exec_to_probe_trace_events(tevs, ntevs, module); + + /* Note that currently ref_reloc_sym based probe is not for drivers */ + if (module) + return add_module_to_probe_trace_events(tevs, ntevs, module); + + reloc_sym = __kernel_get_ref_reloc_sym(); + if (!reloc_sym) { + pr_warning("Relocated base symbol is not found!\n"); + return -EINVAL; + } + + for (i = 0; i < ntevs; i++) { + if (tevs[i].point.address) { + tmp = strdup(reloc_sym->name); + if (!tmp) + return -ENOMEM; + free(tevs[i].point.symbol); + tevs[i].point.symbol = tmp; + tevs[i].point.offset = tevs[i].point.address - + reloc_sym->unrelocated_addr; + } + } + return 0; +} + static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) { int i; @@ -411,21 +456,16 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, return 0; } + pr_debug("Try to find probe point from debuginfo.\n"); /* Searching trace events corresponding to a probe event */ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs); debuginfo__delete(dinfo); if (ntevs > 0) { /* Succeeded to find trace events */ - pr_debug("find %d probe_trace_events.\n", ntevs); - if (target) { - if (pev->uprobes) - ret = add_exec_to_probe_trace_events(*tevs, - ntevs, target); - else - ret = add_module_to_probe_trace_events(*tevs, - ntevs, target); - } + pr_debug("Found %d probe_trace_events.\n", ntevs); + ret = post_process_probe_trace_events(*tevs, ntevs, + target, pev->uprobes); if (ret < 0) { clear_probe_trace_events(*tevs, ntevs); zfree(tevs); -- cgit v0.10.2 From f90acac75713cc6f18a4b2f1b9162bc1cd893c20 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:18 +0000 Subject: perf probe: Find given address from offline dwarf Find the given address from offline dwarfs instead of online kernel dwarfs. On the KASLR enabled kernel, the kernel text section is loaded with random offset, and the debuginfo__new_online_kernel can't handle it. So let's move to the offline dwarf loader instead of using the online dwarf loader. As a result, since we don't need debuginfo__new_online_kernel any more, this also removes the functions related to that. Without this change; # ./perf probe -l probe:t_show (on _stext+901288 with m v) probe:t_show_1 (on _stext+939624 with m v t) probe:t_show_2 (on _stext+980296 with m v fmt) probe:t_show_3 (on _stext+1014392 with m v file) With this change; # ./perf probe -l probe:t_show (on t_show@linux-3/kernel/trace/ftrace.c with m v) probe:t_show_1 (on t_show@linux-3/kernel/trace/trace.c with m v t) probe:t_show_2 (on t_show@kernel/trace/trace_printk.c with m v fmt) probe:t_show_3 (on t_show@kernel/trace/trace_events.c with m v file) Changes from v2: - Instead of retrying, directly opens offline dwarf. - Remove debuginfo__new_online_kernel and related functions. - Refer map->reloc to get the correct address of a symbol. - Add a special case for handling ref_reloc_sym based address. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053218.29635.74821.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 1ce2cb9..8e34c8d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -248,6 +248,18 @@ static struct debuginfo *open_debuginfo(const char *module) return debuginfo__new(path); } +static struct ref_reloc_sym *__kernel_get_ref_reloc_sym(void) +{ + /* kmap->ref_reloc_sym should be set if host_machine is initialized */ + struct kmap *kmap; + + if (map__load(host_machine->vmlinux_maps[MAP__FUNCTION], NULL) < 0) + return NULL; + + kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); + return kmap->ref_reloc_sym; +} + /* * Convert trace point to probe point with debuginfo * Currently only handles kprobes. @@ -256,18 +268,27 @@ static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, struct perf_probe_point *pp) { struct symbol *sym; + struct ref_reloc_sym *reloc_sym; struct map *map; - u64 addr; + u64 addr = 0; int ret = -ENOENT; struct debuginfo *dinfo; - sym = __find_kernel_function_by_name(tp->symbol, &map); - if (sym) { - addr = map->unmap_ip(map, sym->start + tp->offset); + /* ref_reloc_sym is just a label. Need a special fix*/ + reloc_sym = __kernel_get_ref_reloc_sym(); + if (reloc_sym && strcmp(tp->symbol, reloc_sym->name) == 0) + addr = reloc_sym->unrelocated_addr + tp->offset; + else { + sym = __find_kernel_function_by_name(tp->symbol, &map); + if (sym) + addr = map->unmap_ip(map, sym->start + tp->offset) - + map->reloc; + } + if (addr) { pr_debug("try to find %s+%ld@%" PRIx64 "\n", tp->symbol, tp->offset, addr); - dinfo = debuginfo__new_online_kernel(addr); + dinfo = open_debuginfo(tp->module); if (dinfo) { ret = debuginfo__find_probe_point(dinfo, (unsigned long)addr, pp); @@ -383,15 +404,6 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs, return ret; } -static struct ref_reloc_sym *__kernel_get_ref_reloc_sym(void) -{ - /* kmap->ref_reloc_sym should be set if host_machine is initialized */ - struct kmap *kmap; - - kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); - return kmap->ref_reloc_sym; -} - /* Post processing the probe events */ static int post_process_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *module, diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index e5e589f..4f6e277 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -89,79 +89,6 @@ error: return -ENOENT; } -#if _ELFUTILS_PREREQ(0, 148) -/* This method is buggy if elfutils is older than 0.148 */ -static int __linux_kernel_find_elf(Dwfl_Module *mod, - void **userdata, - const char *module_name, - Dwarf_Addr base, - char **file_name, Elf **elfp) -{ - int fd; - const char *path = kernel_get_module_path(module_name); - - pr_debug2("Use file %s for %s\n", path, module_name); - if (path) { - fd = open(path, O_RDONLY); - if (fd >= 0) { - *file_name = strdup(path); - return fd; - } - } - /* If failed, try to call standard method */ - return dwfl_linux_kernel_find_elf(mod, userdata, module_name, base, - file_name, elfp); -} - -static const Dwfl_Callbacks kernel_callbacks = { - .find_debuginfo = dwfl_standard_find_debuginfo, - .debuginfo_path = &debuginfo_path, - - .find_elf = __linux_kernel_find_elf, - .section_address = dwfl_linux_kernel_module_section_address, -}; - -/* Get a Dwarf from live kernel image */ -static int debuginfo__init_online_kernel_dwarf(struct debuginfo *dbg, - Dwarf_Addr addr) -{ - dbg->dwfl = dwfl_begin(&kernel_callbacks); - if (!dbg->dwfl) - return -EINVAL; - - /* Load the kernel dwarves: Don't care the result here */ - dwfl_linux_kernel_report_kernel(dbg->dwfl); - dwfl_linux_kernel_report_modules(dbg->dwfl); - - dbg->dbg = dwfl_addrdwarf(dbg->dwfl, addr, &dbg->bias); - /* Here, check whether we could get a real dwarf */ - if (!dbg->dbg) { - pr_debug("Failed to find kernel dwarf at %lx\n", - (unsigned long)addr); - dwfl_end(dbg->dwfl); - memset(dbg, 0, sizeof(*dbg)); - return -ENOENT; - } - - return 0; -} -#else -/* With older elfutils, this just support kernel module... */ -static int debuginfo__init_online_kernel_dwarf(struct debuginfo *dbg, - Dwarf_Addr addr __maybe_unused) -{ - const char *path = kernel_get_module_path("kernel"); - - if (!path) { - pr_err("Failed to find vmlinux path\n"); - return -ENOENT; - } - - pr_debug2("Use file %s for debuginfo\n", path); - return debuginfo__init_offline_dwarf(dbg, path); -} -#endif - struct debuginfo *debuginfo__new(const char *path) { struct debuginfo *dbg = zalloc(sizeof(*dbg)); @@ -174,19 +101,6 @@ struct debuginfo *debuginfo__new(const char *path) return dbg; } -struct debuginfo *debuginfo__new_online_kernel(unsigned long addr) -{ - struct debuginfo *dbg = zalloc(sizeof(*dbg)); - - if (!dbg) - return NULL; - - if (debuginfo__init_online_kernel_dwarf(dbg, (Dwarf_Addr)addr) < 0) - zfree(&dbg); - - return dbg; -} - void debuginfo__delete(struct debuginfo *dbg) { if (dbg) { diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 592c4da..3fc5973 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -31,7 +31,6 @@ struct debuginfo { }; extern struct debuginfo *debuginfo__new(const char *path); -extern struct debuginfo *debuginfo__new_online_kernel(unsigned long addr); extern void debuginfo__delete(struct debuginfo *dbg); /* Find probe_trace_events specified by perf_probe_event from debuginfo */ -- cgit v0.10.2 From 8f33f7deac485a61f38aa690b85489322a4d958e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:20 +0000 Subject: perf probe: Show appropriate symbol for ref_reloc_sym based kprobes Show appropriate symbol for ref_reloc_sym based kprobes instead of refpoint+offset when perf-probe -l runs without debuginfo. Without this change: # ./perf probe -l probe:t_show (on _stext+889880 with m v) probe:t_show_1 (on _stext+928568 with m v t) probe:t_show_2 (on _stext+969512 with m v fmt) probe:t_show_3 (on _stext+1001416 with m v file) With this change: # ./perf probe -l probe:t_show (on t_show with m v) probe:t_show_1 (on t_show with m v t) probe:t_show_2 (on t_show with m v fmt) probe:t_show_3 (on t_show with m v file) Changes from v2: - Check ref_reloc_sym to find correct unrelocated address. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053220.29635.81819.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 8e34c8d..f86820c 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -121,6 +121,42 @@ static struct symbol *__find_kernel_function_by_name(const char *name, NULL); } +static struct symbol *__find_kernel_function(u64 addr, struct map **mapp) +{ + return machine__find_kernel_function(host_machine, addr, mapp, NULL); +} + +static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) +{ + /* kmap->ref_reloc_sym should be set if host_machine is initialized */ + struct kmap *kmap; + + if (map__load(host_machine->vmlinux_maps[MAP__FUNCTION], NULL) < 0) + return NULL; + + kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); + return kmap->ref_reloc_sym; +} + +static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) +{ + struct ref_reloc_sym *reloc_sym; + struct symbol *sym; + struct map *map; + + /* ref_reloc_sym is just a label. Need a special fix*/ + reloc_sym = kernel_get_ref_reloc_sym(); + if (reloc_sym && strcmp(name, reloc_sym->name) == 0) + return (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; + else { + sym = __find_kernel_function_by_name(name, &map); + if (sym) + return map->unmap_ip(map, sym->start) - + (reloc) ? 0 : map->reloc; + } + return 0; +} + static struct map *kernel_get_module_map(const char *module) { struct rb_node *nd; @@ -216,12 +252,26 @@ out: static int convert_to_perf_probe_point(struct probe_trace_point *tp, struct perf_probe_point *pp) { - pp->function = strdup(tp->symbol); + struct symbol *sym; + struct map *map; + u64 addr = kernel_get_symbol_address_by_name(tp->symbol, true); + + if (addr) { + addr += tp->offset; + sym = __find_kernel_function(addr, &map); + if (!sym) + goto failed; + pp->function = strdup(sym->name); + pp->offset = addr - map->unmap_ip(map, sym->start); + } else { +failed: + pp->function = strdup(tp->symbol); + pp->offset = tp->offset; + } if (pp->function == NULL) return -ENOMEM; - pp->offset = tp->offset; pp->retprobe = tp->retprobe; return 0; @@ -248,18 +298,6 @@ static struct debuginfo *open_debuginfo(const char *module) return debuginfo__new(path); } -static struct ref_reloc_sym *__kernel_get_ref_reloc_sym(void) -{ - /* kmap->ref_reloc_sym should be set if host_machine is initialized */ - struct kmap *kmap; - - if (map__load(host_machine->vmlinux_maps[MAP__FUNCTION], NULL) < 0) - return NULL; - - kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); - return kmap->ref_reloc_sym; -} - /* * Convert trace point to probe point with debuginfo * Currently only handles kprobes. @@ -267,24 +305,13 @@ static struct ref_reloc_sym *__kernel_get_ref_reloc_sym(void) static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, struct perf_probe_point *pp) { - struct symbol *sym; - struct ref_reloc_sym *reloc_sym; - struct map *map; u64 addr = 0; int ret = -ENOENT; struct debuginfo *dinfo; - /* ref_reloc_sym is just a label. Need a special fix*/ - reloc_sym = __kernel_get_ref_reloc_sym(); - if (reloc_sym && strcmp(tp->symbol, reloc_sym->name) == 0) - addr = reloc_sym->unrelocated_addr + tp->offset; - else { - sym = __find_kernel_function_by_name(tp->symbol, &map); - if (sym) - addr = map->unmap_ip(map, sym->start + tp->offset) - - map->reloc; - } + addr = kernel_get_symbol_address_by_name(tp->symbol, false); if (addr) { + addr += tp->offset; pr_debug("try to find %s+%ld@%" PRIx64 "\n", tp->symbol, tp->offset, addr); @@ -420,7 +447,7 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs, if (module) return add_module_to_probe_trace_events(tevs, ntevs, module); - reloc_sym = __kernel_get_ref_reloc_sym(); + reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); return -EINVAL; -- cgit v0.10.2 From 5a6f63145491f905de1c5c6c46c5cd62c004d0d1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:23 +0000 Subject: perf probe: Show source-level or symbol-level info for uprobes Show source-level or symbol-level information for uprobe events. Without this change; # ./perf probe -l probe_perf:dso__load_vmlinux (on 0x000000000006d110 in /kbuild/ksrc/linux-3/tools/perf/perf) With this change; # ./perf probe -l probe_perf:dso__load_vmlinux (on dso__load_vmlinux@util/symbol.c in /kbuild/ksrc/linux-3/tools/perf/perf) Changes from v2: - Update according to previous patches. Changes from v1: - Rewrite the code based on new series. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053223.29635.51280.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index f86820c..3c35b7a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -249,34 +249,6 @@ out: return ret; } -static int convert_to_perf_probe_point(struct probe_trace_point *tp, - struct perf_probe_point *pp) -{ - struct symbol *sym; - struct map *map; - u64 addr = kernel_get_symbol_address_by_name(tp->symbol, true); - - if (addr) { - addr += tp->offset; - sym = __find_kernel_function(addr, &map); - if (!sym) - goto failed; - pp->function = strdup(sym->name); - pp->offset = addr - map->unmap_ip(map, sym->start); - } else { -failed: - pp->function = strdup(tp->symbol); - pp->offset = tp->offset; - } - - if (pp->function == NULL) - return -ENOMEM; - - pp->retprobe = tp->retprobe; - - return 0; -} - #ifdef HAVE_DWARF_SUPPORT /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module) @@ -298,44 +270,6 @@ static struct debuginfo *open_debuginfo(const char *module) return debuginfo__new(path); } -/* - * Convert trace point to probe point with debuginfo - * Currently only handles kprobes. - */ -static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, - struct perf_probe_point *pp) -{ - u64 addr = 0; - int ret = -ENOENT; - struct debuginfo *dinfo; - - addr = kernel_get_symbol_address_by_name(tp->symbol, false); - if (addr) { - addr += tp->offset; - pr_debug("try to find %s+%ld@%" PRIx64 "\n", tp->symbol, - tp->offset, addr); - - dinfo = open_debuginfo(tp->module); - if (dinfo) { - ret = debuginfo__find_probe_point(dinfo, - (unsigned long)addr, pp); - debuginfo__delete(dinfo); - } else { - pr_debug("Failed to open debuginfo at 0x%" PRIx64 "\n", - addr); - ret = -ENOENT; - } - } - if (ret <= 0) { - pr_debug("Failed to find corresponding probes from " - "debuginfo. Use kprobe event information.\n"); - return convert_to_perf_probe_point(tp, pp); - } - pp->retprobe = tp->retprobe; - - return 0; -} - static int get_text_start_address(const char *exec, unsigned long *address) { Elf *elf; @@ -364,6 +298,57 @@ out: return ret; } +/* + * Convert trace point to probe point with debuginfo + */ +static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, + struct perf_probe_point *pp, + bool is_kprobe) +{ + struct debuginfo *dinfo = NULL; + unsigned long stext = 0; + u64 addr = tp->address; + int ret = -ENOENT; + + /* convert the address to dwarf address */ + if (!is_kprobe) { + if (!addr) { + ret = -EINVAL; + goto error; + } + ret = get_text_start_address(tp->module, &stext); + if (ret < 0) + goto error; + addr += stext; + } else { + addr = kernel_get_symbol_address_by_name(tp->symbol, false); + if (addr == 0) + goto error; + addr += tp->offset; + } + + pr_debug("try to find information at %" PRIx64 " in %s\n", addr, + tp->module ? : "kernel"); + + dinfo = open_debuginfo(tp->module); + if (dinfo) { + ret = debuginfo__find_probe_point(dinfo, + (unsigned long)addr, pp); + debuginfo__delete(dinfo); + } else { + pr_debug("Failed to open debuginfo at 0x%" PRIx64 "\n", addr); + ret = -ENOENT; + } + + if (ret > 0) { + pp->retprobe = tp->retprobe; + return 0; + } +error: + pr_debug("Failed to find corresponding probes from debuginfo.\n"); + return ret ? : -ENOENT; +} + static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *exec) { @@ -815,10 +800,12 @@ out: #else /* !HAVE_DWARF_SUPPORT */ -static int kprobe_convert_to_perf_probe(struct probe_trace_point *tp, - struct perf_probe_point *pp) +static int +find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused, + struct perf_probe_point *pp __maybe_unused, + bool is_kprobe __maybe_unused) { - return convert_to_perf_probe_point(tp, pp); + return -ENOSYS; } static int try_to_find_probe_trace_events(struct perf_probe_event *pev, @@ -1343,16 +1330,21 @@ static int parse_probe_trace_command(const char *cmd, } else p = argv[1]; fmt1_str = strtok_r(p, "+", &fmt); - tp->symbol = strdup(fmt1_str); - if (tp->symbol == NULL) { - ret = -ENOMEM; - goto out; + if (fmt1_str[0] == '0') /* only the address started with 0x */ + tp->address = strtoul(fmt1_str, NULL, 0); + else { + /* Only the symbol-based probe has offset */ + tp->symbol = strdup(fmt1_str); + if (tp->symbol == NULL) { + ret = -ENOMEM; + goto out; + } + fmt2_str = strtok_r(NULL, "", &fmt); + if (fmt2_str == NULL) + tp->offset = 0; + else + tp->offset = strtoul(fmt2_str, NULL, 10); } - fmt2_str = strtok_r(NULL, "", &fmt); - if (fmt2_str == NULL) - tp->offset = 0; - else - tp->offset = strtoul(fmt2_str, NULL, 10); tev->nargs = argc - 2; tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); @@ -1623,6 +1615,79 @@ error: return NULL; } +static int find_perf_probe_point_from_map(struct probe_trace_point *tp, + struct perf_probe_point *pp, + bool is_kprobe) +{ + struct symbol *sym = NULL; + struct map *map; + u64 addr; + int ret = -ENOENT; + + if (!is_kprobe) { + map = dso__new_map(tp->module); + if (!map) + goto out; + addr = tp->address; + sym = map__find_symbol(map, addr, NULL); + } else { + addr = kernel_get_symbol_address_by_name(tp->symbol, true); + if (addr) { + addr += tp->offset; + sym = __find_kernel_function(addr, &map); + } + } + if (!sym) + goto out; + + pp->retprobe = tp->retprobe; + pp->offset = addr - map->unmap_ip(map, sym->start); + pp->function = strdup(sym->name); + ret = pp->function ? 0 : -ENOMEM; + +out: + if (map && !is_kprobe) { + dso__delete(map->dso); + map__delete(map); + } + + return ret; +} + +static int convert_to_perf_probe_point(struct probe_trace_point *tp, + struct perf_probe_point *pp, + bool is_kprobe) +{ + char buf[128]; + int ret; + + ret = find_perf_probe_point_from_dwarf(tp, pp, is_kprobe); + if (!ret) + return 0; + ret = find_perf_probe_point_from_map(tp, pp, is_kprobe); + if (!ret) + return 0; + + pr_debug("Failed to find probe point from both of dwarf and map.\n"); + + if (tp->symbol) { + pp->function = strdup(tp->symbol); + pp->offset = tp->offset; + } else if (!tp->module && !is_kprobe) { + ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); + if (ret < 0) + return ret; + pp->function = strdup(buf); + pp->offset = 0; + } + if (pp->function == NULL) + return -ENOMEM; + + pp->retprobe = tp->retprobe; + + return 0; +} + static int convert_to_perf_probe_event(struct probe_trace_event *tev, struct perf_probe_event *pev, bool is_kprobe) { @@ -1636,11 +1701,7 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, return -ENOMEM; /* Convert trace_point to probe_point */ - if (is_kprobe) - ret = kprobe_convert_to_perf_probe(&tev->point, &pev->point); - else - ret = convert_to_perf_probe_point(&tev->point, &pev->point); - + ret = convert_to_perf_probe_point(&tev->point, &pev->point, is_kprobe); if (ret < 0) return ret; -- cgit v0.10.2 From eb948e50831bc64e6bb2589be7575ed7c159a429 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:25 +0000 Subject: perf probe: Allow to add events on the local functions Allow to add events on the local functions without debuginfo. (With the debuginfo, we can add events even on inlined functions) Currently, probing on local functions requires debuginfo to locate actual address. It is also possible without debuginfo since we have symbol maps. Without this change; ---- # ./perf probe -a t_show Added new event: probe:t_show (on t_show) You can now use it in all perf tools, such as: perf record -e probe:t_show -aR sleep 1 # ./perf probe -x perf -a identity__map_ip no symbols found in /kbuild/ksrc/linux-3/tools/perf/perf, maybe install a debug package? Failed to load map. Error: Failed to add events. (-22) ---- As the above results, perf probe just put one event on the first found symbol for kprobe event. Moreover, for uprobe event, perf probe failed to find local functions. With this change; ---- # ./perf probe -a t_show Added new events: probe:t_show (on t_show) probe:t_show_1 (on t_show) probe:t_show_2 (on t_show) probe:t_show_3 (on t_show) You can now use it in all perf tools, such as: perf record -e probe:t_show_3 -aR sleep 1 # ./perf probe -x perf -a identity__map_ip Added new events: probe_perf:identity__map_ip (on identity__map_ip in /kbuild/ksrc/linux-3/tools/perf/perf) probe_perf:identity__map_ip_1 (on identity__map_ip in /kbuild/ksrc/linux-3/tools/perf/perf) probe_perf:identity__map_ip_2 (on identity__map_ip in /kbuild/ksrc/linux-3/tools/perf/perf) probe_perf:identity__map_ip_3 (on identity__map_ip in /kbuild/ksrc/linux-3/tools/perf/perf) You can now use it in all perf tools, such as: perf record -e probe_perf:identity__map_ip_3 -aR sleep 1 ---- Now we succeed to put events on every given local functions for both kprobes and uprobes. :) Note that this also introduces some symbol rbtree iteration macros; symbols__for_each, dso__for_each_symbol, and map__for_each_symbol. These are for walking through the symbol list in a map. Changes from v2: - Fix add_exec_to_probe_trace_events() not to convert address to tp->symbol any more. - Fix to set kernel probes based on ref_reloc_sym. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053225.29635.15026.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index cd7d6f0..ab06f1c 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -102,6 +102,16 @@ struct dso { char name[0]; }; +/* dso__for_each_symbol - iterate over the symbols of given type + * + * @dso: the 'struct dso *' in which symbols itereated + * @pos: the 'struct symbol *' to use as a loop cursor + * @n: the 'struct rb_node *' to use as a temporary storage + * @type: the 'enum map_type' type of symbols + */ +#define dso__for_each_symbol(dso, pos, n, type) \ + symbols__for_each_entry(&(dso)->symbols[(type)], pos, n) + static inline void dso__set_loaded(struct dso *dso, enum map_type type) { dso->loaded |= (1 << type); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 257e513..f00f058 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -90,6 +90,16 @@ u64 map__objdump_2mem(struct map *map, u64 ip); struct symbol; +/* map__for_each_symbol - iterate over the symbols in the given map + * + * @map: the 'struct map *' in which symbols itereated + * @pos: the 'struct symbol *' to use as a loop cursor + * @n: the 'struct rb_node *' to use as a temporary storage + * Note: caller must ensure map->dso is not NULL (map is loaded). + */ +#define map__for_each_symbol(map, pos, n) \ + dso__for_each_symbol(map->dso, pos, n, map->type) + typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); void map__init(struct map *map, enum map_type type, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 3c35b7a..42bec67 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -70,8 +70,6 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) } static char *synthesize_perf_probe_point(struct perf_probe_point *pp); -static int convert_name_to_addr(struct perf_probe_event *pev, - const char *exec); static void clear_probe_trace_event(struct probe_trace_event *tev); static struct machine *host_machine; @@ -249,6 +247,14 @@ out: return ret; } +static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) +{ + int i; + + for (i = 0; i < ntevs; i++) + clear_probe_trace_event(tevs + i); +} + #ifdef HAVE_DWARF_SUPPORT /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module) @@ -353,8 +359,7 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, int ntevs, const char *exec) { int i, ret = 0; - unsigned long offset, stext = 0; - char buf[32]; + unsigned long stext = 0; if (!exec) return 0; @@ -365,15 +370,9 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs, for (i = 0; i < ntevs && ret >= 0; i++) { /* point.address is the addres of point.symbol + point.offset */ - offset = tevs[i].point.address - stext; - tevs[i].point.offset = 0; - zfree(&tevs[i].point.symbol); - ret = e_snprintf(buf, 32, "0x%lx", offset); - if (ret < 0) - break; + tevs[i].point.address -= stext; tevs[i].point.module = strdup(exec); - tevs[i].point.symbol = strdup(buf); - if (!tevs[i].point.symbol || !tevs[i].point.module) { + if (!tevs[i].point.module) { ret = -ENOMEM; break; } @@ -452,14 +451,6 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs, return 0; } -static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) -{ - int i; - - for (i = 0; i < ntevs; i++) - clear_probe_trace_event(tevs + i); -} - /* Try to find perf_probe_event with debuginfo */ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event **tevs, @@ -1586,20 +1577,27 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) if (buf == NULL) return NULL; + len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s ", tp->retprobe ? 'r' : 'p', + tev->group, tev->event); + if (len <= 0) + goto error; + + /* Uprobes must have tp->address and tp->module */ + if (tev->uprobes && (!tp->address || !tp->module)) + goto error; + + /* Use the tp->address for uprobes */ if (tev->uprobes) - len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s:%s", - tp->retprobe ? 'r' : 'p', - tev->group, tev->event, - tp->module, tp->symbol); + ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", + tp->module, tp->address); else - len = e_snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s%s%s+%lu", - tp->retprobe ? 'r' : 'p', - tev->group, tev->event, + ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", tp->module ?: "", tp->module ? ":" : "", tp->symbol, tp->offset); - if (len <= 0) + if (ret <= 0) goto error; + len += ret; for (i = 0; i < tev->nargs; i++) { ret = synthesize_probe_trace_arg(&tev->args[i], buf + len, @@ -2150,113 +2148,175 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, return ret; } -static int convert_to_probe_trace_events(struct perf_probe_event *pev, - struct probe_trace_event **tevs, - int max_tevs, const char *target) +static char *looking_function_name; +static int num_matched_functions; + +static int probe_function_filter(struct map *map __maybe_unused, + struct symbol *sym) +{ + if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) && + strcmp(looking_function_name, sym->name) == 0) { + num_matched_functions++; + return 0; + } + return 1; +} + +#define strdup_or_goto(str, label) \ + ({ char *__p = strdup(str); if (!__p) goto label; __p; }) + +/* + * Find probe function addresses from map. + * Return an error or the number of found probe_trace_event + */ +static int find_probe_trace_events_from_map(struct perf_probe_event *pev, + struct probe_trace_event **tevs, + int max_tevs, const char *target) { + struct map *map = NULL; + struct kmap *kmap = NULL; + struct ref_reloc_sym *reloc_sym = NULL; struct symbol *sym; - int ret, i; + struct rb_node *nd; struct probe_trace_event *tev; + struct perf_probe_point *pp = &pev->point; + struct probe_trace_point *tp; + int ret, i; - if (pev->uprobes && !pev->group) { - /* Replace group name if not given */ - ret = convert_exec_to_group(target, &pev->group); - if (ret != 0) { - pr_warning("Failed to make a group name.\n"); - return ret; - } + /* Init maps of given executable or kernel */ + if (pev->uprobes) + map = dso__new_map(target); + else + map = kernel_get_module_map(target); + if (!map) { + ret = -EINVAL; + goto out; } - /* Convert perf_probe_event with debuginfo */ - ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target); - if (ret != 0) - return ret; /* Found in debuginfo or got an error */ - - if (pev->uprobes) { - ret = convert_name_to_addr(pev, target); - if (ret < 0) - return ret; + /* + * Load matched symbols: Since the different local symbols may have + * same name but different addresses, this lists all the symbols. + */ + num_matched_functions = 0; + looking_function_name = pp->function; + ret = map__load(map, probe_function_filter); + if (ret || num_matched_functions == 0) { + pr_err("Failed to find symbol %s in %s\n", pp->function, + target ? : "kernel"); + ret = -ENOENT; + goto out; + } else if (num_matched_functions > max_tevs) { + pr_err("Too many functions matched in %s\n", + target ? : "kernel"); + ret = -E2BIG; + goto out; } - /* Allocate trace event buffer */ - tev = *tevs = zalloc(sizeof(struct probe_trace_event)); - if (tev == NULL) - return -ENOMEM; + if (!pev->uprobes) { + kmap = map__kmap(map); + reloc_sym = kmap->ref_reloc_sym; + if (!reloc_sym) { + pr_warning("Relocated base symbol is not found!\n"); + ret = -EINVAL; + goto out; + } + } - /* Copy parameters */ - tev->point.symbol = strdup(pev->point.function); - if (tev->point.symbol == NULL) { + /* Setup result trace-probe-events */ + *tevs = zalloc(sizeof(*tev) * num_matched_functions); + if (!*tevs) { ret = -ENOMEM; - goto error; + goto out; } - if (target) { - tev->point.module = strdup(target); - if (tev->point.module == NULL) { - ret = -ENOMEM; - goto error; + ret = 0; + map__for_each_symbol(map, sym, nd) { + tev = (*tevs) + ret; + tp = &tev->point; + if (ret == num_matched_functions) { + pr_warning("Too many symbols are listed. Skip it.\n"); + break; } - } + ret++; - tev->point.offset = pev->point.offset; - tev->point.retprobe = pev->point.retprobe; - tev->nargs = pev->nargs; - tev->uprobes = pev->uprobes; - - if (tev->nargs) { - tev->args = zalloc(sizeof(struct probe_trace_arg) - * tev->nargs); - if (tev->args == NULL) { - ret = -ENOMEM; - goto error; + if (pp->offset > sym->end - sym->start) { + pr_warning("Offset %ld is bigger than the size of %s\n", + pp->offset, sym->name); + ret = -ENOENT; + goto err_out; + } + /* Add one probe point */ + tp->address = map->unmap_ip(map, sym->start) + pp->offset; + if (reloc_sym) { + tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out); + tp->offset = tp->address - reloc_sym->addr; + } else { + tp->symbol = strdup_or_goto(sym->name, nomem_out); + tp->offset = pp->offset; + } + tp->retprobe = pp->retprobe; + if (target) + tev->point.module = strdup_or_goto(target, nomem_out); + tev->uprobes = pev->uprobes; + tev->nargs = pev->nargs; + if (tev->nargs) { + tev->args = zalloc(sizeof(struct probe_trace_arg) * + tev->nargs); + if (tev->args == NULL) + goto nomem_out; } for (i = 0; i < tev->nargs; i++) { - if (pev->args[i].name) { - tev->args[i].name = strdup(pev->args[i].name); - if (tev->args[i].name == NULL) { - ret = -ENOMEM; - goto error; - } - } - tev->args[i].value = strdup(pev->args[i].var); - if (tev->args[i].value == NULL) { - ret = -ENOMEM; - goto error; - } - if (pev->args[i].type) { - tev->args[i].type = strdup(pev->args[i].type); - if (tev->args[i].type == NULL) { - ret = -ENOMEM; - goto error; - } - } + if (pev->args[i].name) + tev->args[i].name = + strdup_or_goto(pev->args[i].name, + nomem_out); + + tev->args[i].value = strdup_or_goto(pev->args[i].var, + nomem_out); + if (pev->args[i].type) + tev->args[i].type = + strdup_or_goto(pev->args[i].type, + nomem_out); } } - if (pev->uprobes) - return 1; +out: + if (map && pev->uprobes) { + /* Only when using uprobe(exec) map needs to be released */ + dso__delete(map->dso); + map__delete(map); + } + return ret; - /* Currently just checking function name from symbol map */ - sym = __find_kernel_function_by_name(tev->point.symbol, NULL); - if (!sym) { - pr_warning("Kernel symbol \'%s\' not found.\n", - tev->point.symbol); - ret = -ENOENT; - goto error; - } else if (tev->point.offset > sym->end - sym->start) { - pr_warning("Offset specified is greater than size of %s\n", - tev->point.symbol); - ret = -ENOENT; - goto error; +nomem_out: + ret = -ENOMEM; +err_out: + clear_probe_trace_events(*tevs, num_matched_functions); + zfree(tevs); + goto out; +} +static int convert_to_probe_trace_events(struct perf_probe_event *pev, + struct probe_trace_event **tevs, + int max_tevs, const char *target) +{ + int ret; + + if (pev->uprobes && !pev->group) { + /* Replace group name if not given */ + ret = convert_exec_to_group(target, &pev->group); + if (ret != 0) { + pr_warning("Failed to make a group name.\n"); + return ret; + } } - return 1; -error: - clear_probe_trace_event(tev); - free(tev); - *tevs = NULL; - return ret; + /* Convert perf_probe_event with debuginfo */ + ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target); + if (ret != 0) + return ret; /* Found in debuginfo or got an error */ + + return find_probe_trace_events_from_map(pev, tevs, max_tevs, target); } struct __event_package { @@ -2461,7 +2521,7 @@ static struct strfilter *available_func_filter; static int filter_available_functions(struct map *map __maybe_unused, struct symbol *sym) { - if (sym->binding == STB_GLOBAL && + if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) && strfilter__compare(available_func_filter, sym->name)) return 0; return 1; @@ -2509,95 +2569,3 @@ end: return ret; } -/* - * uprobe_events only accepts address: - * Convert function and any offset to address - */ -static int convert_name_to_addr(struct perf_probe_event *pev, const char *exec) -{ - struct perf_probe_point *pp = &pev->point; - struct symbol *sym; - struct map *map = NULL; - char *function = NULL; - int ret = -EINVAL; - unsigned long long vaddr = 0; - - if (!pp->function) { - pr_warning("No function specified for uprobes"); - goto out; - } - - function = strdup(pp->function); - if (!function) { - pr_warning("Failed to allocate memory by strdup.\n"); - ret = -ENOMEM; - goto out; - } - - map = dso__new_map(exec); - if (!map) { - pr_warning("Cannot find appropriate DSO for %s.\n", exec); - goto out; - } - available_func_filter = strfilter__new(function, NULL); - if (map__load(map, filter_available_functions)) { - pr_err("Failed to load map.\n"); - goto out; - } - - sym = map__find_symbol_by_name(map, function, NULL); - if (!sym) { - pr_warning("Cannot find %s in DSO %s\n", function, exec); - goto out; - } - - if (map->start > sym->start) - vaddr = map->start; - vaddr += sym->start + pp->offset + map->pgoff; - pp->offset = 0; - - if (!pev->event) { - pev->event = function; - function = NULL; - } - if (!pev->group) { - char *ptr1, *ptr2, *exec_copy; - - pev->group = zalloc(sizeof(char *) * 64); - exec_copy = strdup(exec); - if (!exec_copy) { - ret = -ENOMEM; - pr_warning("Failed to copy exec string.\n"); - goto out; - } - - ptr1 = strdup(basename(exec_copy)); - if (ptr1) { - ptr2 = strpbrk(ptr1, "-._"); - if (ptr2) - *ptr2 = '\0'; - e_snprintf(pev->group, 64, "%s_%s", PERFPROBE_GROUP, - ptr1); - free(ptr1); - } - free(exec_copy); - } - free(pp->function); - pp->function = zalloc(sizeof(char *) * MAX_PROBE_ARGS); - if (!pp->function) { - ret = -ENOMEM; - pr_warning("Failed to allocate memory by zalloc.\n"); - goto out; - } - e_snprintf(pp->function, MAX_PROBE_ARGS, "0x%llx", vaddr); - ret = 0; - -out: - if (map) { - dso__delete(map->dso); - map__delete(map); - } - if (function) - free(function); - return ret; -} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 538d484..2553ae0 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -79,6 +79,17 @@ struct symbol { void symbol__delete(struct symbol *sym); void symbols__delete(struct rb_root *symbols); +/* symbols__for_each_entry - iterate over symbols (rb_root) + * + * @symbols: the rb_root of symbols + * @pos: the 'struct symbol *' to use as a loop cursor + * @nd: the 'struct rb_node *' to use as a temporary storage + */ +#define symbols__for_each_entry(symbols, pos, nd) \ + for (nd = rb_first(symbols); \ + nd && (pos = rb_entry(nd, struct symbol, rb_node)); \ + nd = rb_next(nd)) + static inline size_t symbol__size(const struct symbol *sym) { return sym->end - sym->start + 1; -- cgit v0.10.2 From a15ad2f5360c821f030c53266ebf467738249c68 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 6 Feb 2014 05:32:27 +0000 Subject: perf probe: Support distro-style debuginfo for uprobe Support distro-style debuginfo supported by dso for setting uprobes. Note that this tries to find a debuginfo file based on the real path of the target binary. If the debuginfo is not correctly installed on the system, this can not find it. Signed-off-by: Masami Hiramatsu Cc: David Ahern Cc: "David A. Long" Cc: Ingo Molnar Cc: Namhyung Kim Cc: Oleg Nesterov Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20140206053227.29635.54434.stgit@kbuild-fedora.yrl.intra.hitachi.co.jp Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 42bec67..0d1542f 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -256,17 +256,14 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) } #ifdef HAVE_DWARF_SUPPORT + /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module) { - const char *path; + const char *path = module; - /* A file path -- this is an offline module */ - if (module && strchr(module, '/')) - path = module; - else { + if (!module || !strchr(module, '/')) { path = kernel_get_module_path(module); - if (!path) { pr_err("Failed to find path of %s module.\n", module ?: "kernel"); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 4f6e277..df02386 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -34,6 +34,7 @@ #include #include "event.h" +#include "dso.h" #include "debug.h" #include "intlist.h" #include "util.h" @@ -89,7 +90,7 @@ error: return -ENOENT; } -struct debuginfo *debuginfo__new(const char *path) +static struct debuginfo *__debuginfo__new(const char *path) { struct debuginfo *dbg = zalloc(sizeof(*dbg)); if (!dbg) @@ -97,10 +98,46 @@ struct debuginfo *debuginfo__new(const char *path) if (debuginfo__init_offline_dwarf(dbg, path) < 0) zfree(&dbg); - + if (dbg) + pr_debug("Open Debuginfo file: %s\n", path); return dbg; } +enum dso_binary_type distro_dwarf_types[] = { + DSO_BINARY_TYPE__FEDORA_DEBUGINFO, + DSO_BINARY_TYPE__UBUNTU_DEBUGINFO, + DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__BUILDID_DEBUGINFO, + DSO_BINARY_TYPE__NOT_FOUND, +}; + +struct debuginfo *debuginfo__new(const char *path) +{ + enum dso_binary_type *type; + char buf[PATH_MAX], nil = '\0'; + struct dso *dso; + struct debuginfo *dinfo = NULL; + + /* Try to open distro debuginfo files */ + dso = dso__new(path); + if (!dso) + goto out; + + for (type = distro_dwarf_types; + !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND; + type++) { + if (dso__read_binary_type_filename(dso, *type, &nil, + buf, PATH_MAX) < 0) + continue; + dinfo = __debuginfo__new(buf); + } + dso__delete(dso); + +out: + /* if failed to open all distro debuginfo, open given binary */ + return dinfo ? : __debuginfo__new(path); +} + void debuginfo__delete(struct debuginfo *dbg) { if (dbg) { diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 3fc5973..92590b2 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -30,6 +30,7 @@ struct debuginfo { Dwarf_Addr bias; }; +/* This also tries to open distro debuginfo */ extern struct debuginfo *debuginfo__new(const char *path); extern void debuginfo__delete(struct debuginfo *dbg); -- cgit v0.10.2 From f3a44fe0608eb628e1f8f6e3540462e6d171a745 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 18 Feb 2014 09:57:22 -0500 Subject: dm raid1: fix immutable biovec related BUG when retrying read bio When restoring bi_end_io, increase bi_remaining before retrying the bio to avoid BUG_ON(atomic_read(&bio->bi_remaining) <= 0) in bio_endio(). Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index f284e0b..7dfdb5c 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1244,6 +1244,9 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) dm_bio_restore(bd, bio); bio_record->details.bi_bdev = NULL; + + atomic_inc(&bio->bi_remaining); + queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; } -- cgit v0.10.2 From 1406b916f4a29d5f9660264a28ce609c8c77e7ae Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Wed, 19 Feb 2014 00:27:53 +0900 Subject: nfsd: fix lost nfserrno() call in nfsd_setattr() There is a regression in 208d0ac 2014-01-07 nfsd4: break only delegations when appropriate which deletes an nfserrno() call in nfsd_setattr() (by accident, probably), and NFSD becomes ignoring an error from VFS. Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 017d3cb..6d7be3f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -449,6 +449,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); + err = nfserrno(host_err); out_put_write_access: if (size_change) -- cgit v0.10.2 From cd91b2fecfa66967e6ad732a9af860eb96c31ba4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Feb 2014 20:19:03 +0100 Subject: ARM: 7963/1: mm: report both sections from PMD On 2-level page table systems, the PMD has 2 section entries. Report these, otherwise ARM_PTDUMP will miss reporting permission changes on odd section boundaries. Signed-off-by: Kees Cook Acked-by: Catalin Marinas Tested-by: Steve Capper Signed-off-by: Russell King diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 2b3a564..ef69152 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -264,6 +264,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) note_page(st, addr, 3, pmd_val(*pmd)); else walk_pte(st, pmd, addr); + + if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) + note_page(st, addr + SECTION_SIZE, 3, pmd_val(pmd[1])); } } -- cgit v0.10.2 From c9698e5cd6ad1ff2844bc44fabddc0f2e0562047 Mon Sep 17 00:00:00 2001 From: "David A. Long" Date: Fri, 14 Feb 2014 22:41:18 +0100 Subject: ARM: 7964/1: Detect section mismatches in thumb relocations Add processing for normally encountered thumb relocation types so that section mismatches will be detected. Comment from Rusty Russell follows: Happiest for this to go through an ARM tree, so: Signed-off-by: David A. Long Acked-by: Rusty Russell Signed-off-by: Russell King diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 4061098..99a45fd 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1502,6 +1502,16 @@ static int addend_386_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) #define R_ARM_JUMP24 29 #endif +#ifndef R_ARM_THM_CALL +#define R_ARM_THM_CALL 10 +#endif +#ifndef R_ARM_THM_JUMP24 +#define R_ARM_THM_JUMP24 30 +#endif +#ifndef R_ARM_THM_JUMP19 +#define R_ARM_THM_JUMP19 51 +#endif + static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) { unsigned int r_typ = ELF_R_TYPE(r->r_info); @@ -1515,6 +1525,9 @@ static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + case R_ARM_THM_JUMP19: /* From ARM ABI: ((S + A) | T) - P */ r->r_addend = (int)(long)(elf->hdr + sechdr->sh_offset + -- cgit v0.10.2 From c7e8240db112a128ca09cfc9ee0db1bafd7229ec Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 18 Dec 2013 16:56:22 -0800 Subject: rcutorture: Print grace-period performance statistics Sometime problems can manifest themselves as unusually slow grace periods. This commit therefore prints the number of rcutorture updates during the test and the number per second. These statistics are harvested from the config.out and qemu-cmd files, and are silently omitted if these files are not available, as would be the case if there was a build failure or a boot-time hang. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index baef09f..e3b1af36 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -31,7 +31,24 @@ do for i in $dirs do configfile=`echo $i | sed -e 's/^.*\///'` - echo $configfile + ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` + if test -z "$ngps" + then + echo $configfile + else + title="$configfile ------- $ngps grace periods" + dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` + if test -z "$dur" + then + : + else + ngpsps=$((ngps / dur)) + ngpsps=`awk -v ngps=$ngps -v dur=$dur ' + BEGIN { print ngps / dur }' < /dev/null` + title="$title ($ngpsps per second)" + fi + echo $title + fi configcheck.sh $i/.config $i/ConfigFragment parse-build.sh $i/Make.out $configfile parse-rcutorture.sh $i/console.log $configfile -- cgit v0.10.2 From 1219c8636cc51eaed1529c9d5ed4184f726c8f24 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Dec 2013 10:52:55 -0800 Subject: rcutorture: Print script and arguments to standard output Although the script name and arguments are logged in the results directory, it is more convenient to see it in the output. This commit therefore adds the output of this information. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index e3b1af36..eb28509 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -27,9 +27,16 @@ PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH for rd in "$@" do + firsttime=1 dirs=`find $rd -name Make.defconfig.out -print | sort | sed -e 's,/[^/]*$,,' | sort -u` for i in $dirs do + if test $firsttime + then + firsttime=0 + resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` + head -1 $resdir/log + fi configfile=`echo $i | sed -e 's/^.*\///'` ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` if test -z "$ngps" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 1b7923b..f60f984 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -169,6 +169,7 @@ else fi mkdir $resdir/$ds touch $resdir/$ds/log +echo $scriptname $args echo $scriptname $args >> $resdir/$ds/log pwd > $resdir/$ds/testid.txt -- cgit v0.10.2 From 4a261dbceaaece2018ef03b16d5092c09147df28 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Jan 2014 22:57:48 -0800 Subject: rcutorture: Move common boot flags to kvm-test-1-rcu.sh Currently, most boot flags are calculated in kvm-test-1-rcu.sh, except that rcutorture.test_no_idle_hz and rcutorture.verbose are set up by kvm.sh. This commit promotes one-stop shopping by consolidating the determination of boot flags into kvm-test-1-rcu.sh. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh index 151b237..2cb7facb 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh @@ -6,15 +6,15 @@ # Execute this in the source tree. Do not run it as a background task # because qemu does not seem to like that much. # -# Usage: sh kvm-test-1-rcu.sh config builddir resdir minutes qemu-args bootargs +# Usage: sh kvm-test-1-rcu.sh config builddir resdir minutes qemu-args boot_args # # qemu-args defaults to "" -- you will want "-nographic" if running headless. -# bootargs defaults to "root=/dev/sda noapic selinux=0 console=ttyS0" +# boot_args defaults to "root=/dev/sda noapic selinux=0 console=ttyS0" # "initcall_debug debug rcutorture.stat_interval=15" # "rcutorture.shutdown_secs=$((minutes * 60))" # "rcutorture.rcutorture_runnable=1" # -# Anything you specify for either qemu-args or bootargs is appended to +# Anything you specify for either qemu-args or boot_args is appended to # the default values. The "-smp" value is deduced from the contents of # the config fragment. # @@ -138,7 +138,7 @@ boot_args="`rcutorture_param_onoff "$boot_args" $builddir/.config`" # Generate rcu_barrier() boot parameter boot_args="`rcutorture_param_n_barrier_cbs "$boot_args"`" # Pull in standard rcutorture boot arguments -boot_args="$boot_args rcutorture.stat_interval=15 rcutorture.shutdown_secs=$seconds rcutorture.rcutorture_runnable=1" +boot_args="$boot_args rcutorture.stat_interval=15 rcutorture.shutdown_secs=$seconds rcutorture.rcutorture_runnable=1 rcutorture.test_no_idle_hz=1 rcutorture.verbose=1" echo $QEMU $qemu_args -m 512 -kernel $builddir/arch/x86/boot/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd if test -n "$RCU_BUILDONLY" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index f60f984..979881b 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -203,7 +203,7 @@ do fi mkdir "${rd}" echo Results directory: $rd - kvm-test-1-rcu.sh $CONFIGFRAG/$kversion/$CF $builddir $rd $dur "-nographic $RCU_QEMU_ARG" "rcutorture.test_no_idle_hz=1 rcutorture.verbose=1 $RCU_BOOTARGS" + kvm-test-1-rcu.sh $CONFIGFRAG/$kversion/$CF $builddir $rd $dur "-nographic $RCU_QEMU_ARG" "$RCU_BOOTARGS" done # Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier -- cgit v0.10.2 From bad804406ad627591477a87778e0698116f8e7f5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Jan 2014 23:16:55 -0800 Subject: rcutorture: Allow kvm-test-1-rcu.sh to pause after build Parallel rcutorture runs is valuable on large systems, but it is not a good idea to do (say) five builds in parallel if each build believes it has the whole system at its disposal, especially if the system is shared. It is also bad to restrict the build to (say) a single CPU just because the corresponding rcutorture run uses only a single CPU. This commit therefore adds a kvm-test-1-rcu.sh ability to pause after the build completes, which will allow kvm.sh to do a number of builds serially (with each build thus having the full system at its disposal), then allow the rcutorture runs to proceed in parallel. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh index 2cb7facb..c3a3965 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh @@ -96,11 +96,23 @@ then cp $builddir/.config $resdir cp $builddir/arch/x86/boot/bzImage $resdir parse-build.sh $resdir/Make.out $title + if test -f $builddir.wait + then + mv $builddir.wait $builddir.ready + fi else cp $builddir/Make*.out $resdir echo Build failed, not running KVM, see $resdir. + if test -f $builddir.wait + then + mv $builddir.wait $builddir.ready + fi exit 1 fi +while test -f $builddir.ready +do + sleep 1 +done minutes=$4 seconds=$(($minutes * 60)) qemu_args=$5 -- cgit v0.10.2 From 16d301cbddeb4fb79126fdcda1509207ac6034ba Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Jan 2014 21:28:57 -0800 Subject: rcutorture: Move common qemu flags to kvm-test-1.sh Currently, most qemu flags are calculated in kvm-test-1-rcu.sh, except that -nographics is set up by kvm.sh. This commit promotes one-stop shopping by consolidating the determination of qemu flags into kvm-test-1-rcu.sh. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh index c3a3965..a06a0a6 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh @@ -126,6 +126,7 @@ echo ' ---' `date`: Starting kernel QEMU="`identify_qemu $builddir/vmlinux.o`" # Generate -smp qemu argument. +qemu_args="-nographic $qemu_args" cpu_count=`configNR_CPUS.sh $config_template` vcpus=`identify_qemu_vcpus` if test $cpu_count -gt $vcpus diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 979881b..f211349 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -203,7 +203,7 @@ do fi mkdir "${rd}" echo Results directory: $rd - kvm-test-1-rcu.sh $CONFIGFRAG/$kversion/$CF $builddir $rd $dur "-nographic $RCU_QEMU_ARG" "$RCU_BOOTARGS" + kvm-test-1-rcu.sh $CONFIGFRAG/$kversion/$CF $builddir $rd $dur "$RCU_QEMU_ARG" "$RCU_BOOTARGS" done # Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier -- cgit v0.10.2 From 061862386e8062046cc980e1be1fc4779159411e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Jan 2014 22:26:09 -0800 Subject: rcutorture: Reduce SRCU-N number of CPUs Both SRCU-P and SRCU-N specify eight CPUs, which results in four iterations for a parallel run on 32 CPUs. This commit reduces SRCU-N to four CPUs (but leaving SRCU-P at eight) to speed up parallel runs, while maintaining essentially the same test coverage. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/configs/SRCU-N b/tools/testing/selftests/rcutorture/configs/SRCU-N index 10a0e27..b4c6340 100644 --- a/tools/testing/selftests/rcutorture/configs/SRCU-N +++ b/tools/testing/selftests/rcutorture/configs/SRCU-N @@ -1,6 +1,6 @@ CONFIG_RCU_TRACE=n CONFIG_SMP=y -CONFIG_NR_CPUS=8 +CONFIG_NR_CPUS=4 CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n -- cgit v0.10.2 From 43e38ab3d518352932951bacb99705a3bee9477c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 15 Jan 2014 15:48:41 -0800 Subject: rcutorture: Enable concurrent rcutorture runs The rcutorture tests run by default range from using one CPU to using sixteen of them. Therefore, rcutorture testing could be sped up significantly simply by running the kernels in parallel. Building them in parallel is not all that helpful: "make -j" is usually a better bet. So this commit takes a new "--cpus" argument that specifies how many CPUs rcutorture is permitted to use for its parallel runs. The default of zero does sequential runs as before. The bin-packing is minimal, and will be grossly suboptimal for some configurations. However, powers of two work reasonably well. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index 587561d..9b17e81 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -96,6 +96,7 @@ identify_qemu () { echo qemu-system-ppc64 else echo Cannot figure out what qemu command to use! 1>&2 + echo file $1 output: $u # Usually this will be one of /usr/bin/qemu-system-* # Use RCU_QEMU_CMD environment variable or appropriate # argument to top-level script. diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index eb28509..89b5dba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -31,9 +31,9 @@ do dirs=`find $rd -name Make.defconfig.out -print | sort | sed -e 's,/[^/]*$,,' | sort -u` for i in $dirs do - if test $firsttime + if test -n "$firsttime" then - firsttime=0 + firsttime="" resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` head -1 $resdir/log fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh index a06a0a6..1b9555b 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh @@ -123,7 +123,7 @@ kstarttime=`awk 'BEGIN { print systime() }' < /dev/null` echo ' ---' `date`: Starting kernel # Determine the appropriate flavor of qemu command. -QEMU="`identify_qemu $builddir/vmlinux.o`" +QEMU="`identify_qemu $builddir/vmlinux`" # Generate -smp qemu argument. qemu_args="-nographic $qemu_args" diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index f211349..f74f006 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -30,6 +30,10 @@ scriptname=$0 args="$*" +T=/tmp/kvm.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T + dur=30 KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM PATH=${KVM}/bin:$PATH; export PATH @@ -38,6 +42,7 @@ RCU_INITRD="$KVM/initrd"; export RCU_INITRD RCU_KMAKE_ARG=""; export RCU_KMAKE_ARG resdir="" configs="" +cpus=0 ds=`date +%Y.%m.%d-%H:%M:%S` kversion="" @@ -49,6 +54,7 @@ usage () { echo " --builddir absolute-pathname" echo " --buildonly" echo " --configs \"config-file list\"" + echo " --cpus N" echo " --datestamp string" echo " --duration minutes" echo " --interactive" @@ -85,6 +91,11 @@ do configs="$2" shift ;; + --cpus) + checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--' + cpus=$2 + shift + ;; --datestamp) checkarg --datestamp "(relative pathname)" "$#" "$2" '^[^/]*$' '^--' ds=$2 @@ -168,6 +179,7 @@ else fi fi mkdir $resdir/$ds +echo Results directory: $resdir/$ds touch $resdir/$ds/log echo $scriptname $args echo $scriptname $args >> $resdir/$ds/log @@ -178,33 +190,104 @@ then git status >> $resdir/$ds/testid.txt git rev-parse HEAD >> $resdir/$ds/testid.txt fi -builddir=$KVM/b1 -if ! test -e $builddir -then - mkdir $builddir || : -fi +touch $T/cfgcpu for CF in $configs do - # Running TREE01 multiple times creates TREE01, TREE01.2, TREE01.3, ... - rd=$resdir/$ds/$CF - if test -d "${rd}" + if test -f "$CONFIGFRAG/$kversion/$CF" then - n="`ls -d "${rd}"* | grep '\.[0-9]\+$' | - sed -e 's/^.*\.\([0-9]\+\)/\1/' | - sort -k1n | tail -1`" - if test -z "$n" - then - rd="${rd}.2" - else - n="`expr $n + 1`" - rd="${rd}.${n}" - fi + echo $CF `configNR_CPUS.sh $CONFIGFRAG/$kversion/$CF` >> $T/cfgcpu + else + echo "The --configs file $CF does not exist, terminating." + exit 1 fi - mkdir "${rd}" - echo Results directory: $rd - kvm-test-1-rcu.sh $CONFIGFRAG/$kversion/$CF $builddir $rd $dur "$RCU_QEMU_ARG" "$RCU_BOOTARGS" done +sort -k2nr $T/cfgcpu > $T/cfgcpu.sort + +awk < $T/cfgcpu.sort \ + -v CONFIGDIR="$CONFIGFRAG/$kversion/" \ + -v KVM="$KVM" \ + -v ncpus=$cpus \ + -v rd=$resdir/$ds/ \ + -v dur=$dur \ + -v RCU_QEMU_ARG=$RCU_QEMU_ARG \ + -v RCU_BOOTARGS=$RCU_BOOTARGS \ +'BEGIN { + i = 0; +} + +{ + cf[i] = $1; + cpus[i] = $2; + i++; +} + +function dump(first, pastlast) +{ + print "echo ----start batch----" + jn=1 + for (j = first; j < pastlast; j++) { + builddir=KVM "/b" jn + print "echo ", cf[j], cpus[j] ": Starting build." + print "rm -f " builddir ".*" + print "touch " builddir ".wait" + print "mkdir " builddir " || :" + if (cfrep[cf[j]] == "") { + cfr[j] = cf[j]; + cfrep[cf[j]] = 1; + } else { + cfrep[cf[j]]++; + cfr[j] = cf[j] "." cfrep[cf[j]]; + } + print "mkdir " rd cfr[j] " || :"; + print "kvm-test-1-rcu.sh " CONFIGDIR cf[j], builddir, rd cfr[j], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " builddir ".out 2>&1 &" + print "echo ", cf[j], cpus[j] ": Waiting for build to complete." + print "while test -f " builddir ".wait" + print "do" + print "\tsleep 1" + print "done" + print "echo ", cf[j], cpus[j] ": Build complete." + jn++; + } + k = first + for (j = 1; j < jn; j++) { + builddir=KVM "/b" j + print "rm -f " builddir ".ready" + print "echo ----", cf[k], cpus[k] ": Starting kernel" + k++; + } + print "wait" + print "echo ---- All kernel runs complete" + k = first + for (j = 1; j < jn; j++) { + builddir=KVM "/b" j + print "echo ----", cf[k], cpus[k] ": Build/run results:" + print "cat " builddir ".out" + k++; + } +} + +END { + njobs = i; + nc = ncpus; + first = 0; + for (i = 0; i < njobs; i++) { + if (ncpus == 0) { + dump(i, i + 1); + first = i; + } else if (nc < cpus[i] && i != 0) { + dump(first, i); + first = i; + nc = ncpus; + } + nc -= cpus[i]; + } + if (ncpus != 0) + dump(first, i); +}' > $T/script + +sh $T/script + # Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier echo " --- `date` Test summary:" -- cgit v0.10.2 From db007ab5ce155c90d89220495ee17213f66b7241 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 16 Jan 2014 20:37:50 -0800 Subject: rcutorture: Fix results-directory error message The message complains about a build directory when it should instead be complaining about the results directory, so this commit fixes it. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh index 1b9555b..41ea781 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh @@ -57,7 +57,7 @@ fi resdir=${3} if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir" then - echo "kvm-test-1-rcu.sh :$resdir: Not a writable directory, cannot build into it" + echo "kvm-test-1-rcu.sh :$resdir: Not a writable directory, cannot store results into it" exit 1 fi cp $config_template $resdir/ConfigFragment -- cgit v0.10.2 From a7582815b98c843e6468cad82d01a6c084ed4072 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jan 2014 03:10:16 -0800 Subject: rcutorture: Add dryrun capability Actual rcutorture tests take considerable time and machine resources, so it is inconvenient to actually do an rcutorture run when optimizing the bin-packing algorithm. This commit therefore adds a --dryrun argument, which defaults to doing a run, but for which "sched" says to simply print the run schedule and "script" dumps the script without running it. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index f74f006..9fab7c8 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -35,6 +35,7 @@ trap 'rm -rf $T' 0 mkdir $T dur=30 +dryrun="" KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM PATH=${KVM}/bin:$PATH; export PATH builddir="${KVM}/b1" @@ -56,6 +57,7 @@ usage () { echo " --configs \"config-file list\"" echo " --cpus N" echo " --datestamp string" + echo " --dryrun sched|script" echo " --duration minutes" echo " --interactive" echo " --kmake-arg kernel-make-arguments" @@ -101,6 +103,11 @@ do ds=$2 shift ;; + --dryrun) + checkarg --dryrun "sched|script" $# "$2" 'sched\|script' '^--' + dryrun=$2 + shift + ;; --duration) checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error' dur=$2 @@ -179,9 +186,12 @@ else fi fi mkdir $resdir/$ds -echo Results directory: $resdir/$ds +if test "$dryrun" = "" +then + echo Results directory: $resdir/$ds + echo $scriptname $args +fi touch $resdir/$ds/log -echo $scriptname $args echo $scriptname $args >> $resdir/$ds/log pwd > $resdir/$ds/testid.txt @@ -231,7 +241,7 @@ function dump(first, pastlast) print "echo ", cf[j], cpus[j] ": Starting build." print "rm -f " builddir ".*" print "touch " builddir ".wait" - print "mkdir " builddir " || :" + print "mkdir " builddir " > /dev/null 2>&1 || :" if (cfrep[cf[j]] == "") { cfr[j] = cf[j]; cfrep[cf[j]] = 1; @@ -286,7 +296,28 @@ END { dump(first, i); }' > $T/script -sh $T/script +if test "$dryrun" = script +then + echo CONFIGFRAG="$CONFIGFRAG; export CONFIGFRAG" + echo KVM="$KVM; export KVM" + echo KVPATH="$KVPATH; export KVPATH" + echo PATH="$PATH; export PATH" + echo RCU_BUILDONLY="$RCU_BUILDONLY; export RCU_BUILDONLY" + echo RCU_INITRD="$RCU_INITRD; export RCU_INITRD" + echo RCU_KMAKE_ARG="$RCU_KMAKE_ARG; export RCU_KMAKE_ARG" + echo RCU_QEMU_CMD="$RCU_QEMU_CMD; export RCU_QEMU_CMD" + echo RCU_QEMU_INTERACTIVE="$RCU_QEMU_INTERACTIVE; export RCU_QEMU_INTERACTIVE" + echo RCU_QEMU_MAC="$RCU_QEMU_MAC; export RCU_QEMU_MAC" + cat $T/script + exit 0 +elif test "$dryrun" = sched +then + egrep 'start batch|Starting build\.' $T/script | + sed -e 's/:.*$//' -e 's/^echo //' + exit 0 +else + sh $T/script +fi # Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier -- cgit v0.10.2 From 0ae3f73af5ba8025abcb328913643b291698af35 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jan 2014 03:29:12 -0800 Subject: rcutorture: Handle multiple runs of the same test This commit fixes handling numbering of multiple runs of the same test so as to disambiguate output. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 9fab7c8..ad3779c 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -238,42 +238,39 @@ function dump(first, pastlast) jn=1 for (j = first; j < pastlast; j++) { builddir=KVM "/b" jn - print "echo ", cf[j], cpus[j] ": Starting build." - print "rm -f " builddir ".*" - print "touch " builddir ".wait" - print "mkdir " builddir " > /dev/null 2>&1 || :" + cpusr[jn] = cpus[j]; if (cfrep[cf[j]] == "") { - cfr[j] = cf[j]; + cfr[jn] = cf[j]; cfrep[cf[j]] = 1; } else { cfrep[cf[j]]++; - cfr[j] = cf[j] "." cfrep[cf[j]]; + cfr[jn] = cf[j] "." cfrep[cf[j]]; } - print "mkdir " rd cfr[j] " || :"; - print "kvm-test-1-rcu.sh " CONFIGDIR cf[j], builddir, rd cfr[j], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " builddir ".out 2>&1 &" - print "echo ", cf[j], cpus[j] ": Waiting for build to complete." + print "echo ", cfr[jn], cpusr[jn] ": Starting build."; + print "rm -f " builddir ".*"; + print "touch " builddir ".wait"; + print "mkdir " builddir " > /dev/null 2>&1 || :"; + print "mkdir " rd cfr[jn] " || :"; + print "kvm-test-1-rcu.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " builddir ".out 2>&1 &" + print "echo ", cfr[jn], cpusr[jn] ": Waiting for build to complete." print "while test -f " builddir ".wait" print "do" print "\tsleep 1" print "done" - print "echo ", cf[j], cpus[j] ": Build complete." + print "echo ", cfr[jn], cpusr[jn] ": Build complete." jn++; } - k = first for (j = 1; j < jn; j++) { builddir=KVM "/b" j print "rm -f " builddir ".ready" - print "echo ----", cf[k], cpus[k] ": Starting kernel" - k++; + print "echo ----", cfr[j], cpusr[j] ": Starting kernel" } print "wait" print "echo ---- All kernel runs complete" - k = first for (j = 1; j < jn; j++) { builddir=KVM "/b" j - print "echo ----", cf[k], cpus[k] ": Build/run results:" + print "echo ----", cfr[j], cpusr[j] ": Build/run results:" print "cat " builddir ".out" - k++; } } -- cgit v0.10.2 From 53954671033dc878acbeef1ecf9ac653c7b1a58f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jan 2014 14:18:05 -0800 Subject: rcutorture: Do better bin packing Running the standard set of rcutorture tests on 24 CPUs results in the following sub-optimal schedule: ----start batch---- TREE07 16 ----start batch---- TREE08 16 SRCU-P 8 ----start batch---- TREE01 8 TREE02 8 TREE03 8 ----start batch---- TREE04 8 TREE05 8 TREE06 8 ----start batch---- SRCU-N 4 TINY01 1 TINY02 1 TREE09 1 If one of the eight-CPU runs were to be moved into the first batch, the test suite would complete in four batches rather than five. This commit therefore uses a greedy algorithm to re-order the test entries so that the sequential batching will produce an optimal schedule in this case: ----start batch---- TREE07 16 SRCU-P 8 ----start batch---- TREE08 16 TREE01 8 ----start batch---- TREE02 8 TREE03 8 TREE04 8 ----start batch---- TREE05 8 TREE06 8 SRCU-N 4 TINY01 1 TINY02 1 TREE09 1 Please note that this is still not an optimal bin-packing algorithm, however, it does produce optimal solutions for most common scenarios. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index ad3779c..18649b8 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -214,7 +214,42 @@ do done sort -k2nr $T/cfgcpu > $T/cfgcpu.sort -awk < $T/cfgcpu.sort \ +awk < $T/cfgcpu.sort > $T/cfgcpu.pack -v ncpus=$cpus ' +BEGIN { + njobs = 0; +} + +{ + cf[njobs] = $1; + cpus[njobs] = $2; + njobs++; +} + +END { + alldone = 0; + batch = 0; + nc = -1; + while (nc != ncpus) { + batch++; + nc = ncpus; + for (i = 0; i < njobs; i++) { + if (done[i]) + continue; + if (nc >= cpus[i] || nc == ncpus) { + done[i] = batch; + nc -= cpus[i]; + if (nc <= 0) + break; + } + } + } + for (b = 1; b <= batch; b++) + for (i = 0; i < njobs; i++) + if (done[i] == b) + print cf[i], cpus[i]; +}' + +awk < $T/cfgcpu.pack \ -v CONFIGDIR="$CONFIGFRAG/$kversion/" \ -v KVM="$KVM" \ -v ncpus=$cpus \ -- cgit v0.10.2 From 78ad0693233080169e5a01811bd3fcb3966f2d3f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jan 2014 21:56:57 -0800 Subject: rcutorture: Add comments, especially on bin packing. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 18649b8..7ef3b24 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -188,6 +188,7 @@ fi mkdir $resdir/$ds if test "$dryrun" = "" then + # Be noisy only if running the script. echo Results directory: $resdir/$ds echo $scriptname $args fi @@ -201,6 +202,7 @@ then git rev-parse HEAD >> $resdir/$ds/testid.txt fi +# Create a file of test-name/#cpus pairs, sorted by decreasing #cpus. touch $T/cfgcpu for CF in $configs do @@ -214,12 +216,14 @@ do done sort -k2nr $T/cfgcpu > $T/cfgcpu.sort +# Use a greedy bin-packing algorithm, sorting the list accordingly. awk < $T/cfgcpu.sort > $T/cfgcpu.pack -v ncpus=$cpus ' BEGIN { njobs = 0; } { + # Read file of tests and corresponding required numbers of CPUs. cf[njobs] = $1; cpus[njobs] = $2; njobs++; @@ -229,26 +233,40 @@ END { alldone = 0; batch = 0; nc = -1; + + # Each pass through the following loop creates on test batch + # that can be executed concurrently given ncpus. Note that a + # given test that requires more than the available CPUs will run in + # their own batch. Such tests just have to make do with what + # is available. while (nc != ncpus) { batch++; nc = ncpus; + + # Each pass through the following loop considers one + # test for inclusion in the current batch. for (i = 0; i < njobs; i++) { if (done[i]) - continue; + continue; # Already part of a batch. if (nc >= cpus[i] || nc == ncpus) { + + # This test fits into the current batch. done[i] = batch; nc -= cpus[i]; if (nc <= 0) - break; + break; # Too-big test in its own batch. } } } + + # Dump out the tests in batch order. for (b = 1; b <= batch; b++) for (i = 0; i < njobs; i++) if (done[i] == b) print cf[i], cpus[i]; }' +# Generate a script to execute the tests in appropriate batches. awk < $T/cfgcpu.pack \ -v CONFIGDIR="$CONFIGFRAG/$kversion/" \ -v KVM="$KVM" \ @@ -267,6 +285,7 @@ awk < $T/cfgcpu.pack \ i++; } +# Dump out the scripting required to run one test batch. function dump(first, pastlast) { print "echo ----start batch----" @@ -313,23 +332,31 @@ END { njobs = i; nc = ncpus; first = 0; + + # Each pass through the following loop considers one test. for (i = 0; i < njobs; i++) { if (ncpus == 0) { + # Sequential test specified, each test its own batch. dump(i, i + 1); first = i; } else if (nc < cpus[i] && i != 0) { + # Out of CPUs, dump out a batch. dump(first, i); first = i; nc = ncpus; } + # Account for the CPUs needed by the current test. nc -= cpus[i]; } + # Dump the last batch. if (ncpus != 0) dump(first, i); }' > $T/script if test "$dryrun" = script then + # Dump out the script, but define the environment variables that + # it needs to run standalone. echo CONFIGFRAG="$CONFIGFRAG; export CONFIGFRAG" echo KVM="$KVM; export KVM" echo KVPATH="$KVPATH; export KVPATH" @@ -344,10 +371,12 @@ then exit 0 elif test "$dryrun" = sched then + # Extract the test run schedule from the script. egrep 'start batch|Starting build\.' $T/script | sed -e 's/:.*$//' -e 's/^echo //' exit 0 else + # Not a dryru, so run the script. sh $T/script fi -- cgit v0.10.2 From df1cc81ba7ce80a681aa7f42cad0b9d93f677a6a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jan 2014 22:08:09 -0800 Subject: rcutorture: Flag tests requiring more CPUs than are available This commit adds a "(!)" flag after the number of CPUs required by a given test if that test requires more than the available number of CPUs. Note that these flags appear only when the number of CPUs is specified using the --cpus argument. In the absence of a --cpus argument, no tests are flagged. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 7ef3b24..c099f86 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -300,30 +300,34 @@ function dump(first, pastlast) cfrep[cf[j]]++; cfr[jn] = cf[j] "." cfrep[cf[j]]; } - print "echo ", cfr[jn], cpusr[jn] ": Starting build."; + if (cpusr[jn] > ncpus && ncpus != 0) + ovf = "(!)"; + else + ovf = ""; + print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build."; print "rm -f " builddir ".*"; print "touch " builddir ".wait"; print "mkdir " builddir " > /dev/null 2>&1 || :"; print "mkdir " rd cfr[jn] " || :"; print "kvm-test-1-rcu.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " builddir ".out 2>&1 &" - print "echo ", cfr[jn], cpusr[jn] ": Waiting for build to complete." + print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete." print "while test -f " builddir ".wait" print "do" print "\tsleep 1" print "done" - print "echo ", cfr[jn], cpusr[jn] ": Build complete." + print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete." jn++; } for (j = 1; j < jn; j++) { builddir=KVM "/b" j print "rm -f " builddir ".ready" - print "echo ----", cfr[j], cpusr[j] ": Starting kernel" + print "echo ----", cfr[j], cpusr[j] ovf ": Starting kernel" } print "wait" print "echo ---- All kernel runs complete" for (j = 1; j < jn; j++) { builddir=KVM "/b" j - print "echo ----", cfr[j], cpusr[j] ": Build/run results:" + print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results:" print "cat " builddir ".out" } } -- cgit v0.10.2 From ec256c0f687f40a42472da798e30a1b9ec04cafe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jan 2014 22:12:07 -0800 Subject: rcutorture: Print results directory when dumping results Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index c099f86..cde25d8 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -386,5 +386,8 @@ fi # Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier +echo +echo echo " --- `date` Test summary:" +echo Results directory: $resdir/$ds kvm-recheck.sh $resdir/$ds -- cgit v0.10.2 From b314fc7754119e12974210f06e04cd7a15206fee Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 17 Feb 2014 19:05:56 -0300 Subject: pata_imx: Use devm_ioremap_resource() to simplify code Using devm_ioremap_resource() can lead to code simplication, as we don't need to explicitily check for error returned by platform_get_resource(). Also, no need to print an error message when devm_ioremap_resource() fails, as the OOM code code will shout loudly on such condition. Signed-off-by: Fabio Estevam Signed-off-by: Tejun Heo diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c index 97417d5..50e2cf2 100644 --- a/drivers/ata/pata_imx.c +++ b/drivers/ata/pata_imx.c @@ -99,10 +99,6 @@ static int pata_imx_probe(struct platform_device *pdev) struct resource *io_res; int ret; - io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (io_res == NULL) - return -EINVAL; - irq = platform_get_irq(pdev, 0); if (irq <= 0) return -EINVAL; @@ -133,10 +129,9 @@ static int pata_imx_probe(struct platform_device *pdev) ap->pio_mask = ATA_PIO0; ap->flags |= ATA_FLAG_SLAVE_POSS; - priv->host_regs = devm_ioremap(&pdev->dev, io_res->start, - resource_size(io_res)); + io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->host_regs = devm_ioremap_resource(&pdev->dev, io_res); if (!priv->host_regs) { - dev_err(&pdev->dev, "failed to map IO/CTL base\n"); ret = -EBUSY; goto err; } -- cgit v0.10.2 From 3ef9cc31ee39dea369e989de3f7f9e822cc8de62 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 17 Feb 2014 19:05:57 -0300 Subject: pata_imx: Propagate the real error code on platform_get_irq() failure No need to return a 'fake' return value on platform_get_irq() failure. Just return the error code itself instead. Also, change the error condition to irq < 0, so that only negative values are treated as errors. Signed-off-by: Fabio Estevam Signed-off-by: Tejun Heo diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c index 50e2cf2..121c748 100644 --- a/drivers/ata/pata_imx.c +++ b/drivers/ata/pata_imx.c @@ -100,8 +100,8 @@ static int pata_imx_probe(struct platform_device *pdev) int ret; irq = platform_get_irq(pdev, 0); - if (irq <= 0) - return -EINVAL; + if (irq < 0) + return irq; priv = devm_kzalloc(&pdev->dev, sizeof(struct pata_imx_priv), GFP_KERNEL); -- cgit v0.10.2 From 5a98268e0f657e8f1289ad9b83fe010f0208565d Mon Sep 17 00:00:00 2001 From: Asai Thambi S P Date: Tue, 18 Feb 2014 14:49:17 -0800 Subject: mtip32xx: Reduce the number of unaligned writes to 2 After several experiments, deduced the the optimal number of unaligned writes to be 2. Changing the value accordingly. Signed-off-by: Asai Thambi S P Signed-off-by: Sam Bradshaw Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b52e9a6..54174cb 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -53,7 +53,7 @@ #define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000 /* unaligned IO handling */ -#define MTIP_MAX_UNALIGNED_SLOTS 8 +#define MTIP_MAX_UNALIGNED_SLOTS 2 /* Macro to extract the tag bit number from a tag value. */ #define MTIP_TAG_BIT(tag) (tag & 0x1F) -- cgit v0.10.2 From e478cffd2a34421cd6651f3656c0622a14cfbeaf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 14 Feb 2014 18:29:34 -0800 Subject: ata: CONFIG_ATA is libata Let users know that CONFIG_ATA is the kconfig symbol for libata, since libata is mentioned in documentation and messages several times. Also correct a grammar typo. Signed-off-by: Randy Dunlap Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 4e73772..77416c1 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -11,13 +11,13 @@ config HAVE_PATA_PLATFORM to update the PATA_PLATFORM entry. menuconfig ATA - tristate "Serial ATA and Parallel ATA drivers" + tristate "Serial ATA and Parallel ATA drivers (libata)" depends on HAS_IOMEM depends on BLOCK depends on !(M32R || M68K || S390) || BROKEN select SCSI ---help--- - If you want to use a ATA hard disk, ATA tape drive, ATA CD-ROM or + If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or any other ATA device under Linux, say Y and make sure that you know the name of your ATA host adapter (the card inside your computer that "speaks" the ATA protocol, also called ATA controller), -- cgit v0.10.2 From 90d88bd75424dff51e2072fd2f8fa85ee893aa17 Mon Sep 17 00:00:00 2001 From: Tan Xiaojun Date: Sat, 15 Feb 2014 13:19:51 +0800 Subject: workqueue: Remove deprecated __cancel_delayed_work() __cancel_delayed_work() was deprecated by 136b5721d75a ("workqueue: deprecate __cancel_delayed_work()") as cancel_delayed_work() was updated so that it could be used from all contexts. Enough time has passed since the deprecation. Let's remove it. tj: description update Signed-off-by: Tan Xiaojun Signed-off-by: Tejun Heo diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 594521b..edc9410 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -605,21 +605,6 @@ static inline bool keventd_up(void) return system_wq != NULL; } -/* - * Like above, but uses del_timer() instead of del_timer_sync(). This means, - * if it returns 0 the timer function may be running and the queueing is in - * progress. - */ -static inline bool __deprecated __cancel_delayed_work(struct delayed_work *work) -{ - bool ret; - - ret = del_timer(&work->timer); - if (ret) - work_clear_pending(&work->work); - return ret; -} - /* used to be different but now identical to flush_work(), deprecated */ static inline bool __deprecated flush_work_sync(struct work_struct *work) { -- cgit v0.10.2 From 30686c350628a68852f8abd67557aecb137789d5 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 18 Feb 2014 16:05:27 +0000 Subject: ASoC: dapm: Correct regulator bypass error messages The error messages for bypassing/unbypassing a regulator appear to be swapped round, this patch corrects these. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index dc8ff13..fd39cd2 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1218,7 +1218,7 @@ int dapm_regulator_event(struct snd_soc_dapm_widget *w, ret = regulator_allow_bypass(w->regulator, false); if (ret != 0) dev_warn(w->dapm->dev, - "ASoC: Failed to bypass %s: %d\n", + "ASoC: Failed to unbypass %s: %d\n", w->name, ret); } @@ -1228,7 +1228,7 @@ int dapm_regulator_event(struct snd_soc_dapm_widget *w, ret = regulator_allow_bypass(w->regulator, true); if (ret != 0) dev_warn(w->dapm->dev, - "ASoC: Failed to unbypass %s: %d\n", + "ASoC: Failed to bypass %s: %d\n", w->name, ret); } @@ -3248,7 +3248,7 @@ snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm, ret = regulator_allow_bypass(w->regulator, true); if (ret != 0) dev_warn(w->dapm->dev, - "ASoC: Failed to unbypass %s: %d\n", + "ASoC: Failed to bypass %s: %d\n", w->name, ret); } break; -- cgit v0.10.2 From 995a9190ac9ca85cc15d11d9566017e91c58a118 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 7 Jan 2014 17:47:52 +0100 Subject: clk: shmobile: rcar-gen2: Fix clock parent all non-PLL clocks The lb, qspi, sdh, sd0 and sd1 clocks have the PLL1 (divided by 2) as their parent, not the main clock. Fix it. Reported-by: Geert Uytterhoeven Signed-off-by: Laurent Pinchart Tested-by: Geert Uytterhoeven Acked-by: Simon Horman Signed-off-by: Mike Turquette diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/shmobile/clk-rcar-gen2.c index a59ec21..8c7bcbd 100644 --- a/drivers/clk/shmobile/clk-rcar-gen2.c +++ b/drivers/clk/shmobile/clk-rcar-gen2.c @@ -186,7 +186,7 @@ rcar_gen2_cpg_register_clock(struct device_node *np, struct rcar_gen2_cpg *cpg, const char *name) { const struct clk_div_table *table = NULL; - const char *parent_name = "main"; + const char *parent_name; unsigned int shift; unsigned int mult = 1; unsigned int div = 1; @@ -201,23 +201,31 @@ rcar_gen2_cpg_register_clock(struct device_node *np, struct rcar_gen2_cpg *cpg, * the multiplier value. */ u32 value = clk_readl(cpg->reg + CPG_PLL0CR); + parent_name = "main"; mult = ((value >> 24) & ((1 << 7) - 1)) + 1; } else if (!strcmp(name, "pll1")) { + parent_name = "main"; mult = config->pll1_mult / 2; } else if (!strcmp(name, "pll3")) { + parent_name = "main"; mult = config->pll3_mult; } else if (!strcmp(name, "lb")) { + parent_name = "pll1_div2"; div = cpg_mode & BIT(18) ? 36 : 24; } else if (!strcmp(name, "qspi")) { + parent_name = "pll1_div2"; div = (cpg_mode & (BIT(3) | BIT(2) | BIT(1))) == BIT(2) ? 16 : 20; } else if (!strcmp(name, "sdh")) { + parent_name = "pll1_div2"; table = cpg_sdh_div_table; shift = 8; } else if (!strcmp(name, "sd0")) { + parent_name = "pll1_div2"; table = cpg_sd01_div_table; shift = 4; } else if (!strcmp(name, "sd1")) { + parent_name = "pll1_div2"; table = cpg_sd01_div_table; shift = 0; } else if (!strcmp(name, "z")) { -- cgit v0.10.2 From 8510e7263ad9399e771e0f67517a0d5409390445 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 7 Jan 2014 17:47:53 +0100 Subject: clk: shmobile: rcar-gen2: Fix qspi divisor The qspi clock divisor is incorrectly set to twice the value it should have, possibly because it has been computed based on PLL1 as the clock parent instead of PLL1 / 2 (the datasheets specifies the qspi nominal frequencies, not the divisor values). Fix it. Reported-by: Geert Uytterhoeven Signed-off-by: Laurent Pinchart Tested-by: Geert Uytterhoeven Acked-by: Simon Horman Signed-off-by: Mike Turquette diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/shmobile/clk-rcar-gen2.c index 8c7bcbd..dd272a0 100644 --- a/drivers/clk/shmobile/clk-rcar-gen2.c +++ b/drivers/clk/shmobile/clk-rcar-gen2.c @@ -215,7 +215,7 @@ rcar_gen2_cpg_register_clock(struct device_node *np, struct rcar_gen2_cpg *cpg, } else if (!strcmp(name, "qspi")) { parent_name = "pll1_div2"; div = (cpg_mode & (BIT(3) | BIT(2) | BIT(1))) == BIT(2) - ? 16 : 20; + ? 8 : 10; } else if (!strcmp(name, "sdh")) { parent_name = "pll1_div2"; table = cpg_sdh_div_table; -- cgit v0.10.2 From 50c11eb9982554e9f99b7bab322c517cbe5ce1a1 Mon Sep 17 00:00:00 2001 From: Inbal Hacohen Date: Wed, 12 Feb 2014 09:32:27 +0200 Subject: cfg80211: bugfix in regulatory user hint process After processing hint_user, we would want to schedule the timeout work only if we are actually waiting to CRDA. This happens when the status is not "IGNORE" nor "ALREADY_SET". Signed-off-by: Inbal Hacohen Signed-off-by: Johannes Berg diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 9b897fc..4c50c21 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1700,7 +1700,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) return; case NL80211_REGDOM_SET_BY_USER: treatment = reg_process_hint_user(reg_request); - if (treatment == REG_REQ_OK || + if (treatment == REG_REQ_IGNORE || treatment == REG_REQ_ALREADY_SET) return; schedule_delayed_work(®_timeout, msecs_to_jiffies(3142)); -- cgit v0.10.2 From 7611c7a5613f20ec2fb536c1d868d15045730bc0 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 19 Feb 2014 10:36:08 +0100 Subject: spi/topcliff-pch: Fix DMA channel bus_num might be asigned dynamically to e.g. 32766. In this case the calculated DMA channel based on SPI bus number is bogus. Use SPI channel number instead for calculation. Signed-off-by: Alexander Stein Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 2e7f38c..3383008c 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -915,7 +915,7 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw) /* Set Tx DMA */ param = &dma->param_tx; param->dma_dev = &dma_dev->dev; - param->chan_id = data->master->bus_num * 2; /* Tx = 0, 2 */ + param->chan_id = data->ch * 2; /* Tx = 0, 2 */; param->tx_reg = data->io_base_addr + PCH_SPDWR; param->width = width; chan = dma_request_channel(mask, pch_spi_filter, param); @@ -930,7 +930,7 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw) /* Set Rx DMA */ param = &dma->param_rx; param->dma_dev = &dma_dev->dev; - param->chan_id = data->master->bus_num * 2 + 1; /* Rx = Tx + 1 */ + param->chan_id = data->ch * 2 + 1; /* Rx = Tx + 1 */; param->rx_reg = data->io_base_addr + PCH_SPDRR; param->width = width; chan = dma_request_channel(mask, pch_spi_filter, param); -- cgit v0.10.2 From 18258f7239a61d8929b8e0c7b6d46c446459074c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 15 Feb 2014 00:55:18 +0000 Subject: genirq: Provide synchronize_hardirq() synchronize_irq() waits for hard irq and threaded handlers to complete before returning. For some special cases we only need to make sure that the hard interrupt part of the irq line is not in progress when we disabled the - possibly shared - interrupt at the device level. A proper use case for this was provided by Russell. The sdhci driver requires some irq triggered functions to be run in thread context. The current implementation of the thread context is a sdio private kthread construct, which has quite some shortcomings. These can be avoided when the thread is directly associated to the device interrupt via the generic threaded irq infrastructure. Though there is a corner case related to run time power management where one side disables the device interrupts at the device level and needs to make sure, that an already running hard interrupt handler has completed before proceeding further. Though that hard interrupt handler might wake the associated thread, which in turn can request the runtime PM to reenable the device. Using synchronize_irq() leads to an immediate deadlock of the irq thread waiting for the PM lock and the synchronize_irq() waiting for the irq thread to complete. Due to the fact that it is sufficient for this case to ensure that no hard irq handler is executing a new function which avoids the check for the thread is required. Add a function, which just monitors the hard irq parts and ignores the threaded handlers. Signed-off-by: Thomas Gleixner Tested-by: Russell King Cc: Chris Ball Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140215003823.653236081@linutronix.de diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 12d5f97..cba442e 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -9,6 +9,7 @@ extern void synchronize_irq(unsigned int irq); +extern void synchronize_hardirq(unsigned int irq); #if defined(CONFIG_TINY_RCU) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 481a13c..274ba92 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -32,24 +32,10 @@ static int __init setup_forced_irqthreads(char *arg) early_param("threadirqs", setup_forced_irqthreads); #endif -/** - * synchronize_irq - wait for pending IRQ handlers (on other CPUs) - * @irq: interrupt number to wait for - * - * This function waits for any pending IRQ handlers for this interrupt - * to complete before returning. If you use this function while - * holding a resource the IRQ handler may need you will deadlock. - * - * This function may be called - with care - from IRQ context. - */ -void synchronize_irq(unsigned int irq) +static void __synchronize_hardirq(struct irq_desc *desc) { - struct irq_desc *desc = irq_to_desc(irq); bool inprogress; - if (!desc) - return; - do { unsigned long flags; @@ -67,12 +53,56 @@ void synchronize_irq(unsigned int irq) /* Oops, that failed? */ } while (inprogress); +} - /* - * We made sure that no hardirq handler is running. Now verify - * that no threaded handlers are active. - */ - wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); +/** + * synchronize_hardirq - wait for pending hard IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for + * + * This function waits for any pending hard IRQ handlers for this + * interrupt to complete before returning. If you use this + * function while holding a resource the IRQ handler may need you + * will deadlock. It does not take associated threaded handlers + * into account. + * + * Do not use this for shutdown scenarios where you must be sure + * that all parts (hardirq and threaded handler) have completed. + * + * This function may be called - with care - from IRQ context. + */ +void synchronize_hardirq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) + __synchronize_hardirq(desc); +} +EXPORT_SYMBOL(synchronize_hardirq); + +/** + * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for + * + * This function waits for any pending IRQ handlers for this interrupt + * to complete before returning. If you use this function while + * holding a resource the IRQ handler may need you will deadlock. + * + * This function may be called - with care - from IRQ context. + */ +void synchronize_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc) { + __synchronize_hardirq(desc); + /* + * We made sure that no hardirq handler is + * running. Now verify that no threaded handlers are + * active. + */ + wait_event(desc->wait_for_threads, + !atomic_read(&desc->threads_active)); + } } EXPORT_SYMBOL(synchronize_irq); -- cgit v0.10.2 From a92444c6b2225a9115d661c950cb48a22aeace20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 15 Feb 2014 00:55:19 +0000 Subject: genirq: Provide irq_wake_thread() In course of the sdhci/sdio discussion with Russell about killing the sdio kthread hackery we discovered the need to be able to wake an interrupt thread from software. The rationale for this is, that sdio hardware can lack proper interrupt support for certain features. So the driver needs to poll the status registers, but at the same time it needs to be woken up by an hardware interrupt. To be able to get rid of the home brewn kthread construct of sdio we need a way to wake an irq thread independent of an actual hardware interrupt. Provide an irq_wake_thread() function which wakes up the thread which is associated to a given dev_id. This allows sdio to invoke the irq thread from the hardware irq handler via the IRQ_WAKE_THREAD return value and provides a possibility to wake it via a timer for the polling scenarios. That allows to simplify the sdio logic significantly. Signed-off-by: Thomas Gleixner Cc: Russell King Cc: Chris Ball Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140215003823.772565780@linutronix.de diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a2678d3..c7bfac1 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -188,6 +188,7 @@ extern void disable_irq(unsigned int irq); extern void disable_percpu_irq(unsigned int irq); extern void enable_irq(unsigned int irq); extern void enable_percpu_irq(unsigned int irq, unsigned int type); +extern void irq_wake_thread(unsigned int irq, void *dev_id); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 131ca17..bfec453 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -51,7 +51,7 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action) "but no thread function available.", irq, action->name); } -static void irq_wake_thread(struct irq_desc *desc, struct irqaction *action) +void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action) { /* * In case the thread crashed and was killed we just pretend that @@ -157,7 +157,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) break; } - irq_wake_thread(desc, action); + __irq_wake_thread(desc, action); /* Fall through to add to randomness */ case IRQ_HANDLED: diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 001fa5b..d61ac29 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -82,6 +82,7 @@ irqreturn_t handle_irq_event(struct irq_desc *desc); /* Resending of interrupts :*/ void check_irq_resend(struct irq_desc *desc, unsigned int irq); bool irq_wait_for_poll(struct irq_desc *desc); +void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action); #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 274ba92..54eb5c9 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -911,6 +911,33 @@ static int irq_thread(void *data) return 0; } +/** + * irq_wake_thread - wake the irq thread for the action identified by dev_id + * @irq: Interrupt line + * @dev_id: Device identity for which the thread should be woken + * + */ +void irq_wake_thread(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + unsigned long flags; + + if (!desc || WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return; + + raw_spin_lock_irqsave(&desc->lock, flags); + for (action = desc->action; action; action = action->next) { + if (action->dev_id == dev_id) { + if (action->thread) + __irq_wake_thread(desc, action); + break; + } + } + raw_spin_unlock_irqrestore(&desc->lock, flags); +} +EXPORT_SYMBOL_GPL(irq_wake_thread); + static void irq_setup_forced_threading(struct irqaction *new) { if (!force_irqthreads) -- cgit v0.10.2 From b04c644e670f79417f1728e6be310cfd8e6a921b Mon Sep 17 00:00:00 2001 From: Chuansheng Liu Date: Mon, 10 Feb 2014 16:13:57 +0800 Subject: genirq: Update the a comment typo Change the comment "chasnge" to "change". Signed-off-by: Chuansheng Liu Link: http://lkml.kernel.org/r/1392020037-5484-2-git-send-email-chuansheng.liu@intel.com Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 54eb5c9..ada0c54 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -757,7 +757,7 @@ out_unlock: #ifdef CONFIG_SMP /* - * Check whether we need to chasnge the affinity of the interrupt thread. + * Check whether we need to change the affinity of the interrupt thread. */ static void irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) -- cgit v0.10.2 From 025c3fa9256d4c54506b7a29dc3befac54f5c68d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2014 09:24:12 +0100 Subject: ASoC: sta32x: Fix array access overflow Preset EQ enum of sta32x codec driver declares too many number of items and it may lead to the access over the actual array size. Use SOC_ENUM_SINGLE_DECL() helper and it's automatically fixed. Signed-off-by: Takashi Iwai Acked-by: Liam Girdwood Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c index 06edb39..42c5b45 100644 --- a/sound/soc/codecs/sta32x.c +++ b/sound/soc/codecs/sta32x.c @@ -187,42 +187,42 @@ static const unsigned int sta32x_limiter_drc_release_tlv[] = { 13, 16, TLV_DB_SCALE_ITEM(-1500, 300, 0), }; -static const struct soc_enum sta32x_drc_ac_enum = - SOC_ENUM_SINGLE(STA32X_CONFD, STA32X_CONFD_DRC_SHIFT, - 2, sta32x_drc_ac); -static const struct soc_enum sta32x_auto_eq_enum = - SOC_ENUM_SINGLE(STA32X_AUTO1, STA32X_AUTO1_AMEQ_SHIFT, - 3, sta32x_auto_eq_mode); -static const struct soc_enum sta32x_auto_gc_enum = - SOC_ENUM_SINGLE(STA32X_AUTO1, STA32X_AUTO1_AMGC_SHIFT, - 4, sta32x_auto_gc_mode); -static const struct soc_enum sta32x_auto_xo_enum = - SOC_ENUM_SINGLE(STA32X_AUTO2, STA32X_AUTO2_XO_SHIFT, - 16, sta32x_auto_xo_mode); -static const struct soc_enum sta32x_preset_eq_enum = - SOC_ENUM_SINGLE(STA32X_AUTO3, STA32X_AUTO3_PEQ_SHIFT, - 32, sta32x_preset_eq_mode); -static const struct soc_enum sta32x_limiter_ch1_enum = - SOC_ENUM_SINGLE(STA32X_C1CFG, STA32X_CxCFG_LS_SHIFT, - 3, sta32x_limiter_select); -static const struct soc_enum sta32x_limiter_ch2_enum = - SOC_ENUM_SINGLE(STA32X_C2CFG, STA32X_CxCFG_LS_SHIFT, - 3, sta32x_limiter_select); -static const struct soc_enum sta32x_limiter_ch3_enum = - SOC_ENUM_SINGLE(STA32X_C3CFG, STA32X_CxCFG_LS_SHIFT, - 3, sta32x_limiter_select); -static const struct soc_enum sta32x_limiter1_attack_rate_enum = - SOC_ENUM_SINGLE(STA32X_L1AR, STA32X_LxA_SHIFT, - 16, sta32x_limiter_attack_rate); -static const struct soc_enum sta32x_limiter2_attack_rate_enum = - SOC_ENUM_SINGLE(STA32X_L2AR, STA32X_LxA_SHIFT, - 16, sta32x_limiter_attack_rate); -static const struct soc_enum sta32x_limiter1_release_rate_enum = - SOC_ENUM_SINGLE(STA32X_L1AR, STA32X_LxR_SHIFT, - 16, sta32x_limiter_release_rate); -static const struct soc_enum sta32x_limiter2_release_rate_enum = - SOC_ENUM_SINGLE(STA32X_L2AR, STA32X_LxR_SHIFT, - 16, sta32x_limiter_release_rate); +static SOC_ENUM_SINGLE_DECL(sta32x_drc_ac_enum, + STA32X_CONFD, STA32X_CONFD_DRC_SHIFT, + sta32x_drc_ac); +static SOC_ENUM_SINGLE_DECL(sta32x_auto_eq_enum, + STA32X_AUTO1, STA32X_AUTO1_AMEQ_SHIFT, + sta32x_auto_eq_mode); +static SOC_ENUM_SINGLE_DECL(sta32x_auto_gc_enum, + STA32X_AUTO1, STA32X_AUTO1_AMGC_SHIFT, + sta32x_auto_gc_mode); +static SOC_ENUM_SINGLE_DECL(sta32x_auto_xo_enum, + STA32X_AUTO2, STA32X_AUTO2_XO_SHIFT, + sta32x_auto_xo_mode); +static SOC_ENUM_SINGLE_DECL(sta32x_preset_eq_enum, + STA32X_AUTO3, STA32X_AUTO3_PEQ_SHIFT, + sta32x_preset_eq_mode); +static SOC_ENUM_SINGLE_DECL(sta32x_limiter_ch1_enum, + STA32X_C1CFG, STA32X_CxCFG_LS_SHIFT, + sta32x_limiter_select); +static SOC_ENUM_SINGLE_DECL(sta32x_limiter_ch2_enum, + STA32X_C2CFG, STA32X_CxCFG_LS_SHIFT, + sta32x_limiter_select); +static SOC_ENUM_SINGLE_DECL(sta32x_limiter_ch3_enum, + STA32X_C3CFG, STA32X_CxCFG_LS_SHIFT, + sta32x_limiter_select); +static SOC_ENUM_SINGLE_DECL(sta32x_limiter1_attack_rate_enum, + STA32X_L1AR, STA32X_LxA_SHIFT, + sta32x_limiter_attack_rate); +static SOC_ENUM_SINGLE_DECL(sta32x_limiter2_attack_rate_enum, + STA32X_L2AR, STA32X_LxA_SHIFT, + sta32x_limiter_attack_rate); +static SOC_ENUM_SINGLE_DECL(sta32x_limiter1_release_rate_enum, + STA32X_L1AR, STA32X_LxR_SHIFT, + sta32x_limiter_release_rate); +static SOC_ENUM_SINGLE_DECL(sta32x_limiter2_release_rate_enum, + STA32X_L2AR, STA32X_LxR_SHIFT, + sta32x_limiter_release_rate); /* byte array controls for setting biquad, mixer, scaling coefficients; * for biquads all five coefficients need to be set in one go, -- cgit v0.10.2 From 18ba7b9d5fd6eb2c2c510dfcfc6e8fee56c42571 Mon Sep 17 00:00:00 2001 From: Vaibhav Bedia Date: Sun, 16 Feb 2014 18:15:44 -0500 Subject: ARM: OMAP5: PRM: Fix reboot handling Use the correct register offset for issuing the reset command in OMAP5. Since dev_inst is set dynamically OMAP4 should not be affected by this change. Signed-off-by: Vaibhav Bedia Tested-by: Lokesh Vutla Acked-by: Rajendra Nayak Signed-off-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/prminst44xx.c b/arch/arm/mach-omap2/prminst44xx.c index 6334b96..280f3c5 100644 --- a/arch/arm/mach-omap2/prminst44xx.c +++ b/arch/arm/mach-omap2/prminst44xx.c @@ -183,11 +183,11 @@ void omap4_prminst_global_warm_sw_reset(void) OMAP4_PRM_RSTCTRL_OFFSET); v |= OMAP4430_RST_GLOBAL_WARM_SW_MASK; omap4_prminst_write_inst_reg(v, OMAP4430_PRM_PARTITION, - OMAP4430_PRM_DEVICE_INST, + dev_inst, OMAP4_PRM_RSTCTRL_OFFSET); /* OCP barrier */ v = omap4_prminst_read_inst_reg(OMAP4430_PRM_PARTITION, - OMAP4430_PRM_DEVICE_INST, + dev_inst, OMAP4_PRM_RSTCTRL_OFFSET); } -- cgit v0.10.2 From c317d0f241fa0bbb098aa35f3d4b3067be2b5f3d Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 10 Jan 2014 17:43:08 -0600 Subject: ARM: DRA7: hwmod data: correct the sysc data for spinlock The spinlock module's SYSCONFIG register on DRA7xx does not support smart wakeup, and also does not have the CLKACTIVITY field. The sysc data for spinlock module has been appropriately fixed up to reflect the same. Cc: Ambresh K Signed-off-by: Suman Anna Signed-off-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index 18f333c..810c205 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -1365,11 +1365,10 @@ static struct omap_hwmod_class_sysconfig dra7xx_spinlock_sysc = { .rev_offs = 0x0000, .sysc_offs = 0x0010, .syss_offs = 0x0014, - .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY | - SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE | - SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS), - .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | - SIDLE_SMART_WKUP), + .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_ENAWAKEUP | + SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET | + SYSS_HAS_RESET_STATUS), + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART), .sysc_fields = &omap_hwmod_sysc_type1, }; -- cgit v0.10.2 From 01142519ffc0734436f26b01aeed37a915dece05 Mon Sep 17 00:00:00 2001 From: Illia Smyrnov Date: Wed, 5 Feb 2014 17:06:09 +0200 Subject: ARM: OMAP4: hwmod: Fix SOFTRESET logic for OMAP4 Commit 313a76e (ARM: OMAP2+: hwmod: Fix SOFTRESET logic) introduced softreset bit cleaning right after set one. It is caused L3 error for OMAP4 ISS because ISS register write occurs when ISS reset process is in progress. Avoid this situation by cleaning softreset bit later, when reset process is successfully finished. Signed-off-by: Illia Smyrnov Reviewed-by: Grygorii Strashko Acked-by: Roger Quadros Signed-off-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 42d8188..1f33f5d 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1947,29 +1947,31 @@ static int _ocp_softreset(struct omap_hwmod *oh) goto dis_opt_clks; _write_sysconfig(v, oh); - ret = _clear_softreset(oh, &v); - if (ret) - goto dis_opt_clks; - - _write_sysconfig(v, oh); if (oh->class->sysc->srst_udelay) udelay(oh->class->sysc->srst_udelay); c = _wait_softreset_complete(oh); - if (c == MAX_MODULE_SOFTRESET_WAIT) + if (c == MAX_MODULE_SOFTRESET_WAIT) { pr_warning("omap_hwmod: %s: softreset failed (waited %d usec)\n", oh->name, MAX_MODULE_SOFTRESET_WAIT); - else + ret = -ETIMEDOUT; + goto dis_opt_clks; + } else { pr_debug("omap_hwmod: %s: softreset in %d usec\n", oh->name, c); + } + + ret = _clear_softreset(oh, &v); + if (ret) + goto dis_opt_clks; + + _write_sysconfig(v, oh); /* * XXX add _HWMOD_STATE_WEDGED for modules that don't come back from * _wait_target_ready() or _reset() */ - ret = (c == MAX_MODULE_SOFTRESET_WAIT) ? -ETIMEDOUT : 0; - dis_opt_clks: if (oh->flags & HWMOD_CONTROL_OPT_CLKS_IN_RESET) _disable_optional_clocks(oh); -- cgit v0.10.2 From 994c41ee0ac875797b4dfef509ac7753e2649b4d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 30 Jan 2014 13:17:20 +0200 Subject: ARM: OMAP2+: clock: fix clkoutx2 with CLK_SET_RATE_PARENT If CLK_SET_RATE_PARENT is set for a clkoutx2 clock, calling clk_set_rate() on the clock "skips" the x2 multiplier as there are no set_rate and round_rate functions defined for the clkoutx2. This results in getting double the requested clock rates, breaking the display on omap3430 based devices. This got broken when d0f58bd3bba3877fb1af4664c4e33273d36f00e4 and related patches were merged for v3.14, as omapdss driver now relies more on the clk-framework and CLK_SET_RATE_PARENT. This patch implements set_rate and round_rate for clkoutx2. Tested on OMAP3430, OMAP3630, OMAP4460. Signed-off-by: Tomi Valkeinen Acked-by: Tero Kristo Signed-off-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/cclock3xxx_data.c b/arch/arm/mach-omap2/cclock3xxx_data.c index 3b05aea..11ed915 100644 --- a/arch/arm/mach-omap2/cclock3xxx_data.c +++ b/arch/arm/mach-omap2/cclock3xxx_data.c @@ -433,7 +433,9 @@ static const struct clk_ops dpll4_m5x2_ck_ops = { .enable = &omap2_dflt_clk_enable, .disable = &omap2_dflt_clk_disable, .is_enabled = &omap2_dflt_clk_is_enabled, + .set_rate = &omap3_clkoutx2_set_rate, .recalc_rate = &omap3_clkoutx2_recalc, + .round_rate = &omap3_clkoutx2_round_rate, }; static const struct clk_ops dpll4_m5x2_ck_3630_ops = { diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index 3185ced..3c418ea 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -623,6 +623,32 @@ void omap3_dpll_deny_idle(struct clk_hw_omap *clk) /* Clock control for DPLL outputs */ +/* Find the parent DPLL for the given clkoutx2 clock */ +static struct clk_hw_omap *omap3_find_clkoutx2_dpll(struct clk_hw *hw) +{ + struct clk_hw_omap *pclk = NULL; + struct clk *parent; + + /* Walk up the parents of clk, looking for a DPLL */ + do { + do { + parent = __clk_get_parent(hw->clk); + hw = __clk_get_hw(parent); + } while (hw && (__clk_get_flags(hw->clk) & CLK_IS_BASIC)); + if (!hw) + break; + pclk = to_clk_hw_omap(hw); + } while (pclk && !pclk->dpll_data); + + /* clk does not have a DPLL as a parent? error in the clock data */ + if (!pclk) { + WARN_ON(1); + return NULL; + } + + return pclk; +} + /** * omap3_clkoutx2_recalc - recalculate DPLL X2 output virtual clock rate * @clk: DPLL output struct clk @@ -637,27 +663,14 @@ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long rate; u32 v; struct clk_hw_omap *pclk = NULL; - struct clk *parent; if (!parent_rate) return 0; - /* Walk up the parents of clk, looking for a DPLL */ - do { - do { - parent = __clk_get_parent(hw->clk); - hw = __clk_get_hw(parent); - } while (hw && (__clk_get_flags(hw->clk) & CLK_IS_BASIC)); - if (!hw) - break; - pclk = to_clk_hw_omap(hw); - } while (pclk && !pclk->dpll_data); + pclk = omap3_find_clkoutx2_dpll(hw); - /* clk does not have a DPLL as a parent? error in the clock data */ - if (!pclk) { - WARN_ON(1); + if (!pclk) return 0; - } dd = pclk->dpll_data; @@ -672,6 +685,55 @@ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, return rate; } +int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + return 0; +} + +long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + const struct dpll_data *dd; + u32 v; + struct clk_hw_omap *pclk = NULL; + + if (!*prate) + return 0; + + pclk = omap3_find_clkoutx2_dpll(hw); + + if (!pclk) + return 0; + + dd = pclk->dpll_data; + + /* TYPE J does not have a clkoutx2 */ + if (dd->flags & DPLL_J_TYPE) { + *prate = __clk_round_rate(__clk_get_parent(pclk->hw.clk), rate); + return *prate; + } + + WARN_ON(!dd->enable_mask); + + v = omap2_clk_readl(pclk, dd->control_reg) & dd->enable_mask; + v >>= __ffs(dd->enable_mask); + + /* If in bypass, the rate is fixed to the bypass rate*/ + if (v != OMAP3XXX_EN_DPLL_LOCKED) + return *prate; + + if (__clk_get_flags(hw->clk) & CLK_SET_RATE_PARENT) { + unsigned long best_parent; + + best_parent = (rate / 2); + *prate = __clk_round_rate(__clk_get_parent(hw->clk), + best_parent); + } + + return *prate * 2; +} + /* OMAP3/4 non-CORE DPLL clkops */ const struct clk_hw_omap_ops clkhwops_omap3_dpll = { .allow_idle = omap3_dpll_allow_idle, diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 092b641..4a21a87 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -245,6 +245,10 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, void omap2_init_clk_clkdm(struct clk_hw *clk); unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long parent_rate); +int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); +long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate); int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); int omap2_clk_disable_autoidle_all(void); -- cgit v0.10.2 From c89b5c65c08116cef4b80f2903cb452ca0e20cee Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 19 Feb 2014 02:46:50 +0400 Subject: net: ethoc: document OF bindings Signed-off-by: Max Filippov Signed-off-by: David S. Miller diff --git a/Documentation/devicetree/bindings/net/opencores-ethoc.txt b/Documentation/devicetree/bindings/net/opencores-ethoc.txt new file mode 100644 index 0000000..2dc127c --- /dev/null +++ b/Documentation/devicetree/bindings/net/opencores-ethoc.txt @@ -0,0 +1,22 @@ +* OpenCores MAC 10/100 Mbps + +Required properties: +- compatible: Should be "opencores,ethoc". +- reg: two memory regions (address and length), + first region is for the device registers and descriptor rings, + second is for the device packet memory. +- interrupts: interrupt for the device. + +Optional properties: +- clocks: phandle to refer to the clk used as per + Documentation/devicetree/bindings/clock/clock-bindings.txt + +Examples: + + enet0: ethoc@fd030000 { + compatible = "opencores,ethoc"; + reg = <0xfd030000 0x4000 0xfd800000 0x4000>; + interrupts = <1>; + local-mac-address = [00 50 c2 13 6f 00]; + clocks = <&osc>; + }; -- cgit v0.10.2 From d554f73df6bc35ac8f6a65e5560bf1d31dfebed9 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Wed, 19 Feb 2014 18:48:34 +0000 Subject: xen-netfront: reset skb network header before checksum In ed1f50c3a ("net: add skb_checksum_setup") we introduced some checksum functions in core driver. Subsequent change b5cf66cd1 ("xen-netfront: use new skb_checksum_setup function") made use of those functions to replace its own implementation. However with that change netfront is broken. It sees a lot of checksum error. That's because its own implementation of checksum function was a bit hacky (dereferencing skb->data directly) while the new function was implemented using ip_hdr(). The network header is not reset before skb is passed to the new function. When the new function tries to do its job, it's confused and reports error. The fix is simple, we need to reset network header before passing skb to checksum function. Netback is not affected as it already does the right thing. Reported-by: Sander Eikelenboom Signed-off-by: Wei Liu Cc: Konrad Rzeszutek Wilk Cc: Paul Durrant Tested-By: Sander Eikelenboom Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index f9daa9e..e30d800 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -907,6 +907,7 @@ static int handle_incoming_queue(struct net_device *dev, /* Ethernet work: Delayed to here as it peeks the header. */ skb->protocol = eth_type_trans(skb, dev); + skb_reset_network_header(skb); if (checksum_setup(dev, skb)) { kfree_skb(skb); -- cgit v0.10.2 From 35186d05838e4d828546e6182f7461674a609e08 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Thu, 20 Feb 2014 08:34:27 +0900 Subject: ata: libahci: make ahci_pmp_retry_softreset() as static sparse says: drivers/ata/libahci.c:1390:5: warning: symbol 'ahci_pmp_retry_softreset' was not declared. Should it be static? Signed-off-by: Daeseok Youn Signed-off-by: Tejun Heo diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index cba1b48..0cff116 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1386,8 +1386,8 @@ static int ahci_bad_pmp_check_ready(struct ata_link *link) return ata_check_ready(status); } -int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class, - unsigned long deadline) +static int ahci_pmp_retry_softreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) { struct ata_port *ap = link->ap; void __iomem *port_mmio = ahci_port_base(ap); -- cgit v0.10.2 From b2a52b6a0a03000d07edb359b4059d4d871a7602 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Thu, 20 Feb 2014 08:28:45 +0900 Subject: ata: libahci: replace obsolete simple_strtoul() with kstrtouint() Signed-off-by: Daeseok Youn Signed-off-by: Tejun Heo diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 0cff116..956851f 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1031,12 +1031,13 @@ static ssize_t ahci_led_show(struct ata_port *ap, char *buf) static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, size_t size) { - int state; + unsigned int state; int pmp; struct ahci_port_priv *pp = ap->private_data; struct ahci_em_priv *emp; - state = simple_strtoul(buf, NULL, 0); + if (kstrtouint(buf, 0, &state) < 0) + return -EINVAL; /* get the slot number from the message */ pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; -- cgit v0.10.2 From 7a6c0a58dc824523966f212c76322d47c5b0e6fe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2014 09:37:30 +0100 Subject: ASoC: wm8770: Fix wrong number of enum items wm8770 codec driver defines ain_enum with a wrong number of items. Use SOC_ENUM_DOUBLE_DECL() macro and it's automatically fixed. Signed-off-by: Takashi Iwai Acked-by: Liam Girdwood Acked-by: Charles Keepax Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/wm8770.c b/sound/soc/codecs/wm8770.c index 89a18d8..5bce210 100644 --- a/sound/soc/codecs/wm8770.c +++ b/sound/soc/codecs/wm8770.c @@ -196,8 +196,8 @@ static const char *ain_text[] = { "AIN5", "AIN6", "AIN7", "AIN8" }; -static const struct soc_enum ain_enum = - SOC_ENUM_DOUBLE(WM8770_ADCMUX, 0, 4, 8, ain_text); +static SOC_ENUM_DOUBLE_DECL(ain_enum, + WM8770_ADCMUX, 0, 4, ain_text); static const struct snd_kcontrol_new ain_mux = SOC_DAPM_ENUM("Capture Mux", ain_enum); -- cgit v0.10.2 From 9d1663143652d579dd5555e60fd8c78362ffb8c6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2014 09:39:46 +0100 Subject: ASoC: wm8900: Fix the wrong number of enum items wm8900 codec driver has a few places wrongly defining the number of enum items. Use SOC_ENUM_SINGLE_DECL() macro and they are automatically fixed. Acked-by: Liam Girdwood Acked-by: Charles Keepax Acked-by: Lars-Peter Clausen Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c index e98bc70..43c2201 100644 --- a/sound/soc/codecs/wm8900.c +++ b/sound/soc/codecs/wm8900.c @@ -304,53 +304,53 @@ static const DECLARE_TLV_DB_SCALE(adc_tlv, -7200, 75, 1); static const char *mic_bias_level_txt[] = { "0.9*AVDD", "0.65*AVDD" }; -static const struct soc_enum mic_bias_level = -SOC_ENUM_SINGLE(WM8900_REG_INCTL, 8, 2, mic_bias_level_txt); +static SOC_ENUM_SINGLE_DECL(mic_bias_level, + WM8900_REG_INCTL, 8, mic_bias_level_txt); static const char *dac_mute_rate_txt[] = { "Fast", "Slow" }; -static const struct soc_enum dac_mute_rate = -SOC_ENUM_SINGLE(WM8900_REG_DACCTRL, 7, 2, dac_mute_rate_txt); +static SOC_ENUM_SINGLE_DECL(dac_mute_rate, + WM8900_REG_DACCTRL, 7, dac_mute_rate_txt); static const char *dac_deemphasis_txt[] = { "Disabled", "32kHz", "44.1kHz", "48kHz" }; -static const struct soc_enum dac_deemphasis = -SOC_ENUM_SINGLE(WM8900_REG_DACCTRL, 4, 4, dac_deemphasis_txt); +static SOC_ENUM_SINGLE_DECL(dac_deemphasis, + WM8900_REG_DACCTRL, 4, dac_deemphasis_txt); static const char *adc_hpf_cut_txt[] = { "Hi-fi mode", "Voice mode 1", "Voice mode 2", "Voice mode 3" }; -static const struct soc_enum adc_hpf_cut = -SOC_ENUM_SINGLE(WM8900_REG_ADCCTRL, 5, 4, adc_hpf_cut_txt); +static SOC_ENUM_SINGLE_DECL(adc_hpf_cut, + WM8900_REG_ADCCTRL, 5, adc_hpf_cut_txt); static const char *lr_txt[] = { "Left", "Right" }; -static const struct soc_enum aifl_src = -SOC_ENUM_SINGLE(WM8900_REG_AUDIO1, 15, 2, lr_txt); +static SOC_ENUM_SINGLE_DECL(aifl_src, + WM8900_REG_AUDIO1, 15, lr_txt); -static const struct soc_enum aifr_src = -SOC_ENUM_SINGLE(WM8900_REG_AUDIO1, 14, 2, lr_txt); +static SOC_ENUM_SINGLE_DECL(aifr_src, + WM8900_REG_AUDIO1, 14, lr_txt); -static const struct soc_enum dacl_src = -SOC_ENUM_SINGLE(WM8900_REG_AUDIO2, 15, 2, lr_txt); +static SOC_ENUM_SINGLE_DECL(dacl_src, + WM8900_REG_AUDIO2, 15, lr_txt); -static const struct soc_enum dacr_src = -SOC_ENUM_SINGLE(WM8900_REG_AUDIO2, 14, 2, lr_txt); +static SOC_ENUM_SINGLE_DECL(dacr_src, + WM8900_REG_AUDIO2, 14, lr_txt); static const char *sidetone_txt[] = { "Disabled", "Left ADC", "Right ADC" }; -static const struct soc_enum dacl_sidetone = -SOC_ENUM_SINGLE(WM8900_REG_SIDETONE, 2, 3, sidetone_txt); +static SOC_ENUM_SINGLE_DECL(dacl_sidetone, + WM8900_REG_SIDETONE, 2, sidetone_txt); -static const struct soc_enum dacr_sidetone = -SOC_ENUM_SINGLE(WM8900_REG_SIDETONE, 0, 3, sidetone_txt); +static SOC_ENUM_SINGLE_DECL(dacr_sidetone, + WM8900_REG_SIDETONE, 0, sidetone_txt); static const struct snd_kcontrol_new wm8900_snd_controls[] = { SOC_ENUM("Mic Bias Level", mic_bias_level), @@ -496,8 +496,8 @@ SOC_DAPM_SINGLE("RINPUT3 Switch", WM8900_REG_INCTL, 0, 1, 0), static const char *wm8900_lp_mux[] = { "Disabled", "Enabled" }; -static const struct soc_enum wm8900_lineout2_lp_mux = -SOC_ENUM_SINGLE(WM8900_REG_LOUTMIXCTL1, 1, 2, wm8900_lp_mux); +static SOC_ENUM_SINGLE_DECL(wm8900_lineout2_lp_mux, + WM8900_REG_LOUTMIXCTL1, 1, wm8900_lp_mux); static const struct snd_kcontrol_new wm8900_lineout2_lp = SOC_DAPM_ENUM("Route", wm8900_lineout2_lp_mux); -- cgit v0.10.2 From e61a35b798497a85725ce10c07049fa1b9ca014c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2014 09:45:30 +0100 Subject: ASoC: wm8994: Fix the wrong number of enum items wm8994 codec driver has a few places wrongly defining the number of enum items. Use SOC_ENUM_SINGLE_DECL() macro and they are automatically fixed. Signed-off-by: Takashi Iwai Acked-by: Liam Girdwood Acked-by: Charles Keepax Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index b9be9cb..adb7206 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -265,21 +265,21 @@ static const char *sidetone_hpf_text[] = { "2.7kHz", "1.35kHz", "675Hz", "370Hz", "180Hz", "90Hz", "45Hz" }; -static const struct soc_enum sidetone_hpf = - SOC_ENUM_SINGLE(WM8994_SIDETONE, 7, 7, sidetone_hpf_text); +static SOC_ENUM_SINGLE_DECL(sidetone_hpf, + WM8994_SIDETONE, 7, sidetone_hpf_text); static const char *adc_hpf_text[] = { "HiFi", "Voice 1", "Voice 2", "Voice 3" }; -static const struct soc_enum aif1adc1_hpf = - SOC_ENUM_SINGLE(WM8994_AIF1_ADC1_FILTERS, 13, 4, adc_hpf_text); +static SOC_ENUM_SINGLE_DECL(aif1adc1_hpf, + WM8994_AIF1_ADC1_FILTERS, 13, adc_hpf_text); -static const struct soc_enum aif1adc2_hpf = - SOC_ENUM_SINGLE(WM8994_AIF1_ADC2_FILTERS, 13, 4, adc_hpf_text); +static SOC_ENUM_SINGLE_DECL(aif1adc2_hpf, + WM8994_AIF1_ADC2_FILTERS, 13, adc_hpf_text); -static const struct soc_enum aif2adc_hpf = - SOC_ENUM_SINGLE(WM8994_AIF2_ADC_FILTERS, 13, 4, adc_hpf_text); +static SOC_ENUM_SINGLE_DECL(aif2adc_hpf, + WM8994_AIF2_ADC_FILTERS, 13, adc_hpf_text); static const DECLARE_TLV_DB_SCALE(aif_tlv, 0, 600, 0); static const DECLARE_TLV_DB_SCALE(digital_tlv, -7200, 75, 1); @@ -501,39 +501,39 @@ static const char *aif_chan_src_text[] = { "Left", "Right" }; -static const struct soc_enum aif1adcl_src = - SOC_ENUM_SINGLE(WM8994_AIF1_CONTROL_1, 15, 2, aif_chan_src_text); +static SOC_ENUM_SINGLE_DECL(aif1adcl_src, + WM8994_AIF1_CONTROL_1, 15, aif_chan_src_text); -static const struct soc_enum aif1adcr_src = - SOC_ENUM_SINGLE(WM8994_AIF1_CONTROL_1, 14, 2, aif_chan_src_text); +static SOC_ENUM_SINGLE_DECL(aif1adcr_src, + WM8994_AIF1_CONTROL_1, 14, aif_chan_src_text); -static const struct soc_enum aif2adcl_src = - SOC_ENUM_SINGLE(WM8994_AIF2_CONTROL_1, 15, 2, aif_chan_src_text); +static SOC_ENUM_SINGLE_DECL(aif2adcl_src, + WM8994_AIF2_CONTROL_1, 15, aif_chan_src_text); -static const struct soc_enum aif2adcr_src = - SOC_ENUM_SINGLE(WM8994_AIF2_CONTROL_1, 14, 2, aif_chan_src_text); +static SOC_ENUM_SINGLE_DECL(aif2adcr_src, + WM8994_AIF2_CONTROL_1, 14, aif_chan_src_text); -static const struct soc_enum aif1dacl_src = - SOC_ENUM_SINGLE(WM8994_AIF1_CONTROL_2, 15, 2, aif_chan_src_text); +static SOC_ENUM_SINGLE_DECL(aif1dacl_src, + WM8994_AIF1_CONTROL_2, 15, aif_chan_src_text); -static const struct soc_enum aif1dacr_src = - SOC_ENUM_SINGLE(WM8994_AIF1_CONTROL_2, 14, 2, aif_chan_src_text); +static SOC_ENUM_SINGLE_DECL(aif1dacr_src, + WM8994_AIF1_CONTROL_2, 14, aif_chan_src_text); -static const struct soc_enum aif2dacl_src = - SOC_ENUM_SINGLE(WM8994_AIF2_CONTROL_2, 15, 2, aif_chan_src_text); +static SOC_ENUM_SINGLE_DECL(aif2dacl_src, + WM8994_AIF2_CONTROL_2, 15, aif_chan_src_text); -static const struct soc_enum aif2dacr_src = - SOC_ENUM_SINGLE(WM8994_AIF2_CONTROL_2, 14, 2, aif_chan_src_text); +static SOC_ENUM_SINGLE_DECL(aif2dacr_src, + WM8994_AIF2_CONTROL_2, 14, aif_chan_src_text); static const char *osr_text[] = { "Low Power", "High Performance", }; -static const struct soc_enum dac_osr = - SOC_ENUM_SINGLE(WM8994_OVERSAMPLING, 0, 2, osr_text); +static SOC_ENUM_SINGLE_DECL(dac_osr, + WM8994_OVERSAMPLING, 0, osr_text); -static const struct soc_enum adc_osr = - SOC_ENUM_SINGLE(WM8994_OVERSAMPLING, 1, 2, osr_text); +static SOC_ENUM_SINGLE_DECL(adc_osr, + WM8994_OVERSAMPLING, 1, osr_text); static const struct snd_kcontrol_new wm8994_snd_controls[] = { SOC_DOUBLE_R_TLV("AIF1ADC1 Volume", WM8994_AIF1_ADC1_LEFT_VOLUME, @@ -690,17 +690,20 @@ static const char *wm8958_ng_text[] = { "30ms", "125ms", "250ms", "500ms", }; -static const struct soc_enum wm8958_aif1dac1_ng_hold = - SOC_ENUM_SINGLE(WM8958_AIF1_DAC1_NOISE_GATE, - WM8958_AIF1DAC1_NG_THR_SHIFT, 4, wm8958_ng_text); +static SOC_ENUM_SINGLE_DECL(wm8958_aif1dac1_ng_hold, + WM8958_AIF1_DAC1_NOISE_GATE, + WM8958_AIF1DAC1_NG_THR_SHIFT, + wm8958_ng_text); -static const struct soc_enum wm8958_aif1dac2_ng_hold = - SOC_ENUM_SINGLE(WM8958_AIF1_DAC2_NOISE_GATE, - WM8958_AIF1DAC2_NG_THR_SHIFT, 4, wm8958_ng_text); +static SOC_ENUM_SINGLE_DECL(wm8958_aif1dac2_ng_hold, + WM8958_AIF1_DAC2_NOISE_GATE, + WM8958_AIF1DAC2_NG_THR_SHIFT, + wm8958_ng_text); -static const struct soc_enum wm8958_aif2dac_ng_hold = - SOC_ENUM_SINGLE(WM8958_AIF2_DAC_NOISE_GATE, - WM8958_AIF2DAC_NG_THR_SHIFT, 4, wm8958_ng_text); +static SOC_ENUM_SINGLE_DECL(wm8958_aif2dac_ng_hold, + WM8958_AIF2_DAC_NOISE_GATE, + WM8958_AIF2DAC_NG_THR_SHIFT, + wm8958_ng_text); static const struct snd_kcontrol_new wm8958_snd_controls[] = { SOC_SINGLE_TLV("AIF3 Boost Volume", WM8958_AIF3_CONTROL_2, 10, 3, 0, aif_tlv), @@ -1341,8 +1344,8 @@ static const char *adc_mux_text[] = { "DMIC", }; -static const struct soc_enum adc_enum = - SOC_ENUM_SINGLE(0, 0, 2, adc_mux_text); +static SOC_ENUM_SINGLE_DECL(adc_enum, + 0, 0, adc_mux_text); static const struct snd_kcontrol_new adcl_mux = SOC_DAPM_ENUM_VIRT("ADCL Mux", adc_enum); @@ -1478,14 +1481,14 @@ static const char *sidetone_text[] = { "ADC/DMIC1", "DMIC2", }; -static const struct soc_enum sidetone1_enum = - SOC_ENUM_SINGLE(WM8994_SIDETONE, 0, 2, sidetone_text); +static SOC_ENUM_SINGLE_DECL(sidetone1_enum, + WM8994_SIDETONE, 0, sidetone_text); static const struct snd_kcontrol_new sidetone1_mux = SOC_DAPM_ENUM("Left Sidetone Mux", sidetone1_enum); -static const struct soc_enum sidetone2_enum = - SOC_ENUM_SINGLE(WM8994_SIDETONE, 1, 2, sidetone_text); +static SOC_ENUM_SINGLE_DECL(sidetone2_enum, + WM8994_SIDETONE, 1, sidetone_text); static const struct snd_kcontrol_new sidetone2_mux = SOC_DAPM_ENUM("Right Sidetone Mux", sidetone2_enum); @@ -1498,22 +1501,24 @@ static const char *loopback_text[] = { "None", "ADCDAT", }; -static const struct soc_enum aif1_loopback_enum = - SOC_ENUM_SINGLE(WM8994_AIF1_CONTROL_2, WM8994_AIF1_LOOPBACK_SHIFT, 2, - loopback_text); +static SOC_ENUM_SINGLE_DECL(aif1_loopback_enum, + WM8994_AIF1_CONTROL_2, + WM8994_AIF1_LOOPBACK_SHIFT, + loopback_text); static const struct snd_kcontrol_new aif1_loopback = SOC_DAPM_ENUM("AIF1 Loopback", aif1_loopback_enum); -static const struct soc_enum aif2_loopback_enum = - SOC_ENUM_SINGLE(WM8994_AIF2_CONTROL_2, WM8994_AIF2_LOOPBACK_SHIFT, 2, - loopback_text); +static SOC_ENUM_SINGLE_DECL(aif2_loopback_enum, + WM8994_AIF2_CONTROL_2, + WM8994_AIF2_LOOPBACK_SHIFT, + loopback_text); static const struct snd_kcontrol_new aif2_loopback = SOC_DAPM_ENUM("AIF2 Loopback", aif2_loopback_enum); -static const struct soc_enum aif1dac_enum = - SOC_ENUM_SINGLE(WM8994_POWER_MANAGEMENT_6, 0, 2, aif1dac_text); +static SOC_ENUM_SINGLE_DECL(aif1dac_enum, + WM8994_POWER_MANAGEMENT_6, 0, aif1dac_text); static const struct snd_kcontrol_new aif1dac_mux = SOC_DAPM_ENUM("AIF1DAC Mux", aif1dac_enum); @@ -1522,8 +1527,8 @@ static const char *aif2dac_text[] = { "AIF2DACDAT", "AIF3DACDAT", }; -static const struct soc_enum aif2dac_enum = - SOC_ENUM_SINGLE(WM8994_POWER_MANAGEMENT_6, 1, 2, aif2dac_text); +static SOC_ENUM_SINGLE_DECL(aif2dac_enum, + WM8994_POWER_MANAGEMENT_6, 1, aif2dac_text); static const struct snd_kcontrol_new aif2dac_mux = SOC_DAPM_ENUM("AIF2DAC Mux", aif2dac_enum); @@ -1532,8 +1537,8 @@ static const char *aif2adc_text[] = { "AIF2ADCDAT", "AIF3DACDAT", }; -static const struct soc_enum aif2adc_enum = - SOC_ENUM_SINGLE(WM8994_POWER_MANAGEMENT_6, 2, 2, aif2adc_text); +static SOC_ENUM_SINGLE_DECL(aif2adc_enum, + WM8994_POWER_MANAGEMENT_6, 2, aif2adc_text); static const struct snd_kcontrol_new aif2adc_mux = SOC_DAPM_ENUM("AIF2ADC Mux", aif2adc_enum); @@ -1542,14 +1547,14 @@ static const char *aif3adc_text[] = { "AIF1ADCDAT", "AIF2ADCDAT", "AIF2DACDAT", "Mono PCM", }; -static const struct soc_enum wm8994_aif3adc_enum = - SOC_ENUM_SINGLE(WM8994_POWER_MANAGEMENT_6, 3, 3, aif3adc_text); +static SOC_ENUM_SINGLE_DECL(wm8994_aif3adc_enum, + WM8994_POWER_MANAGEMENT_6, 3, aif3adc_text); static const struct snd_kcontrol_new wm8994_aif3adc_mux = SOC_DAPM_ENUM("AIF3ADC Mux", wm8994_aif3adc_enum); -static const struct soc_enum wm8958_aif3adc_enum = - SOC_ENUM_SINGLE(WM8994_POWER_MANAGEMENT_6, 3, 4, aif3adc_text); +static SOC_ENUM_SINGLE_DECL(wm8958_aif3adc_enum, + WM8994_POWER_MANAGEMENT_6, 3, aif3adc_text); static const struct snd_kcontrol_new wm8958_aif3adc_mux = SOC_DAPM_ENUM("AIF3ADC Mux", wm8958_aif3adc_enum); @@ -1558,8 +1563,8 @@ static const char *mono_pcm_out_text[] = { "None", "AIF2ADCL", "AIF2ADCR", }; -static const struct soc_enum mono_pcm_out_enum = - SOC_ENUM_SINGLE(WM8994_POWER_MANAGEMENT_6, 9, 3, mono_pcm_out_text); +static SOC_ENUM_SINGLE_DECL(mono_pcm_out_enum, + WM8994_POWER_MANAGEMENT_6, 9, mono_pcm_out_text); static const struct snd_kcontrol_new mono_pcm_out_mux = SOC_DAPM_ENUM("Mono PCM Out Mux", mono_pcm_out_enum); @@ -1569,14 +1574,14 @@ static const char *aif2dac_src_text[] = { }; /* Note that these two control shouldn't be simultaneously switched to AIF3 */ -static const struct soc_enum aif2dacl_src_enum = - SOC_ENUM_SINGLE(WM8994_POWER_MANAGEMENT_6, 7, 2, aif2dac_src_text); +static SOC_ENUM_SINGLE_DECL(aif2dacl_src_enum, + WM8994_POWER_MANAGEMENT_6, 7, aif2dac_src_text); static const struct snd_kcontrol_new aif2dacl_src_mux = SOC_DAPM_ENUM("AIF2DACL Mux", aif2dacl_src_enum); -static const struct soc_enum aif2dacr_src_enum = - SOC_ENUM_SINGLE(WM8994_POWER_MANAGEMENT_6, 8, 2, aif2dac_src_text); +static SOC_ENUM_SINGLE_DECL(aif2dacr_src_enum, + WM8994_POWER_MANAGEMENT_6, 8, aif2dac_src_text); static const struct snd_kcontrol_new aif2dacr_src_mux = SOC_DAPM_ENUM("AIF2DACR Mux", aif2dacr_src_enum); -- cgit v0.10.2 From 901bb6c55d2a7ef66bc0bdf0ba410c417f16b7cf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2014 09:56:40 +0100 Subject: ASoC: ad1980: Fix wrong number of items for capture source The number of capture sources is 8, not 7. Use SOC_ENUM_DOUBLE_DECL() macro and it's automatically fixed. Signed-off-by: Takashi Iwai Acked-by: Liam Girdwood Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/ad1980.c b/sound/soc/codecs/ad1980.c index 7257a88..34d965a 100644 --- a/sound/soc/codecs/ad1980.c +++ b/sound/soc/codecs/ad1980.c @@ -57,8 +57,8 @@ static const u16 ad1980_reg[] = { static const char *ad1980_rec_sel[] = {"Mic", "CD", "NC", "AUX", "Line", "Stereo Mix", "Mono Mix", "Phone"}; -static const struct soc_enum ad1980_cap_src = - SOC_ENUM_DOUBLE(AC97_REC_SEL, 8, 0, 7, ad1980_rec_sel); +static SOC_ENUM_DOUBLE_DECL(ad1980_cap_src, + AC97_REC_SEL, 8, 0, ad1980_rec_sel); static const struct snd_kcontrol_new ad1980_snd_ac97_controls[] = { SOC_DOUBLE("Master Playback Volume", AC97_MASTER, 8, 0, 31, 1), -- cgit v0.10.2 From cdbb492557112bddad26f0ba4ba9ba87d919ebfc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2014 10:07:56 +0100 Subject: ASoC: isabelle: Fix the wrong number of items in enum ctls isabelle codec driver has a few places wrongly defining the number of enum items. Use SOC_ENUM_SINGLE_DECL() macro and they are automatically fixed. Signed-off-by: Takashi Iwai Acked-by: Liam Girdwood Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/isabelle.c b/sound/soc/codecs/isabelle.c index 5839048..cb736dd 100644 --- a/sound/soc/codecs/isabelle.c +++ b/sound/soc/codecs/isabelle.c @@ -140,13 +140,17 @@ static const char *isabelle_rx1_texts[] = {"VRX1", "ARX1"}; static const char *isabelle_rx2_texts[] = {"VRX2", "ARX2"}; static const struct soc_enum isabelle_rx1_enum[] = { - SOC_ENUM_SINGLE(ISABELLE_VOICE_HPF_CFG_REG, 3, 1, isabelle_rx1_texts), - SOC_ENUM_SINGLE(ISABELLE_AUDIO_HPF_CFG_REG, 5, 1, isabelle_rx1_texts), + SOC_ENUM_SINGLE(ISABELLE_VOICE_HPF_CFG_REG, 3, + ARRAY_SIZE(isabelle_rx1_texts), isabelle_rx1_texts), + SOC_ENUM_SINGLE(ISABELLE_AUDIO_HPF_CFG_REG, 5, + ARRAY_SIZE(isabelle_rx1_texts), isabelle_rx1_texts), }; static const struct soc_enum isabelle_rx2_enum[] = { - SOC_ENUM_SINGLE(ISABELLE_VOICE_HPF_CFG_REG, 2, 1, isabelle_rx2_texts), - SOC_ENUM_SINGLE(ISABELLE_AUDIO_HPF_CFG_REG, 4, 1, isabelle_rx2_texts), + SOC_ENUM_SINGLE(ISABELLE_VOICE_HPF_CFG_REG, 2, + ARRAY_SIZE(isabelle_rx2_texts), isabelle_rx2_texts), + SOC_ENUM_SINGLE(ISABELLE_AUDIO_HPF_CFG_REG, 4, + ARRAY_SIZE(isabelle_rx2_texts), isabelle_rx2_texts), }; /* Headset DAC playback switches */ @@ -161,13 +165,17 @@ static const char *isabelle_atx_texts[] = {"AMIC1", "DMIC"}; static const char *isabelle_vtx_texts[] = {"AMIC2", "DMIC"}; static const struct soc_enum isabelle_atx_enum[] = { - SOC_ENUM_SINGLE(ISABELLE_AMIC_CFG_REG, 7, 1, isabelle_atx_texts), - SOC_ENUM_SINGLE(ISABELLE_DMIC_CFG_REG, 0, 1, isabelle_atx_texts), + SOC_ENUM_SINGLE(ISABELLE_AMIC_CFG_REG, 7, + ARRAY_SIZE(isabelle_atx_texts), isabelle_atx_texts), + SOC_ENUM_SINGLE(ISABELLE_DMIC_CFG_REG, 0, + ARRAY_SIZE(isabelle_atx_texts), isabelle_atx_texts), }; static const struct soc_enum isabelle_vtx_enum[] = { - SOC_ENUM_SINGLE(ISABELLE_AMIC_CFG_REG, 6, 1, isabelle_vtx_texts), - SOC_ENUM_SINGLE(ISABELLE_DMIC_CFG_REG, 0, 1, isabelle_vtx_texts), + SOC_ENUM_SINGLE(ISABELLE_AMIC_CFG_REG, 6, + ARRAY_SIZE(isabelle_vtx_texts), isabelle_vtx_texts), + SOC_ENUM_SINGLE(ISABELLE_DMIC_CFG_REG, 0, + ARRAY_SIZE(isabelle_vtx_texts), isabelle_vtx_texts), }; static const struct snd_kcontrol_new atx_mux_controls = @@ -183,17 +191,13 @@ static const char *isabelle_amic1_texts[] = { /* Left analog microphone selection */ static const char *isabelle_amic2_texts[] = {"Sub Mic", "Aux/FM Right"}; -static const struct soc_enum isabelle_amic1_enum[] = { - SOC_ENUM_SINGLE(ISABELLE_AMIC_CFG_REG, 5, - ARRAY_SIZE(isabelle_amic1_texts), - isabelle_amic1_texts), -}; +static SOC_ENUM_SINGLE_DECL(isabelle_amic1_enum, + ISABELLE_AMIC_CFG_REG, 5, + isabelle_amic1_texts); -static const struct soc_enum isabelle_amic2_enum[] = { - SOC_ENUM_SINGLE(ISABELLE_AMIC_CFG_REG, 4, - ARRAY_SIZE(isabelle_amic2_texts), - isabelle_amic2_texts), -}; +static SOC_ENUM_SINGLE_DECL(isabelle_amic2_enum, + ISABELLE_AMIC_CFG_REG, 4, + isabelle_amic2_texts); static const struct snd_kcontrol_new amic1_control = SOC_DAPM_ENUM("Route", isabelle_amic1_enum); @@ -206,16 +210,20 @@ static const char *isabelle_st_audio_texts[] = {"ATX1", "ATX2"}; static const char *isabelle_st_voice_texts[] = {"VTX1", "VTX2"}; static const struct soc_enum isabelle_st_audio_enum[] = { - SOC_ENUM_SINGLE(ISABELLE_ATX_STPGA1_CFG_REG, 7, 1, + SOC_ENUM_SINGLE(ISABELLE_ATX_STPGA1_CFG_REG, 7, + ARRAY_SIZE(isabelle_st_audio_texts), isabelle_st_audio_texts), - SOC_ENUM_SINGLE(ISABELLE_ATX_STPGA2_CFG_REG, 7, 1, + SOC_ENUM_SINGLE(ISABELLE_ATX_STPGA2_CFG_REG, 7, + ARRAY_SIZE(isabelle_st_audio_texts), isabelle_st_audio_texts), }; static const struct soc_enum isabelle_st_voice_enum[] = { - SOC_ENUM_SINGLE(ISABELLE_VTX_STPGA1_CFG_REG, 7, 1, + SOC_ENUM_SINGLE(ISABELLE_VTX_STPGA1_CFG_REG, 7, + ARRAY_SIZE(isabelle_st_voice_texts), isabelle_st_voice_texts), - SOC_ENUM_SINGLE(ISABELLE_VTX2_STPGA2_CFG_REG, 7, 1, + SOC_ENUM_SINGLE(ISABELLE_VTX2_STPGA2_CFG_REG, 7, + ARRAY_SIZE(isabelle_st_voice_texts), isabelle_st_voice_texts), }; -- cgit v0.10.2 From 898b48eb88bff3a7a49590a08bee546c2d26bd91 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 18 Feb 2014 09:34:44 +0100 Subject: ASoC: wm8400: Fix the wrong number of enum items wm8400 codec driver has a few places wrongly defining the number of enum items. Use SOC_ENUM_SINGLE_DECL() macro and they are automatically fixed. Signed-off-by: Takashi Iwai Acked-by: Liam Girdwood Acked-by: Charles Keepax Acked-by: Lars-Peter Clausen Signed-off-by: Mark Brown diff --git a/sound/soc/codecs/wm8400.c b/sound/soc/codecs/wm8400.c index 48dc7d2..6d684d9 100644 --- a/sound/soc/codecs/wm8400.c +++ b/sound/soc/codecs/wm8400.c @@ -117,19 +117,23 @@ static int wm8400_outpga_put_volsw_vu(struct snd_kcontrol *kcontrol, static const char *wm8400_digital_sidetone[] = {"None", "Left ADC", "Right ADC", "Reserved"}; -static const struct soc_enum wm8400_left_digital_sidetone_enum = -SOC_ENUM_SINGLE(WM8400_DIGITAL_SIDE_TONE, - WM8400_ADC_TO_DACL_SHIFT, 2, wm8400_digital_sidetone); +static SOC_ENUM_SINGLE_DECL(wm8400_left_digital_sidetone_enum, + WM8400_DIGITAL_SIDE_TONE, + WM8400_ADC_TO_DACL_SHIFT, + wm8400_digital_sidetone); -static const struct soc_enum wm8400_right_digital_sidetone_enum = -SOC_ENUM_SINGLE(WM8400_DIGITAL_SIDE_TONE, - WM8400_ADC_TO_DACR_SHIFT, 2, wm8400_digital_sidetone); +static SOC_ENUM_SINGLE_DECL(wm8400_right_digital_sidetone_enum, + WM8400_DIGITAL_SIDE_TONE, + WM8400_ADC_TO_DACR_SHIFT, + wm8400_digital_sidetone); static const char *wm8400_adcmode[] = {"Hi-fi mode", "Voice mode 1", "Voice mode 2", "Voice mode 3"}; -static const struct soc_enum wm8400_right_adcmode_enum = -SOC_ENUM_SINGLE(WM8400_ADC_CTRL, WM8400_ADC_HPF_CUT_SHIFT, 3, wm8400_adcmode); +static SOC_ENUM_SINGLE_DECL(wm8400_right_adcmode_enum, + WM8400_ADC_CTRL, + WM8400_ADC_HPF_CUT_SHIFT, + wm8400_adcmode); static const struct snd_kcontrol_new wm8400_snd_controls[] = { /* INMIXL */ @@ -422,9 +426,10 @@ SOC_DAPM_SINGLE("RINPGA34 Switch", WM8400_INPUT_MIXER3, WM8400_L34MNB_SHIFT, static const char *wm8400_ainlmux[] = {"INMIXL Mix", "RXVOICE Mix", "DIFFINL Mix"}; -static const struct soc_enum wm8400_ainlmux_enum = -SOC_ENUM_SINGLE( WM8400_INPUT_MIXER1, WM8400_AINLMODE_SHIFT, - ARRAY_SIZE(wm8400_ainlmux), wm8400_ainlmux); +static SOC_ENUM_SINGLE_DECL(wm8400_ainlmux_enum, + WM8400_INPUT_MIXER1, + WM8400_AINLMODE_SHIFT, + wm8400_ainlmux); static const struct snd_kcontrol_new wm8400_dapm_ainlmux_controls = SOC_DAPM_ENUM("Route", wm8400_ainlmux_enum); @@ -435,9 +440,10 @@ SOC_DAPM_ENUM("Route", wm8400_ainlmux_enum); static const char *wm8400_ainrmux[] = {"INMIXR Mix", "RXVOICE Mix", "DIFFINR Mix"}; -static const struct soc_enum wm8400_ainrmux_enum = -SOC_ENUM_SINGLE( WM8400_INPUT_MIXER1, WM8400_AINRMODE_SHIFT, - ARRAY_SIZE(wm8400_ainrmux), wm8400_ainrmux); +static SOC_ENUM_SINGLE_DECL(wm8400_ainrmux_enum, + WM8400_INPUT_MIXER1, + WM8400_AINRMODE_SHIFT, + wm8400_ainrmux); static const struct snd_kcontrol_new wm8400_dapm_ainrmux_controls = SOC_DAPM_ENUM("Route", wm8400_ainrmux_enum); -- cgit v0.10.2 From ebe44f350e15d6142d4d74cbaec0dad976c36753 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 20 Feb 2014 02:14:23 -0500 Subject: ip_tunnel: Move ip_tunnel_get_stats64 into ip_tunnel_core.c net/built-in.o:(.rodata+0x1707c): undefined reference to `ip_tunnel_get_stats64' Reported-by: Randy Dunlap Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 50228be..08f8cf9 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -119,52 +119,6 @@ static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) return (struct rtable *)dst; } -/* Often modified stats are per cpu, other are shared (netdev->stats) */ -struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_sw_netstats *tstats = - per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->multicast = dev->stats.multicast; - - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - tot->collisions = dev->stats.collisions; - - return tot; -} -EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); - static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, __be16 flags, __be32 key) { diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6156f4e..8d69626 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -148,3 +148,49 @@ error: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(iptunnel_handle_offloads); + +/* Often modified stats are per cpu, other are shared (netdev->stats) */ +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; + tot->rx_errors = dev->stats.rx_errors; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + tot->collisions = dev->stats.collisions; + + return tot; +} +EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); -- cgit v0.10.2 From 8d0d21f4053c07714802cbe8b1fe26913ec296cc Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 18 Feb 2014 21:20:08 +0900 Subject: veth: Fix vlan_features so as to be able to use stacked vlan interfaces Even if we create a stacked vlan interface such as veth0.10.20, it sends single tagged frames (tagged with only vid 10). Because vlan_features of a veth interface has the NETIF_F_HW_VLAN_[CTAG/STAG]_TX bits, veth0.10 also has that feature, so dev_hard_start_xmit(veth0.10) doesn't call __vlan_put_tag() and vlan_dev_hard_start_xmit(veth0.10) overwrites vlan_tci. This prevents us from using a combination of 802.1ad and 802.1Q in containers, etc. Signed-off-by: Toshiaki Makita Acked-by: Flavio Leitner Signed-off-by: David S. Miller diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 2ec2041..5b37437 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -285,7 +285,8 @@ static void veth_setup(struct net_device *dev) dev->ethtool_ops = &veth_ethtool_ops; dev->features |= NETIF_F_LLTX; dev->features |= VETH_FEATURES; - dev->vlan_features = dev->features; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); dev->destructor = veth_dev_free; dev->hw_features = VETH_FEATURES; -- cgit v0.10.2 From 6671b2240c54585d4afb5286a29f1569fe5e40a8 Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Tue, 18 Feb 2014 21:20:09 +0900 Subject: tun: remove bogus hardware vlan acceleration flags from vlan_features Even though only the outer vlan tag can be HW accelerated in the transmission path, in the TUN/TAP driver vlan_features mirrors hw_features, which happens to have the NETIF_F_HW_VLAN_?TAG_TX flags set. Because of this, during packet tranmisssion through a stacked vlan device dev_hard_start_xmit, (incorrectly) assuming that the vlan device supports hardware vlan acceleration, does not add the vlan header to the skb payload and the inner vlan tags are lost (vlan_tci contains the outer vlan tag when userspace reads the packet from the tap device). Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Toshiaki Makita Signed-off-by: David S. Miller diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 8fe9cb7..26f8635 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1686,7 +1686,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; dev->features = dev->hw_features; - dev->vlan_features = dev->features; + dev->vlan_features = dev->features & + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); INIT_LIST_HEAD(&tun->disabled); err = tun_attach(tun, file, false); -- cgit v0.10.2 From a9952a76bc0c24b3c9d355c053e001b8a3b65dd3 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Wed, 19 Feb 2014 20:48:56 +0200 Subject: clk: tegra: Fix vic03 mux index The vic03 mux uses a linear mapping. Signed-off-by: Peter De Schrijver diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index f5376a3..1fa5c3f 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -371,9 +371,7 @@ static const char *mux_pllp3_pllc_clkm[] = { static const char *mux_pllm_pllc_pllp_plla_pllc2_c3_clkm[] = { "pll_m", "pll_c", "pll_p", "pll_a", "pll_c2", "pll_c3", "clk_m" }; -static u32 mux_pllm_pllc_pllp_plla_pllc2_c3_clkm_idx[] = { - [0] = 0, [1] = 1, [2] = 2, [3] = 3, [4] = 4, [5] = 6, -}; +#define mux_pllm_pllc_pllp_plla_pllc2_c3_clkm_idx NULL static const char *mux_pllm_pllc2_c_c3_pllp_plla_pllc4[] = { "pll_m", "pll_c2", "pll_c", "pll_c3", "pll_p", "pll_a_out0", "pll_c4", -- cgit v0.10.2 From 1d147bfa64293b2723c4fec50922168658e613ba Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Thu, 20 Feb 2014 09:22:11 +0200 Subject: mac80211: fix AP powersave TX vs. wakeup race There is a race between the TX path and the STA wakeup: while a station is sleeping, mac80211 buffers frames until it wakes up, then the frames are transmitted. However, the RX and TX path are concurrent, so the packet indicating wakeup can be processed while a packet is being transmitted. This can lead to a situation where the buffered frames list is emptied on the one side, while a frame is being added on the other side, as the station is still seen as sleeping in the TX path. As a result, the newly added frame will not be send anytime soon. It might be sent much later (and out of order) when the station goes to sleep and wakes up the next time. Additionally, it can lead to the crash below. Fix all this by synchronising both paths with a new lock. Both path are not fastpath since they handle PS situations. In a later patch we'll remove the extra skb queue locks to reduce locking overhead. BUG: unable to handle kernel NULL pointer dereference at 000000b0 IP: [] ieee80211_report_used_skb+0x11/0x3e0 [mac80211] *pde = 00000000 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC EIP: 0060:[] EFLAGS: 00210282 CPU: 1 EIP is at ieee80211_report_used_skb+0x11/0x3e0 [mac80211] EAX: e5900da0 EBX: 00000000 ECX: 00000001 EDX: 00000000 ESI: e41d00c0 EDI: e5900da0 EBP: ebe458e4 ESP: ebe458b0 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 000000b0 CR3: 25a78000 CR4: 000407d0 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff0ff0 DR7: 00000400 Process iperf (pid: 3934, ti=ebe44000 task=e757c0b0 task.ti=ebe44000) iwlwifi 0000:02:00.0: I iwl_pcie_enqueue_hcmd Sending command LQ_CMD (#4e), seq: 0x0903, 92 bytes at 3[3]:9 Stack: e403b32c ebe458c4 00200002 00200286 e403b338 ebe458cc c10960bb e5900da0 ff76a6ec ebe458d8 00000000 e41d00c0 e5900da0 ebe458f0 ff6f1b75 e403b210 ebe4598c ff723dc1 00000000 ff76a6ec e597c978 e403b758 00000002 00000002 Call Trace: [] ieee80211_free_txskb+0x15/0x20 [mac80211] [] invoke_tx_handlers+0x1661/0x1780 [mac80211] [] ieee80211_tx+0x75/0x100 [mac80211] [] ieee80211_xmit+0x8f/0xc0 [mac80211] [] ieee80211_subif_start_xmit+0x4fe/0xe20 [mac80211] [] dev_hard_start_xmit+0x450/0x950 [] sch_direct_xmit+0xa9/0x250 [] __qdisc_run+0x4b/0x150 [] dev_queue_xmit+0x2c2/0xca0 Cc: stable@vger.kernel.org Reported-by: Yaara Rozenblum Signed-off-by: Emmanuel Grumbach Reviewed-by: Stanislaw Gruszka [reword commit log, use a separate lock] Signed-off-by: Johannes Berg diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index decd30c..62a5f08 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -330,6 +330,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); spin_lock_init(&sta->lock); + spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); @@ -1109,6 +1110,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) skb_queue_head_init(&pending); + /* sync with ieee80211_tx_h_unicast_ps_buf */ + spin_lock(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; @@ -1128,6 +1131,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) } ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta); + spin_unlock(&sta->ps_lock); /* This station just woke up and isn't aware of our SMPS state */ if (!ieee80211_smps_is_restrictive(sta->known_smps_mode, diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d77ff70..d3a6d82 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -267,6 +267,7 @@ struct ieee80211_tx_latency_stat { * @drv_unblock_wk: used for driver PS unblocking * @listen_interval: listen interval of this station, when we're acting as AP * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly + * @ps_lock: used for powersave (when mac80211 is the AP) related locking * @ps_tx_buf: buffers (per AC) of frames to transmit to this station * when it leaves power saving state or polls * @tx_filtered: buffers (per AC) of frames we already tried to @@ -356,10 +357,8 @@ struct sta_info { /* use the accessors defined below */ unsigned long _flags; - /* - * STA powersave frame queues, no more than the internal - * locking required. - */ + /* STA powersave lock and frame queues */ + spinlock_t ps_lock; struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; unsigned long driver_buffered_tids; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 97a02d3..4080c61 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -478,6 +478,20 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) purge_old_ps_buffers(tx->local); + + /* sync with ieee80211_sta_ps_deliver_wakeup */ + spin_lock(&sta->ps_lock); + /* + * STA woke up the meantime and all the frames on ps_tx_buf have + * been queued to pending queue. No reordering can happen, go + * ahead and Tx the packet. + */ + if (!test_sta_flag(sta, WLAN_STA_PS_STA) && + !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + spin_unlock(&sta->ps_lock); + return TX_CONTINUE; + } + if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) { struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]); ps_dbg(tx->sdata, @@ -492,6 +506,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb); + spin_unlock(&sta->ps_lock); if (!timer_pending(&local->sta_cleanup)) mod_timer(&local->sta_cleanup, -- cgit v0.10.2 From 5108ca828017120981880eeec8a9ec369334a899 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Feb 2014 20:49:03 +0100 Subject: mac80211: insert stations before adding to driver There's a race condition in mac80211 because we add stations to the internal lists after adding them to the driver, which means that (for example) the following can happen: 1. a station connects and is added 2. first, it is added to the driver 3. then, it is added to the mac80211 lists If the station goes to sleep between steps 2 and 3, and the firmware/hardware records it as being asleep, mac80211 will never instruct the driver to wake it up again as it never realized it went to sleep since the RX path discarded the frame as a "spurious class 3 frame", no station entry was present yet. Fix this by adding the station in software first, and only then adding it to the driver. That way, any state that the driver changes will be reflected properly in mac80211's station state. The problematic part is the roll-back if the driver fails to add the station, in that case a bit more is needed. To not make that overly complex prevent starting BA sessions in the meantime. Signed-off-by: Johannes Berg diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 62a5f08..ffc1ee6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -91,7 +91,7 @@ static int sta_info_hash_del(struct ieee80211_local *local, return -ENOENT; } -static void cleanup_single_sta(struct sta_info *sta) +static void __cleanup_single_sta(struct sta_info *sta) { int ac, i; struct tid_ampdu_tx *tid_tx; @@ -139,7 +139,14 @@ static void cleanup_single_sta(struct sta_info *sta) ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending); kfree(tid_tx); } +} +static void cleanup_single_sta(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + + __cleanup_single_sta(sta); sta_info_free(local, sta); } @@ -488,21 +495,26 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) goto out_err; } - /* notify driver */ - err = sta_info_insert_drv_state(local, sdata, sta); - if (err) - goto out_err; - local->num_sta++; local->sta_generation++; smp_mb(); + /* simplify things and don't accept BA sessions yet */ + set_sta_flag(sta, WLAN_STA_BLOCK_BA); + /* make the station visible */ sta_info_hash_add(local, sta); list_add_rcu(&sta->list, &local->sta_list); + /* notify driver */ + err = sta_info_insert_drv_state(local, sdata, sta); + if (err) + goto out_remove; + set_sta_flag(sta, WLAN_STA_INSERTED); + /* accept BA sessions now */ + clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_recalc_min_chandef(sdata); ieee80211_sta_debugfs_add(sta); @@ -523,6 +535,12 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) mesh_accept_plinks_update(sdata); return 0; + out_remove: + sta_info_hash_del(local, sta); + list_del_rcu(&sta->list); + local->num_sta--; + synchronize_net(); + __cleanup_single_sta(sta); out_err: mutex_unlock(&local->sta_mtx); rcu_read_lock(); -- cgit v0.10.2 From c4204960e9d0ba99459dbf1db918f99a45e7a62a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 18 Feb 2014 15:22:12 +0000 Subject: Input - arizona-haptics: Fix double lock of dapm_mutex snd_soc_dapm_sync takes the dapm_mutex internally, but we currently take it externally as well. This patch fixes this. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/input/misc/arizona-haptics.c b/drivers/input/misc/arizona-haptics.c index 7a04f54..e7e12a5 100644 --- a/drivers/input/misc/arizona-haptics.c +++ b/drivers/input/misc/arizona-haptics.c @@ -77,16 +77,14 @@ static void arizona_haptics_work(struct work_struct *work) return; } + mutex_unlock(dapm_mutex); + ret = snd_soc_dapm_sync(arizona->dapm); if (ret != 0) { dev_err(arizona->dev, "Failed to sync DAPM: %d\n", ret); - mutex_unlock(dapm_mutex); return; } - - mutex_unlock(dapm_mutex); - } else { /* This disable sequence will be a noop if already enabled */ mutex_lock_nested(dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); @@ -99,16 +97,15 @@ static void arizona_haptics_work(struct work_struct *work) return; } + mutex_unlock(dapm_mutex); + ret = snd_soc_dapm_sync(arizona->dapm); if (ret != 0) { dev_err(arizona->dev, "Failed to sync DAPM: %d\n", ret); - mutex_unlock(dapm_mutex); return; } - mutex_unlock(dapm_mutex); - ret = regmap_update_bits(arizona->regmap, ARIZONA_HAPTICS_CONTROL_1, ARIZONA_HAP_CTRL_MASK, -- cgit v0.10.2 From 113911006442a36c2b4669faf1699d9042ef80ab Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 18 Feb 2014 15:22:14 +0000 Subject: ASoC: dapm: Add locking to snd_soc_dapm_xxxx_pin functions The snd_soc_dapm_xxxx_pin all require the dapm_mutex to be held when they are called as they edit the dirty list, however very few of the callers do so. This patch adds unlocked versions of all the functions replacing the existing implementations with one that holds the lock internally. We also fix up the places where the lock was actually held on the caller side. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c index c20602f..98a14f6 100644 --- a/drivers/extcon/extcon-arizona.c +++ b/drivers/extcon/extcon-arizona.c @@ -222,27 +222,19 @@ static void arizona_extcon_pulse_micbias(struct arizona_extcon_info *info) struct snd_soc_dapm_context *dapm = arizona->dapm; int ret; - mutex_lock(&dapm->card->dapm_mutex); - ret = snd_soc_dapm_force_enable_pin(dapm, widget); if (ret != 0) dev_warn(arizona->dev, "Failed to enable %s: %d\n", widget, ret); - mutex_unlock(&dapm->card->dapm_mutex); - snd_soc_dapm_sync(dapm); if (!arizona->pdata.micd_force_micbias) { - mutex_lock(&dapm->card->dapm_mutex); - ret = snd_soc_dapm_disable_pin(arizona->dapm, widget); if (ret != 0) dev_warn(arizona->dev, "Failed to disable %s: %d\n", widget, ret); - mutex_unlock(&dapm->card->dapm_mutex); - snd_soc_dapm_sync(dapm); } } @@ -304,16 +296,12 @@ static void arizona_stop_mic(struct arizona_extcon_info *info) ARIZONA_MICD_ENA, 0, &change); - mutex_lock(&dapm->card->dapm_mutex); - ret = snd_soc_dapm_disable_pin(dapm, widget); if (ret != 0) dev_warn(arizona->dev, "Failed to disable %s: %d\n", widget, ret); - mutex_unlock(&dapm->card->dapm_mutex); - snd_soc_dapm_sync(dapm); if (info->micd_reva) { diff --git a/drivers/input/misc/arizona-haptics.c b/drivers/input/misc/arizona-haptics.c index e7e12a5..ef2e281 100644 --- a/drivers/input/misc/arizona-haptics.c +++ b/drivers/input/misc/arizona-haptics.c @@ -37,7 +37,6 @@ static void arizona_haptics_work(struct work_struct *work) struct arizona_haptics, work); struct arizona *arizona = haptics->arizona; - struct mutex *dapm_mutex = &arizona->dapm->card->dapm_mutex; int ret; if (!haptics->arizona->dapm) { @@ -67,18 +66,13 @@ static void arizona_haptics_work(struct work_struct *work) return; } - mutex_lock_nested(dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); - ret = snd_soc_dapm_enable_pin(arizona->dapm, "HAPTICS"); if (ret != 0) { dev_err(arizona->dev, "Failed to start HAPTICS: %d\n", ret); - mutex_unlock(dapm_mutex); return; } - mutex_unlock(dapm_mutex); - ret = snd_soc_dapm_sync(arizona->dapm); if (ret != 0) { dev_err(arizona->dev, "Failed to sync DAPM: %d\n", @@ -87,18 +81,13 @@ static void arizona_haptics_work(struct work_struct *work) } } else { /* This disable sequence will be a noop if already enabled */ - mutex_lock_nested(dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); - ret = snd_soc_dapm_disable_pin(arizona->dapm, "HAPTICS"); if (ret != 0) { dev_err(arizona->dev, "Failed to disable HAPTICS: %d\n", ret); - mutex_unlock(dapm_mutex); return; } - mutex_unlock(dapm_mutex); - ret = snd_soc_dapm_sync(arizona->dapm); if (ret != 0) { dev_err(arizona->dev, "Failed to sync DAPM: %d\n", @@ -152,16 +141,11 @@ static int arizona_haptics_play(struct input_dev *input, void *data, static void arizona_haptics_close(struct input_dev *input) { struct arizona_haptics *haptics = input_get_drvdata(input); - struct mutex *dapm_mutex = &haptics->arizona->dapm->card->dapm_mutex; cancel_work_sync(&haptics->work); - mutex_lock_nested(dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); - if (haptics->arizona->dapm) snd_soc_dapm_disable_pin(haptics->arizona->dapm, "HAPTICS"); - - mutex_unlock(dapm_mutex); } static int arizona_haptics_probe(struct platform_device *pdev) diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index 68d92e3..6e89ef6 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -449,14 +449,22 @@ void snd_soc_dapm_debugfs_init(struct snd_soc_dapm_context *dapm, /* dapm audio pin control and status */ int snd_soc_dapm_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, + const char *pin); int snd_soc_dapm_disable_pin(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_disable_pin_unlocked(struct snd_soc_dapm_context *dapm, + const char *pin); int snd_soc_dapm_nc_pin(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_nc_pin_unlocked(struct snd_soc_dapm_context *dapm, + const char *pin); int snd_soc_dapm_get_pin_status(struct snd_soc_dapm_context *dapm, const char *pin); int snd_soc_dapm_sync(struct snd_soc_dapm_context *dapm); int snd_soc_dapm_force_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin); +int snd_soc_dapm_force_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, + const char *pin); int snd_soc_dapm_ignore_suspend(struct snd_soc_dapm_context *dapm, const char *pin); void snd_soc_dapm_auto_nc_codec_pins(struct snd_soc_codec *codec); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index fd39cd2..b9dc6ac 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3210,15 +3210,11 @@ int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol, struct snd_soc_card *card = snd_kcontrol_chip(kcontrol); const char *pin = (const char *)kcontrol->private_value; - mutex_lock_nested(&card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); - if (ucontrol->value.integer.value[0]) snd_soc_dapm_enable_pin(&card->dapm, pin); else snd_soc_dapm_disable_pin(&card->dapm, pin); - mutex_unlock(&card->dapm_mutex); - snd_soc_dapm_sync(&card->dapm); return 0; } @@ -3767,23 +3763,52 @@ void snd_soc_dapm_stream_event(struct snd_soc_pcm_runtime *rtd, int stream, } /** + * snd_soc_dapm_enable_pin_unlocked - enable pin. + * @dapm: DAPM context + * @pin: pin name + * + * Enables input/output pin and its parents or children widgets iff there is + * a valid audio route and active audio stream. + * + * Requires external locking. + * + * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to + * do any widget power switching. + */ +int snd_soc_dapm_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, + const char *pin) +{ + return snd_soc_dapm_set_pin(dapm, pin, 1); +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_enable_pin_unlocked); + +/** * snd_soc_dapm_enable_pin - enable pin. * @dapm: DAPM context * @pin: pin name * * Enables input/output pin and its parents or children widgets iff there is * a valid audio route and active audio stream. + * * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to * do any widget power switching. */ int snd_soc_dapm_enable_pin(struct snd_soc_dapm_context *dapm, const char *pin) { - return snd_soc_dapm_set_pin(dapm, pin, 1); + int ret; + + mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); + + ret = snd_soc_dapm_set_pin(dapm, pin, 1); + + mutex_unlock(&dapm->card->dapm_mutex); + + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dapm_enable_pin); /** - * snd_soc_dapm_force_enable_pin - force a pin to be enabled + * snd_soc_dapm_force_enable_pin_unlocked - force a pin to be enabled * @dapm: DAPM context * @pin: pin name * @@ -3791,11 +3816,13 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_enable_pin); * intended for use with microphone bias supplies used in microphone * jack detection. * + * Requires external locking. + * * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to * do any widget power switching. */ -int snd_soc_dapm_force_enable_pin(struct snd_soc_dapm_context *dapm, - const char *pin) +int snd_soc_dapm_force_enable_pin_unlocked(struct snd_soc_dapm_context *dapm, + const char *pin) { struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true); @@ -3811,25 +3838,103 @@ int snd_soc_dapm_force_enable_pin(struct snd_soc_dapm_context *dapm, return 0; } +EXPORT_SYMBOL_GPL(snd_soc_dapm_force_enable_pin_unlocked); + +/** + * snd_soc_dapm_force_enable_pin - force a pin to be enabled + * @dapm: DAPM context + * @pin: pin name + * + * Enables input/output pin regardless of any other state. This is + * intended for use with microphone bias supplies used in microphone + * jack detection. + * + * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to + * do any widget power switching. + */ +int snd_soc_dapm_force_enable_pin(struct snd_soc_dapm_context *dapm, + const char *pin) +{ + int ret; + + mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); + + ret = snd_soc_dapm_force_enable_pin_unlocked(dapm, pin); + + mutex_unlock(&dapm->card->dapm_mutex); + + return ret; +} EXPORT_SYMBOL_GPL(snd_soc_dapm_force_enable_pin); /** + * snd_soc_dapm_disable_pin_unlocked - disable pin. + * @dapm: DAPM context + * @pin: pin name + * + * Disables input/output pin and its parents or children widgets. + * + * Requires external locking. + * + * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to + * do any widget power switching. + */ +int snd_soc_dapm_disable_pin_unlocked(struct snd_soc_dapm_context *dapm, + const char *pin) +{ + return snd_soc_dapm_set_pin(dapm, pin, 0); +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_disable_pin_unlocked); + +/** * snd_soc_dapm_disable_pin - disable pin. * @dapm: DAPM context * @pin: pin name * * Disables input/output pin and its parents or children widgets. + * * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to * do any widget power switching. */ int snd_soc_dapm_disable_pin(struct snd_soc_dapm_context *dapm, const char *pin) { - return snd_soc_dapm_set_pin(dapm, pin, 0); + int ret; + + mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); + + ret = snd_soc_dapm_set_pin(dapm, pin, 0); + + mutex_unlock(&dapm->card->dapm_mutex); + + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dapm_disable_pin); /** + * snd_soc_dapm_nc_pin_unlocked - permanently disable pin. + * @dapm: DAPM context + * @pin: pin name + * + * Marks the specified pin as being not connected, disabling it along + * any parent or child widgets. At present this is identical to + * snd_soc_dapm_disable_pin() but in future it will be extended to do + * additional things such as disabling controls which only affect + * paths through the pin. + * + * Requires external locking. + * + * NOTE: snd_soc_dapm_sync() needs to be called after this for DAPM to + * do any widget power switching. + */ +int snd_soc_dapm_nc_pin_unlocked(struct snd_soc_dapm_context *dapm, + const char *pin) +{ + return snd_soc_dapm_set_pin(dapm, pin, 0); +} +EXPORT_SYMBOL_GPL(snd_soc_dapm_nc_pin_unlocked); + +/** * snd_soc_dapm_nc_pin - permanently disable pin. * @dapm: DAPM context * @pin: pin name @@ -3845,7 +3950,15 @@ EXPORT_SYMBOL_GPL(snd_soc_dapm_disable_pin); */ int snd_soc_dapm_nc_pin(struct snd_soc_dapm_context *dapm, const char *pin) { - return snd_soc_dapm_set_pin(dapm, pin, 0); + int ret; + + mutex_lock_nested(&dapm->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); + + ret = snd_soc_dapm_set_pin(dapm, pin, 0); + + mutex_unlock(&dapm->card->dapm_mutex); + + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dapm_nc_pin); -- cgit v0.10.2 From e3685e03b40f5ec7926d9a75bf63467fc4071df9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 20 Feb 2014 11:19:58 +0100 Subject: mac80211: fix station wakeup powersave race Consider the following (relatively unlikely) scenario: 1) station goes to sleep while frames are buffered in driver 2) driver blocks wakeup (until no more frames are buffered) 3) station wakes up again 4) driver unblocks wakeup In this case, the current mac80211 code will do the following: 1) WLAN_STA_PS_STA set 2) WLAN_STA_PS_DRIVER set 3) - nothing - 4) WLAN_STA_PS_DRIVER cleared As a result, no frames will be delivered to the client, even though it is awake, until it sends another frame to us that triggers ieee80211_sta_ps_deliver_wakeup() in sta_ps_end(). Since we now take the PS spinlock, we can fix this while at the same time removing the complexity with the pending skb queue function. This was broken since my commit 50a9432daeec ("mac80211: fix powersaving clients races") due to removing the clearing of WLAN_STA_PS_STA in the RX path. While at it, fix a cleanup path issue when a station is removed while the driver is still blocking its wakeup. Signed-off-by: Johannes Berg diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3701930..5e44e317 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1692,14 +1692,8 @@ void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue); void ieee80211_add_pending_skb(struct ieee80211_local *local, struct sk_buff *skb); -void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, - struct sk_buff_head *skbs, - void (*fn)(void *data), void *data); -static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local, - struct sk_buff_head *skbs) -{ - ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL); -} +void ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs); void ieee80211_flush_queues(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c24ca0d..3e57f96 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1128,6 +1128,13 @@ static void sta_ps_end(struct sta_info *sta) sta->sta.addr, sta->sta.aid); if (test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { + /* + * Clear the flag only if the other one is still set + * so that the TX path won't start TX'ing new frames + * directly ... In the case that the driver flag isn't + * set ieee80211_sta_ps_deliver_wakeup() will clear it. + */ + clear_sta_flag(sta, WLAN_STA_PS_STA); ps_dbg(sta->sdata, "STA %pM aid %d driver-ps-blocked\n", sta->sta.addr, sta->sta.aid); return; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ffc1ee6..a023b43 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -99,7 +99,8 @@ static void __cleanup_single_sta(struct sta_info *sta) struct ieee80211_local *local = sdata->local; struct ps_data *ps; - if (test_sta_flag(sta, WLAN_STA_PS_STA)) { + if (test_sta_flag(sta, WLAN_STA_PS_STA) || + test_sta_flag(sta, WLAN_STA_PS_DRIVER)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; @@ -109,6 +110,7 @@ static void __cleanup_single_sta(struct sta_info *sta) return; clear_sta_flag(sta, WLAN_STA_PS_STA); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); atomic_dec(&ps->num_sta_ps); sta_info_recalc_tim(sta); @@ -1090,10 +1092,14 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, } EXPORT_SYMBOL(ieee80211_find_sta); -static void clear_sta_ps_flags(void *_sta) +/* powersave support code */ +void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { - struct sta_info *sta = _sta; struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct sk_buff_head pending; + int filtered = 0, buffered = 0, ac; + unsigned long flags; struct ps_data *ps; if (sdata->vif.type == NL80211_IFTYPE_AP || @@ -1104,20 +1110,6 @@ static void clear_sta_ps_flags(void *_sta) else return; - clear_sta_flag(sta, WLAN_STA_PS_DRIVER); - if (test_and_clear_sta_flag(sta, WLAN_STA_PS_STA)) - atomic_dec(&ps->num_sta_ps); -} - -/* powersave support code */ -void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) -{ - struct ieee80211_sub_if_data *sdata = sta->sdata; - struct ieee80211_local *local = sdata->local; - struct sk_buff_head pending; - int filtered = 0, buffered = 0, ac; - unsigned long flags; - clear_sta_flag(sta, WLAN_STA_SP); BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1); @@ -1148,9 +1140,13 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) buffered += tmp - count; } - ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta); + ieee80211_add_pending_skbs(local, &pending); + clear_sta_flag(sta, WLAN_STA_PS_DRIVER); + clear_sta_flag(sta, WLAN_STA_PS_STA); spin_unlock(&sta->ps_lock); + atomic_dec(&ps->num_sta_ps); + /* This station just woke up and isn't aware of our SMPS state */ if (!ieee80211_smps_is_restrictive(sta->known_smps_mode, sdata->smps_mode) && diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1d1bb70..b8700d4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -435,9 +435,8 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } -void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, - struct sk_buff_head *skbs, - void (*fn)(void *data), void *data) +void ieee80211_add_pending_skbs(struct ieee80211_local *local, + struct sk_buff_head *skbs) { struct ieee80211_hw *hw = &local->hw; struct sk_buff *skb; @@ -461,9 +460,6 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local, __skb_queue_tail(&local->pending[queue], skb); } - if (fn) - fn(data); - for (i = 0; i < hw->queues; i++) __ieee80211_wake_queue(hw, i, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); -- cgit v0.10.2 From 5f5750d2e471866576225c3af28eedfadebb0765 Mon Sep 17 00:00:00 2001 From: Frank Praznik Date: Wed, 19 Feb 2014 13:09:22 -0500 Subject: HID: sony: Fix work queue issues. Don't initialize force-feedback for devices that don't support it to avoid calls to schedule_work() with an uninitialized work_struct. Move the cancel_work_sync() call out of sony_destroy_ff() since the state worker is used for the LEDs even when force-feedback is disabled. Remove sony_destroy_ff() to avoid a compiler warning since it is no longer used. Signed-off-by: Frank Praznik Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 1235405..2f19b15 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -42,6 +42,7 @@ #define DUALSHOCK4_CONTROLLER_BT BIT(6) #define SONY_LED_SUPPORT (SIXAXIS_CONTROLLER_USB | BUZZ_CONTROLLER | DUALSHOCK4_CONTROLLER_USB) +#define SONY_FF_SUPPORT (SIXAXIS_CONTROLLER_USB | DUALSHOCK4_CONTROLLER_USB) #define MAX_LEDS 4 @@ -499,6 +500,7 @@ struct sony_sc { __u8 right; #endif + __u8 worker_initialized; __u8 led_state[MAX_LEDS]; __u8 led_count; }; @@ -993,22 +995,11 @@ static int sony_init_ff(struct hid_device *hdev) return input_ff_create_memless(input_dev, NULL, sony_play_effect); } -static void sony_destroy_ff(struct hid_device *hdev) -{ - struct sony_sc *sc = hid_get_drvdata(hdev); - - cancel_work_sync(&sc->state_worker); -} - #else static int sony_init_ff(struct hid_device *hdev) { return 0; } - -static void sony_destroy_ff(struct hid_device *hdev) -{ -} #endif static int sony_set_output_report(struct sony_sc *sc, int req_id, int req_size) @@ -1077,6 +1068,8 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) if (sc->quirks & SIXAXIS_CONTROLLER_USB) { hdev->hid_output_raw_report = sixaxis_usb_output_raw_report; ret = sixaxis_set_operational_usb(hdev); + + sc->worker_initialized = 1; INIT_WORK(&sc->state_worker, sixaxis_state_worker); } else if (sc->quirks & SIXAXIS_CONTROLLER_BT) @@ -1087,6 +1080,7 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) if (ret < 0) goto err_stop; + sc->worker_initialized = 1; INIT_WORK(&sc->state_worker, dualshock4_state_worker); } else { ret = 0; @@ -1101,9 +1095,11 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id) goto err_stop; } - ret = sony_init_ff(hdev); - if (ret < 0) - goto err_stop; + if (sc->quirks & SONY_FF_SUPPORT) { + ret = sony_init_ff(hdev); + if (ret < 0) + goto err_stop; + } return 0; err_stop: @@ -1120,7 +1116,8 @@ static void sony_remove(struct hid_device *hdev) if (sc->quirks & SONY_LED_SUPPORT) sony_leds_remove(hdev); - sony_destroy_ff(hdev); + if (sc->worker_initialized) + cancel_work_sync(&sc->state_worker); hid_hw_stop(hdev); } -- cgit v0.10.2 From 876fc03aaa4d8cc81331119d80d3e9bfbf1164e8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 Feb 2014 13:33:23 +0100 Subject: ip6_vti: Fix build when NET_IP_TUNNEL is not set. Since commit 469bdcefdc47a ip6_vti uses ip_tunnel_get_stats64(), so we need to select NET_IP_TUNNEL to have this function available. Fixes: 469bdcefdc ("ipv6: fix the use of pcpu_tstats in ip6_vti.c") Signed-off-by: Steffen Klassert diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index d92e558..438a73a 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -138,6 +138,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION config IPV6_VTI tristate "Virtual (secure) IPv6: tunneling" select IPV6_TUNNEL + select NET_IP_TUNNEL depends on INET6_XFRM_MODE_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within -- cgit v0.10.2 From 35ea790d7883dd660208f78eae50ebfd6b8bd14a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 Feb 2014 13:33:23 +0100 Subject: xfrm: Fix NULL pointer dereference on sub policy usage xfrm_state_sort() takes the unsorted states from the src array and stores them into the dst array. We try to get the namespace from the dst array which is empty at this time, so take the namespace from the src array instead. Fixes: 283bc9f35bbbc ("xfrm: Namespacify xfrm state/policy locks") Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a26b7aa..97d117b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1451,7 +1451,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, { int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - struct net *net = xs_net(*dst); + struct net *net = xs_net(*src); if (!afinfo) return -EAFNOSUPPORT; -- cgit v0.10.2 From 8c0cba22e196122d26c92980943474eb53db8deb Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 Feb 2014 13:33:24 +0100 Subject: xfrm: Take xfrm_state_lock in xfrm_migrate_state_find A comment on xfrm_migrate_state_find() says that xfrm_state_lock is held. This is apparently not the case, but we need it to traverse through the state lists. Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 97d117b..c101023 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1215,11 +1215,12 @@ out: return NULL; } -/* net->xfrm.xfrm_state_lock is held */ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) { unsigned int h; - struct xfrm_state *x; + struct xfrm_state *x = NULL; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); if (m->reqid) { h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr, @@ -1236,7 +1237,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n m->old_family)) continue; xfrm_state_hold(x); - return x; + break; } } else { h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr, @@ -1251,11 +1252,13 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n m->old_family)) continue; xfrm_state_hold(x); - return x; + break; } } - return NULL; + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + return x; } EXPORT_SYMBOL(xfrm_migrate_state_find); -- cgit v0.10.2 From ee5c23176fcc820f7a56d3e86001532af0d59b1e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 Feb 2014 13:33:24 +0100 Subject: xfrm: Clone states properly on migration We loose a lot of information of the original state if we clone it with xfrm_state_clone(). In particular, there is no crypto algorithm attached if the original state uses an aead algorithm. This patch add the missing information to the clone state. Signed-off-by: Steffen Klassert diff --git a/include/net/xfrm.h b/include/net/xfrm.h index afa5730..fb5654a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1648,6 +1648,11 @@ static inline int xfrm_aevent_is_on(struct net *net) } #endif +static inline int aead_len(struct xfrm_algo_aead *alg) +{ + return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); +} + static inline int xfrm_alg_len(const struct xfrm_algo *alg) { return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); @@ -1686,6 +1691,12 @@ static inline int xfrm_replay_clone(struct xfrm_state *x, return 0; } +static inline struct xfrm_algo_aead *xfrm_algo_aead_clone(struct xfrm_algo_aead *orig) +{ + return kmemdup(orig, aead_len(orig), GFP_KERNEL); +} + + static inline struct xfrm_algo *xfrm_algo_clone(struct xfrm_algo *orig) { return kmemdup(orig, xfrm_alg_len(orig), GFP_KERNEL); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c101023..40f1b3e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1159,6 +1159,11 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) } x->props.aalgo = orig->props.aalgo; + if (orig->aead) { + x->aead = xfrm_algo_aead_clone(orig->aead); + if (!x->aead) + goto error; + } if (orig->ealg) { x->ealg = xfrm_algo_clone(orig->ealg); if (!x->ealg) @@ -1201,6 +1206,9 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; + x->tfcpad = orig->tfcpad; + x->replay_maxdiff = orig->replay_maxdiff; + x->replay_maxage = orig->replay_maxage; x->curlft.add_time = orig->curlft.add_time; x->km.state = orig->km.state; x->km.seq = orig->km.seq; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1ae3ec7..c274179 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -32,11 +32,6 @@ #include #endif -static inline int aead_len(struct xfrm_algo_aead *alg) -{ - return sizeof(*alg) + ((alg->alg_key_len + 7) / 8); -} - static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { struct nlattr *rt = attrs[type]; -- cgit v0.10.2 From 9085a6422900092886da8c404e1c5340c4ff1cbf Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 20 Feb 2014 10:56:45 -0500 Subject: SELinux: bigendian problems with filename trans rules When writing policy via /sys/fs/selinux/policy I wrote the type and class of filename trans rules in CPU endian instead of little endian. On x86_64 this works just fine, but it means that on big endian arch's like ppc64 and s390 userspace reads the policy and converts it from le32_to_cpu. So the values are all screwed up. Write the values in le format like it should have been to start. Signed-off-by: Eric Paris Acked-by: Stephen Smalley Cc: stable@vger.kernel.org Signed-off-by: Paul Moore diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index c0f4988..9c5cdc2ca 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -3338,10 +3338,10 @@ static int filename_write_helper(void *key, void *data, void *ptr) if (rc) return rc; - buf[0] = ft->stype; - buf[1] = ft->ttype; - buf[2] = ft->tclass; - buf[3] = otype->otype; + buf[0] = cpu_to_le32(ft->stype); + buf[1] = cpu_to_le32(ft->ttype); + buf[2] = cpu_to_le32(ft->tclass); + buf[3] = cpu_to_le32(otype->otype); rc = put_entry(buf, sizeof(u32), 4, fp); if (rc) -- cgit v0.10.2 From c7fbd4158433c1c910c62850247728cb05a94e42 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Thu, 20 Feb 2014 10:49:34 +0200 Subject: clk: tegra124: remove gr2d and gr3d clocks Tegra124 does not have gr2d and gr3d clocks. They have been replaced by the vic03 and gpu clocks respectively. Signed-off-by: Peter De Schrijver diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c index b4cf650..166e02f 100644 --- a/drivers/clk/tegra/clk-tegra124.c +++ b/drivers/clk/tegra/clk-tegra124.c @@ -769,10 +769,8 @@ static struct tegra_clk tegra124_clks[tegra_clk_max] __initdata = { [tegra_clk_sdmmc4_8] = { .dt_id = TEGRA124_CLK_SDMMC4, .present = true }, [tegra_clk_pwm] = { .dt_id = TEGRA124_CLK_PWM, .present = true }, [tegra_clk_i2s2] = { .dt_id = TEGRA124_CLK_I2S2, .present = true }, - [tegra_clk_gr2d] = { .dt_id = TEGRA124_CLK_GR_2D, .present = true }, [tegra_clk_usbd] = { .dt_id = TEGRA124_CLK_USBD, .present = true }, [tegra_clk_isp_8] = { .dt_id = TEGRA124_CLK_ISP, .present = true }, - [tegra_clk_gr3d] = { .dt_id = TEGRA124_CLK_GR_3D, .present = true }, [tegra_clk_disp2] = { .dt_id = TEGRA124_CLK_DISP2, .present = true }, [tegra_clk_disp1] = { .dt_id = TEGRA124_CLK_DISP1, .present = true }, [tegra_clk_host1x_8] = { .dt_id = TEGRA124_CLK_HOST1X, .present = true }, diff --git a/include/dt-bindings/clock/tegra124-car.h b/include/dt-bindings/clock/tegra124-car.h index a1116a3..8c1603b 100644 --- a/include/dt-bindings/clock/tegra124-car.h +++ b/include/dt-bindings/clock/tegra124-car.h @@ -36,10 +36,10 @@ #define TEGRA124_CLK_PWM 17 #define TEGRA124_CLK_I2S2 18 /* 20 (register bit affects vi and vi_sensor) */ -#define TEGRA124_CLK_GR_2D 21 +/* 21 */ #define TEGRA124_CLK_USBD 22 #define TEGRA124_CLK_ISP 23 -#define TEGRA124_CLK_GR_3D 24 +/* 26 */ /* 25 */ #define TEGRA124_CLK_DISP2 26 #define TEGRA124_CLK_DISP1 27 -- cgit v0.10.2 From ec6f678c74dbdb06a6a775bbb00f1d26c17c404b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 18 Feb 2014 10:30:18 +0200 Subject: iwlwifi: dvm: clear IWL_STA_UCODE_INPROGRESS when assoc fails We set IWL_STA_UCODE_INPROGRESS flag when we add a station and clear it when we send the LQ command for it. But the LQ command is sent only when the association succeeds. If the association doesn't succeed, we would leave this flag set and that wouldn't indicate the station entry as vacant. This probably fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1065663 Cc: Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/dvm/sta.c b/drivers/net/wireless/iwlwifi/dvm/sta.c index c0d070c..9cdd91c 100644 --- a/drivers/net/wireless/iwlwifi/dvm/sta.c +++ b/drivers/net/wireless/iwlwifi/dvm/sta.c @@ -590,6 +590,7 @@ void iwl_deactivate_station(struct iwl_priv *priv, const u8 sta_id, sizeof(priv->tid_data[sta_id][tid])); priv->stations[sta_id].used &= ~IWL_STA_DRIVER_ACTIVE; + priv->stations[sta_id].used &= ~IWL_STA_UCODE_INPROGRESS; priv->num_stations--; -- cgit v0.10.2 From cf71d2bc0b8a473209d5c770ce560853bd720d14 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 20 Feb 2014 10:19:31 +0100 Subject: sit: fix panic with route cache in ip tunnels Bug introduced by commit 7d442fab0a67 ("ipv4: Cache dst in tunnels"). Because sit code does not call ip_tunnel_init(), the dst_cache was not initialized. CC: Tom Herbert Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 48ed75c..e77c104 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -129,6 +129,7 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); void ip_tunnel_setup(struct net_device *dev, int net_id); +void ip_tunnel_dst_reset_all(struct ip_tunnel *t); /* Extract dsfield from inner protocol */ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 08f8cf9..78a89e6 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -93,13 +93,14 @@ static void tunnel_dst_reset(struct ip_tunnel *t) tunnel_dst_set(t, NULL); } -static void tunnel_dst_reset_all(struct ip_tunnel *t) +void ip_tunnel_dst_reset_all(struct ip_tunnel *t) { int i; for_each_possible_cpu(i) __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); } +EXPORT_SYMBOL(ip_tunnel_dst_reset_all); static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) { @@ -713,7 +714,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn, if (set_mtu) dev->mtu = mtu; } - tunnel_dst_reset_all(t); + ip_tunnel_dst_reset_all(t); netdev_state_change(dev); } @@ -1042,7 +1043,7 @@ void ip_tunnel_uninit(struct net_device *dev) if (itn->fb_tunnel_dev != dev) ip_tunnel_del(netdev_priv(dev)); - tunnel_dst_reset_all(tunnel); + ip_tunnel_dst_reset_all(tunnel); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3dfbcf1..b4d74c8 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -475,6 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_del_prl(tunnel, NULL); } + ip_tunnel_dst_reset_all(tunnel); dev_put(dev); } @@ -1082,6 +1083,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) t->parms.link = p->link; ipip6_tunnel_bind_dev(t->dev); } + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); } @@ -1112,6 +1114,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); return 0; } @@ -1271,6 +1274,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } + ip_tunnel_dst_reset_all(t); netdev_state_change(dev); break; @@ -1326,6 +1330,9 @@ static const struct net_device_ops ipip6_netdev_ops = { static void ipip6_dev_free(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1375,6 +1382,12 @@ static int ipip6_tunnel_init(struct net_device *dev) u64_stats_init(&ipip6_tunnel_stats->syncp); } + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + return 0; } @@ -1405,6 +1418,12 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) u64_stats_init(&ipip6_fb_stats->syncp); } + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; + } + dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); return 0; -- cgit v0.10.2 From fb8ef788680d48523321e5f150b23700a1caf980 Mon Sep 17 00:00:00 2001 From: Duan Fugang-B38611 Date: Thu, 20 Feb 2014 18:14:39 +0800 Subject: net: fec: fix potential issue to avoid fec interrupt lost and crc error The current flow: Set TX BD ready, and then set "INT" and "PINS" bit to enable tx interrupt generation and crc checksum. There has potential issue like as: CPU fec uDMA Set tx ready bit uDMA start the BD transmission Set "INT" bit Set "PINS" bit ... Above situation cause fec tx interrupt lost and fec MAC don't do CRC checksum. The patch fix the potential issue. Signed-off-by: Fugang Duan Acked-by: Frank Li Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 903362a..479a7cb 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -389,12 +389,6 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) netdev_err(ndev, "Tx DMA memory map failed\n"); return NETDEV_TX_OK; } - /* Send it on its way. Tell FEC it's ready, interrupt when done, - * it's the last BD of the frame, and to put the CRC on the end. - */ - status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR - | BD_ENET_TX_LAST | BD_ENET_TX_TC); - bdp->cbd_sc = status; if (fep->bufdesc_ex) { @@ -416,6 +410,13 @@ fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } } + /* Send it on its way. Tell FEC it's ready, interrupt when done, + * it's the last BD of the frame, and to put the CRC on the end. + */ + status |= (BD_ENET_TX_READY | BD_ENET_TX_INTR + | BD_ENET_TX_LAST | BD_ENET_TX_TC); + bdp->cbd_sc = status; + bdp_pre = fec_enet_get_prevdesc(bdp, fep); if ((id_entry->driver_data & FEC_QUIRK_ERR006358) && !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) { -- cgit v0.10.2 From 7cce3b75682ff898c935c17d186983cbf3ed393e Mon Sep 17 00:00:00 2001 From: Matija Glavinic Pecotic Date: Thu, 20 Feb 2014 14:13:04 +0100 Subject: net: sctp: Potentially-Failed state should not be reached from unconfirmed state In current implementation it is possible to reach PF state from unconfirmed. We can interpret sctp-failover-02 in a way that PF state is meant to be reached only from active state, in the end, this is when entering PF state makes sense. Here are few quotes from sctp-failover-02, but regardless of these, same understanding can be reached from whole section 5: Section 5.1, quickfailover guide: "The PF state is an intermediate state between Active and Failed states." "Each time the T3-rtx timer expires on an active or idle destination, the error counter of that destination address will be incremented. When the value in the error counter exceeds PFMR, the endpoint should mark the destination transport address as PF." There are several concrete reasons for such interpretation. For start, rfc4960 does not take into concern quickfailover algorithm. Therefore, quickfailover must comply to 4960. Point where this compliance can be argued is following behavior: When PF is entered, association overall error counter is incremented for each missed HB. This is contradictory to rfc4960, as address, while in unconfirmed state, is subjected to probing, and while it is probed, it should not increment association overall error counter. This has as a consequence that we might end up in situation in which we drop association due path failure on unconfirmed address, in case we have wrong configuration in a way: Association.Max.Retrans == Path.Max.Retrans. Another reason is that entering PF from unconfirmed will cause a loss of address confirmed event when address is once (if) confirmed. This is fine from failover guide point of view, but it is not consistent with behavior preceding failover implementation and recommendation from 4960: 5.4. Path Verification Whenever a path is confirmed, an indication MAY be given to the upper layer. Signed-off-by: Matija Glavinic Pecotic Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index bd85915..5d6883f 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -495,11 +495,12 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, } /* If the transport error count is greater than the pf_retrans - * threshold, and less than pathmaxrtx, then mark this transport - * as Partially Failed, ee SCTP Quick Failover Draft, secon 5.1, - * point 1 + * threshold, and less than pathmaxrtx, and if the current state + * is not SCTP_UNCONFIRMED, then mark this transport as Partially + * Failed, see SCTP Quick Failover Draft, section 5.1 */ if ((transport->state != SCTP_PF) && + (transport->state != SCTP_UNCONFIRMED) && (asoc->pf_retrans < transport->pathmaxrxt) && (transport->error_count > asoc->pf_retrans)) { -- cgit v0.10.2 From 72471c0d3197d02da53897868692f0b91042f8a5 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 14 Feb 2014 16:17:49 +0400 Subject: hostap: Do not free priv until timer handler has actually stopped using it Function del_timer() does not guarantee that timer was really deleted. If the timer handler is beeing executed at the moment, the function does nothing. So, it's possible to use already freed memory in the handler: [ref: Documentation/DocBook/kernel-locking.tmpl] This was found using grep and compile-tested only. Please, consider applying or something similar to it. Signed-off-by: Kirill Tkhai CC: Jouni Malinen CC: John W. Linville Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c index d36e252..5965255 100644 --- a/drivers/net/wireless/hostap/hostap_ap.c +++ b/drivers/net/wireless/hostap/hostap_ap.c @@ -147,7 +147,7 @@ static void ap_free_sta(struct ap_data *ap, struct sta_info *sta) if (!sta->ap && sta->u.sta.challenge) kfree(sta->u.sta.challenge); - del_timer(&sta->timer); + del_timer_sync(&sta->timer); #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */ kfree(sta); -- cgit v0.10.2 From 95320774fae71d7b22b970ef4267fcc4d1ad23d8 Mon Sep 17 00:00:00 2001 From: James Cameron Date: Mon, 17 Feb 2014 10:25:53 +1100 Subject: libertas: fix scan result loss if SSID IE len 0 Scan results from Marvell 8388 and 8686 have probe responses from hidden APs and OLPC XO-1 mesh with a zero length SSID IE. Bug in lbs_ret_scan discarded any remaining BSS in scan response, leading to user not seeing APs in dense environments. With LBS_DEB_SCAN, dmesg shows libertas scan: scan response: 5 BSSs (419 bytes); resp size 474 bytes libertas scan: scan: 00:1a:2b:84:de:e8, capa 0401, chan 1, qz, -51 dBm libertas scan: scan: 5c:63:bf:d8:eb:0c, capa 0411, chan 1, qw129, -23 dBm libertas scan: scan response: invalid IE fmt With LBS_DEB_HEX, dmesg shows valid BSS in scan response were not processed. Change is to ignore zero length IE and continue processing. Fixes OLPC 12757, http://dev.laptop.org/ticket/12757 Signed-off-by: James Cameron Reported-by: T Gillett Tested-by: T Gillett CC: Dan Williams Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index 32f7500..cb6d189 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -621,7 +621,7 @@ static int lbs_ret_scan(struct lbs_private *priv, unsigned long dummy, id = *pos++; elen = *pos++; left -= 2; - if (elen > left || elen == 0) { + if (elen > left) { lbs_deb_scan("scan response: invalid IE fmt\n"); goto done; } -- cgit v0.10.2 From 4f7ba432202c8330cc03ab959c6228d0de5dc4a3 Mon Sep 17 00:00:00 2001 From: Avinash Patil Date: Tue, 18 Feb 2014 15:41:54 -0800 Subject: mwifiex: clean pcie ring only when device is present Write io memory to clean PCIe buffer only when PCIe device is present else this results into crash because of invalid memory access. Cc: # 3.9+ Signed-off-by: Avinash Patil Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/wmm.c b/drivers/net/wireless/mwifiex/wmm.c index 13eaeed..981cf6e 100644 --- a/drivers/net/wireless/mwifiex/wmm.c +++ b/drivers/net/wireless/mwifiex/wmm.c @@ -559,7 +559,8 @@ mwifiex_clean_txrx(struct mwifiex_private *priv) mwifiex_wmm_delete_all_ralist(priv); memcpy(tos_to_tid, ac_to_tid, sizeof(tos_to_tid)); - if (priv->adapter->if_ops.clean_pcie_ring) + if (priv->adapter->if_ops.clean_pcie_ring && + !priv->adapter->surprise_removed) priv->adapter->if_ops.clean_pcie_ring(priv->adapter); spin_unlock_irqrestore(&priv->wmm.ra_list_spinlock, flags); } -- cgit v0.10.2 From bb8e6a1ee881d131e404f0f1f5e8dc9281002771 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 18 Feb 2014 15:41:55 -0800 Subject: mwifiex: add NULL check for PCIe Rx skb We may get a NULL pointer here if skb allocation for Rx packet was failed earlier. Cc: # 3.9+ Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c index 03688aa..28029b7 100644 --- a/drivers/net/wireless/mwifiex/pcie.c +++ b/drivers/net/wireless/mwifiex/pcie.c @@ -1211,6 +1211,12 @@ static int mwifiex_pcie_process_recv_data(struct mwifiex_adapter *adapter) rd_index = card->rxbd_rdptr & reg->rx_mask; skb_data = card->rx_buf_list[rd_index]; + /* If skb allocation was failed earlier for Rx packet, + * rx_buf_list[rd_index] would have been left with a NULL. + */ + if (!skb_data) + return -ENOMEM; + MWIFIEX_SKB_PACB(skb_data, &buf_pa); pci_unmap_single(card->dev, buf_pa, MWIFIEX_RX_DATA_BUF_SIZE, PCI_DMA_FROMDEVICE); -- cgit v0.10.2 From 1c97560f6d751a620978504a4a888c631192b71a Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 18 Feb 2014 15:41:56 -0800 Subject: mwifiex: fix cmd and Tx data timeout issue for PCIe cards We are sending sleep confirm done interrupt in the middle of sleep handshake. There is a corner case when Tx done interrupt is received from firmware during sleep handshake due to which host and firmware power states go out of sync causing cmd and Tx data timeout problem. Hence sleep confirm done interrupt is sent at the end of sleep handshake to fix the problem. Cc: # 3.10+ Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c index 28029b7..7fe7b53 100644 --- a/drivers/net/wireless/mwifiex/pcie.c +++ b/drivers/net/wireless/mwifiex/pcie.c @@ -1531,6 +1531,14 @@ static int mwifiex_pcie_process_cmd_complete(struct mwifiex_adapter *adapter) if (adapter->ps_state == PS_STATE_SLEEP_CFM) { mwifiex_process_sleep_confirm_resp(adapter, skb->data, skb->len); + mwifiex_pcie_enable_host_int(adapter); + if (mwifiex_write_reg(adapter, + PCIE_CPU_INT_EVENT, + CPU_INTR_SLEEP_CFM_DONE)) { + dev_warn(adapter->dev, + "Write register failed\n"); + return -1; + } while (reg->sleep_cookie && (count++ < 10) && mwifiex_pcie_ok_to_access_hw(adapter)) usleep_range(50, 60); @@ -1999,23 +2007,9 @@ static void mwifiex_interrupt_status(struct mwifiex_adapter *adapter) adapter->int_status |= pcie_ireg; spin_unlock_irqrestore(&adapter->int_lock, flags); - if (pcie_ireg & HOST_INTR_CMD_DONE) { - if ((adapter->ps_state == PS_STATE_SLEEP_CFM) || - (adapter->ps_state == PS_STATE_SLEEP)) { - mwifiex_pcie_enable_host_int(adapter); - if (mwifiex_write_reg(adapter, - PCIE_CPU_INT_EVENT, - CPU_INTR_SLEEP_CFM_DONE) - ) { - dev_warn(adapter->dev, - "Write register failed\n"); - return; - - } - } - } else if (!adapter->pps_uapsd_mode && - adapter->ps_state == PS_STATE_SLEEP && - mwifiex_pcie_ok_to_access_hw(adapter)) { + if (!adapter->pps_uapsd_mode && + adapter->ps_state == PS_STATE_SLEEP && + mwifiex_pcie_ok_to_access_hw(adapter)) { /* Potentially for PCIe we could get other * interrupts like shared. Don't change power * state until cookie is set */ -- cgit v0.10.2 From 21f8aaee0c62708654988ce092838aa7df4d25d8 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 19 Feb 2014 13:15:17 +0100 Subject: ath9k: protect tid->sched check We check tid->sched without a lock taken on ath_tx_aggr_sleep(). That is race condition which can result of doing list_del(&tid->list) twice (second time with poisoned list node) and cause crash like shown below: [424271.637220] BUG: unable to handle kernel paging request at 00100104 [424271.637328] IP: [] ath_tx_aggr_sleep+0x62/0xe0 [ath9k] ... [424271.639953] Call Trace: [424271.639998] [] ? ath9k_get_survey+0x110/0x110 [ath9k] [424271.640083] [] ath9k_sta_notify+0x42/0x50 [ath9k] [424271.640177] [] sta_ps_start+0x8f/0x1c0 [mac80211] [424271.640258] [] ? free_compound_page+0x2e/0x40 [424271.640346] [] ieee80211_rx_handlers+0x9d5/0x2340 [mac80211] [424271.640437] [] ? kmem_cache_free+0x1d8/0x1f0 [424271.640510] [] ? kfree_skbmem+0x34/0x90 [424271.640578] [] ? put_page+0x2c/0x40 [424271.640640] [] ? kfree_skbmem+0x34/0x90 [424271.640706] [] ? kfree_skbmem+0x34/0x90 [424271.640787] [] ? ieee80211_rx_handlers_result+0x73/0x1d0 [mac80211] [424271.640897] [] ieee80211_prepare_and_rx_handle+0x520/0xad0 [mac80211] [424271.641009] [] ? ieee80211_rx_handlers+0x2ed/0x2340 [mac80211] [424271.641104] [] ? ip_output+0x7e/0xd0 [424271.641182] [] ieee80211_rx+0x307/0x7c0 [mac80211] [424271.641266] [] ath_rx_tasklet+0x88e/0xf70 [ath9k] [424271.641358] [] ? ieee80211_rx+0x1dc/0x7c0 [mac80211] [424271.641445] [] ath9k_tasklet+0xcb/0x130 [ath9k] Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=70551 Reported-and-tested-by: Max Sydorenko Cc: stable@vger.kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 0a75e2f..4f4ce83 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1444,14 +1444,16 @@ void ath_tx_aggr_sleep(struct ieee80211_sta *sta, struct ath_softc *sc, for (tidno = 0, tid = &an->tid[tidno]; tidno < IEEE80211_NUM_TIDS; tidno++, tid++) { - if (!tid->sched) - continue; - ac = tid->ac; txq = ac->txq; ath_txq_lock(sc, txq); + if (!tid->sched) { + ath_txq_unlock(sc, txq); + continue; + } + buffered = ath_tid_has_buffered(tid); tid->sched = false; -- cgit v0.10.2 From 1eb43018673e735ea9cd756970f4e71ca01a5f21 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 20 Feb 2014 18:55:55 +0100 Subject: brcmfmac: fix txglomming scatter-gather packet transfers The driver concatenates multiple packets in one MMC transfer. For scatter-gather to work the total length need to be multiple of 512 bytes. A pre-allocated buffer was used to add padding to accomplish that. However, the length was not properly set and it was freed after the first transfer causing a crash. Reviewed-by: Daniel (Deognyoun) Kim Reviewed-by: Hante Meuleman Reviewed-by: Franky (Zhenhui) Lin Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Arend van Spriel Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c index 3e99189..119ee6e 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c @@ -457,7 +457,6 @@ struct brcmf_sdio { u8 tx_hdrlen; /* sdio bus header length for tx packet */ bool txglom; /* host tx glomming enable flag */ - struct sk_buff *txglom_sgpad; /* scatter-gather padding buffer */ u16 head_align; /* buffer pointer alignment */ u16 sgentry_align; /* scatter-gather buffer alignment */ }; @@ -1944,9 +1943,8 @@ static int brcmf_sdio_txpkt_prep_sg(struct brcmf_sdio *bus, if (lastfrm && chain_pad) tail_pad += blksize - chain_pad; if (skb_tailroom(pkt) < tail_pad && pkt->len > blksize) { - pkt_pad = bus->txglom_sgpad; - if (pkt_pad == NULL) - brcmu_pkt_buf_get_skb(tail_pad + tail_chop); + pkt_pad = brcmu_pkt_buf_get_skb(tail_pad + tail_chop + + bus->head_align); if (pkt_pad == NULL) return -ENOMEM; ret = brcmf_sdio_txpkt_hdalign(bus, pkt_pad); @@ -1957,6 +1955,7 @@ static int brcmf_sdio_txpkt_prep_sg(struct brcmf_sdio *bus, tail_chop); *(u32 *)(pkt_pad->cb) = ALIGN_SKB_FLAG + tail_chop; skb_trim(pkt, pkt->len - tail_chop); + skb_trim(pkt_pad, tail_pad + tail_chop); __skb_queue_after(pktq, pkt, pkt_pad); } else { ntail = pkt->data_len + tail_pad - @@ -2011,7 +2010,7 @@ brcmf_sdio_txpkt_prep(struct brcmf_sdio *bus, struct sk_buff_head *pktq, return ret; head_pad = (u16)ret; if (head_pad) - memset(pkt_next->data, 0, head_pad + bus->tx_hdrlen); + memset(pkt_next->data + bus->tx_hdrlen, 0, head_pad); total_len += pkt_next->len; @@ -3486,10 +3485,6 @@ static int brcmf_sdio_bus_preinit(struct device *dev) bus->txglom = false; value = 1; pad_size = bus->sdiodev->func[2]->cur_blksize << 1; - bus->txglom_sgpad = brcmu_pkt_buf_get_skb(pad_size); - if (!bus->txglom_sgpad) - brcmf_err("allocating txglom padding skb failed, reduced performance\n"); - err = brcmf_iovar_data_set(bus->sdiodev->dev, "bus:rxglom", &value, sizeof(u32)); if (err < 0) { @@ -4053,7 +4048,6 @@ void brcmf_sdio_remove(struct brcmf_sdio *bus) brcmf_sdio_chip_detach(&bus->ci); } - brcmu_pkt_buf_free_skb(bus->txglom_sgpad); kfree(bus->rxbuf); kfree(bus->hdrbuf); kfree(bus); -- cgit v0.10.2 From b3050248c167871ca52cfdb2ce78aa2460249346 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Fri, 14 Feb 2014 08:15:20 +0530 Subject: ath9k: Fix ETSI compliance for AR9462 2.0 The minimum CCA power threshold values have to be adjusted for existing cards to be in compliance with new regulations. Newer cards will make use of the values obtained from EEPROM, support for this was added earlier. To make sure that cards that are already in use and don't have proper values in EEPROM, do not violate regulations, use the initvals instead. Cc: stable@vger.kernel.org Reported-by: Jeang Daniel Signed-off-by: Sujith Manoharan Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h b/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h index 1cc1356..1b6b4d0 100644 --- a/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h +++ b/drivers/net/wireless/ath/ath9k/ar9462_2p0_initvals.h @@ -57,7 +57,7 @@ static const u32 ar9462_2p0_baseband_postamble[][5] = { {0x00009e14, 0x37b95d5e, 0x37b9605e, 0x3236605e, 0x32365a5e}, {0x00009e18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, {0x00009e1c, 0x0001cf9c, 0x0001cf9c, 0x00021f9c, 0x00021f9c}, - {0x00009e20, 0x000003b5, 0x000003b5, 0x000003ce, 0x000003ce}, + {0x00009e20, 0x000003a5, 0x000003a5, 0x000003a5, 0x000003a5}, {0x00009e2c, 0x0000001c, 0x0000001c, 0x00000021, 0x00000021}, {0x00009e3c, 0xcf946220, 0xcf946220, 0xcfd5c782, 0xcfd5c282}, {0x00009e44, 0x62321e27, 0x62321e27, 0xfe291e27, 0xfe291e27}, @@ -96,7 +96,7 @@ static const u32 ar9462_2p0_baseband_postamble[][5] = { {0x0000ae04, 0x001c0000, 0x001c0000, 0x001c0000, 0x00100000}, {0x0000ae18, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, {0x0000ae1c, 0x0000019c, 0x0000019c, 0x0000019c, 0x0000019c}, - {0x0000ae20, 0x000001b5, 0x000001b5, 0x000001ce, 0x000001ce}, + {0x0000ae20, 0x000001a6, 0x000001a6, 0x000001aa, 0x000001aa}, {0x0000b284, 0x00000000, 0x00000000, 0x00000550, 0x00000550}, }; -- cgit v0.10.2 From 09ebb17ab476b6ac1cc07b53d07e88f4d31ee4d3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 18 Feb 2014 12:00:21 +0100 Subject: udf: Fix data corruption on file type conversion UDF has two types of files - files with data stored in inode (ICB in UDF terminology) and files with data stored in external data blocks. We convert file from in-inode format to external format in udf_file_aio_write() when we find out data won't fit into inode any longer. However the following race between two O_APPEND writes can happen: CPU1 CPU2 udf_file_aio_write() udf_file_aio_write() down_write(&iinfo->i_data_sem); checks that i_size + count1 fits within inode => no need to convert up_write(&iinfo->i_data_sem); down_write(&iinfo->i_data_sem); checks that i_size + count2 fits within inode => no need to convert up_write(&iinfo->i_data_sem); generic_file_aio_write() - extends file by count1 bytes generic_file_aio_write() - extends file by count2 bytes Clearly if count1 + count2 doesn't fit into the inode, we overwrite kernel buffers beyond inode, possibly corrupting the filesystem as well. Fix the problem by acquiring i_mutex before checking whether write fits into the inode and using __generic_file_aio_write() afterwards which puts check and write into one critical section. Reported-by: Al Viro Signed-off-by: Jan Kara diff --git a/fs/udf/file.c b/fs/udf/file.c index c02a27a..1037637 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -144,6 +144,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, size_t count = iocb->ki_nbytes; struct udf_inode_info *iinfo = UDF_I(inode); + mutex_lock(&inode->i_mutex); down_write(&iinfo->i_data_sem); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { if (file->f_flags & O_APPEND) @@ -156,6 +157,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, pos + count)) { err = udf_expand_file_adinicb(inode); if (err) { + mutex_unlock(&inode->i_mutex); udf_debug("udf_expand_adinicb: err=%d\n", err); return err; } @@ -169,9 +171,17 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } else up_write(&iinfo->i_data_sem); - retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); - if (retval > 0) + retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); + mutex_unlock(&inode->i_mutex); + + if (retval > 0) { + ssize_t err; + mark_inode_dirty(inode); + err = generic_write_sync(file, iocb->ki_pos - retval, retval); + if (err < 0) + retval = err; + } return retval; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 062b792..982ce05 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -265,6 +265,7 @@ int udf_expand_file_adinicb(struct inode *inode) .nr_to_write = 1, }; + WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); if (!iinfo->i_lenAlloc) { if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; -- cgit v0.10.2 From 1362f4ea20fa63688ba6026e586d9746ff13a846 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 20 Feb 2014 17:02:27 +0100 Subject: quota: Fix race between dqput() and dquot_scan_active() Currently last dqput() can race with dquot_scan_active() causing it to call callback for an already deactivated dquot. The race is as follows: CPU1 CPU2 dqput() spin_lock(&dq_list_lock); if (atomic_read(&dquot->dq_count) > 1) { - not taken if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { spin_unlock(&dq_list_lock); ->release_dquot(dquot); if (atomic_read(&dquot->dq_count) > 1) - not taken dquot_scan_active() spin_lock(&dq_list_lock); if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) - not taken atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); - proceeds to release dquot ret = fn(dquot, priv); - called for inactive dquot Fix the problem by making sure possible ->release_dquot() is finished by the time we call the callback and new calls to it will notice reference dquot_scan_active() has taken and bail out. CC: stable@vger.kernel.org # >= 2.6.29 Signed-off-by: Jan Kara diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 831d49a..cfc8dcc 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block *sb, dqstats_inc(DQST_LOOKUPS); dqput(old_dquot); old_dquot = dquot; - ret = fn(dquot, priv); - if (ret < 0) - goto out; + /* + * ->release_dquot() can be racing with us. Our reference + * protects us from new calls to it so just wait for any + * outstanding call and recheck the DQ_ACTIVE_B after that. + */ + wait_on_dquot(dquot); + if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { + ret = fn(dquot, priv); + if (ret < 0) + goto out; + } spin_lock(&dq_list_lock); /* We are safe to continue now because our dquot could not * be moved out of the inuse list while we hold the reference */ -- cgit v0.10.2 From ddb95145a38eb37b236d4e00f43a75d067922dda Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 19 Feb 2014 17:51:25 -0800 Subject: qla2xxx: Fix qlt_lport_register base_vha callback race This patch closes a race between qlt_lport_register() and tcm_qla2xxx callback logic by holding qla_tgt_mutex before making the callback. In order for this to work, the qlt_add_target() and qlt_remove_target() code has been changed to avoid the accessing qla_tgt_mutex + list_[add,del] for NPIV enabled ports. This bug introduced in v3.14-rc1 code with commit 49a47f2. Cc: Sawan Chandak Cc: Quinn Tran Cc: Saurav Kashyap Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 2eb97d7..ea3eaef 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -4181,6 +4181,9 @@ int qlt_add_target(struct qla_hw_data *ha, struct scsi_qla_host *base_vha) tgt->datasegs_per_cmd = QLA_TGT_DATASEGS_PER_CMD_24XX; tgt->datasegs_per_cont = QLA_TGT_DATASEGS_PER_CONT_24XX; + if (base_vha->fc_vport) + return 0; + mutex_lock(&qla_tgt_mutex); list_add_tail(&tgt->tgt_list_entry, &qla_tgt_glist); mutex_unlock(&qla_tgt_mutex); @@ -4194,6 +4197,10 @@ int qlt_remove_target(struct qla_hw_data *ha, struct scsi_qla_host *vha) if (!vha->vha_tgt.qla_tgt) return 0; + if (vha->fc_vport) { + qlt_release(vha->vha_tgt.qla_tgt); + return 0; + } mutex_lock(&qla_tgt_mutex); list_del(&vha->vha_tgt.qla_tgt->tgt_list_entry); mutex_unlock(&qla_tgt_mutex); @@ -4265,6 +4272,12 @@ int qlt_lport_register(void *target_lport_ptr, u64 phys_wwpn, spin_unlock_irqrestore(&ha->hardware_lock, flags); continue; } + if (tgt->tgt_stop) { + pr_debug("MODE_TARGET in shutdown on qla2xxx(%d)\n", + host->host_no); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + continue; + } spin_unlock_irqrestore(&ha->hardware_lock, flags); if (!scsi_host_get(host)) { @@ -4279,12 +4292,11 @@ int qlt_lport_register(void *target_lport_ptr, u64 phys_wwpn, scsi_host_put(host); continue; } - mutex_unlock(&qla_tgt_mutex); - rc = (*callback)(vha, target_lport_ptr, npiv_wwpn, npiv_wwnn); if (rc != 0) scsi_host_put(host); + mutex_unlock(&qla_tgt_mutex); return rc; } mutex_unlock(&qla_tgt_mutex); -- cgit v0.10.2 From 3c231bdae1e7c8d366eeb133980b81dff2e1e809 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 19 Feb 2014 17:50:22 -0800 Subject: qla2xxx: Check + fail when npiv_vports_inuse exists in shutdown This patch adds an check to qlt_stop_phase1() to avoid shutdown when the base_vha contains a non-zero fc_host->npiv_vports_inuse count. This includes holding qla_tgt_mutex in qlt_stop_phase1() between the fc_host->npiv_vports_inuse check + setting of tgt->tgt_stop to avoid a possible race between qlt_lport_register() -> tcm_qla2xxx -> tcm_qla2xxx_lport_register_npiv_cb() calling fc_vport_create(). Cc: Sawan Chandak Cc: Quinn Tran Cc: Saurav Kashyap Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index ea3eaef..f2e1c5a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -790,17 +790,32 @@ static inline int test_tgt_sess_count(struct qla_tgt *tgt) } /* Called by tcm_qla2xxx configfs code */ -void qlt_stop_phase1(struct qla_tgt *tgt) +int qlt_stop_phase1(struct qla_tgt *tgt) { struct scsi_qla_host *vha = tgt->vha; struct qla_hw_data *ha = tgt->ha; unsigned long flags; + mutex_lock(&qla_tgt_mutex); + if (!vha->fc_vport) { + struct Scsi_Host *sh = vha->host; + struct fc_host_attrs *fc_host = shost_to_fc_host(sh); + bool npiv_vports; + + spin_lock_irqsave(sh->host_lock, flags); + npiv_vports = (fc_host->npiv_vports_inuse); + spin_unlock_irqrestore(sh->host_lock, flags); + + if (npiv_vports) { + mutex_unlock(&qla_tgt_mutex); + return -EPERM; + } + } if (tgt->tgt_stop || tgt->tgt_stopped) { ql_dbg(ql_dbg_tgt_mgt, vha, 0xf04e, "Already in tgt->tgt_stop or tgt_stopped state\n"); - dump_stack(); - return; + mutex_unlock(&qla_tgt_mutex); + return -EPERM; } ql_dbg(ql_dbg_tgt, vha, 0xe003, "Stopping target for host %ld(%p)\n", @@ -815,6 +830,7 @@ void qlt_stop_phase1(struct qla_tgt *tgt) qlt_clear_tgt_db(tgt, true); spin_unlock_irqrestore(&ha->hardware_lock, flags); mutex_unlock(&vha->vha_tgt.tgt_mutex); + mutex_unlock(&qla_tgt_mutex); flush_delayed_work(&tgt->sess_del_work); @@ -841,6 +857,7 @@ void qlt_stop_phase1(struct qla_tgt *tgt) /* Wait for sessions to clear out (just in case) */ wait_event(tgt->waitQ, test_tgt_sess_count(tgt)); + return 0; } EXPORT_SYMBOL(qlt_stop_phase1); diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h index 66e755c..ce33d8c 100644 --- a/drivers/scsi/qla2xxx/qla_target.h +++ b/drivers/scsi/qla2xxx/qla_target.h @@ -1001,7 +1001,7 @@ extern void qlt_modify_vp_config(struct scsi_qla_host *, extern void qlt_probe_one_stage1(struct scsi_qla_host *, struct qla_hw_data *); extern int qlt_mem_alloc(struct qla_hw_data *); extern void qlt_mem_free(struct qla_hw_data *); -extern void qlt_stop_phase1(struct qla_tgt *); +extern int qlt_stop_phase1(struct qla_tgt *); extern void qlt_stop_phase2(struct qla_tgt *); extern irqreturn_t qla83xx_msix_atio_q(int, void *); extern void qlt_83xx_iospace_config(struct qla_hw_data *); -- cgit v0.10.2 From 394d62ba4580a74afc90bf0e007e10291bf447cc Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 19 Feb 2014 16:52:15 -0800 Subject: tcm_qla2xxx: Add NPIV specific enable/disable attribute logic This patch adds seperate logic for NPIV specific enable/disable attribute logic, as NPIV vs. non-NPIV enable/disable ends up being different enough to warrent seperate logic for setting configfs tpg_group dependencies in the non-NPIV case. Cc: Sawan Chandak Cc: Quinn Tran Cc: Saurav Kashyap Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 75a141b..db43b289 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1053,11 +1053,64 @@ static void tcm_qla2xxx_drop_tpg(struct se_portal_group *se_tpg) /* * Clear local TPG=1 pointer for non NPIV mode. */ - lport->tpg_1 = NULL; - + lport->tpg_1 = NULL; kfree(tpg); } +static ssize_t tcm_qla2xxx_npiv_tpg_show_enable( + struct se_portal_group *se_tpg, + char *page) +{ + return tcm_qla2xxx_tpg_show_enable(se_tpg, page); +} + +static ssize_t tcm_qla2xxx_npiv_tpg_store_enable( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct se_wwn *se_wwn = se_tpg->se_tpg_wwn; + struct tcm_qla2xxx_lport *lport = container_of(se_wwn, + struct tcm_qla2xxx_lport, lport_wwn); + struct scsi_qla_host *vha = lport->qla_vha; + struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, + struct tcm_qla2xxx_tpg, se_tpg); + unsigned long op; + int rc; + + rc = kstrtoul(page, 0, &op); + if (rc < 0) { + pr_err("kstrtoul() returned %d\n", rc); + return -EINVAL; + } + if ((op != 1) && (op != 0)) { + pr_err("Illegal value for tpg_enable: %lu\n", op); + return -EINVAL; + } + if (op) { + if (atomic_read(&tpg->lport_tpg_enabled)) + return -EEXIST; + + atomic_set(&tpg->lport_tpg_enabled, 1); + qlt_enable_vha(vha); + } else { + if (!atomic_read(&tpg->lport_tpg_enabled)) + return count; + + atomic_set(&tpg->lport_tpg_enabled, 0); + qlt_stop_phase1(vha->vha_tgt.qla_tgt); + } + + return count; +} + +TF_TPG_BASE_ATTR(tcm_qla2xxx_npiv, enable, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *tcm_qla2xxx_npiv_tpg_attrs[] = { + &tcm_qla2xxx_npiv_tpg_enable.attr, + NULL, +}; + static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg( struct se_wwn *wwn, struct config_group *group, @@ -1935,7 +1988,7 @@ static int tcm_qla2xxx_register_configfs(void) */ npiv_fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs; npiv_fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = - tcm_qla2xxx_tpg_attrs; + tcm_qla2xxx_npiv_tpg_attrs; npiv_fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL; npiv_fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; npiv_fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; -- cgit v0.10.2 From 7474f52a82d51da2e6110e91bba8b000cb9cf803 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 19 Feb 2014 16:53:07 -0800 Subject: tcm_qla2xxx: Perform configfs depend/undepend for base_tpg This patch performs configfs_depend_item() during TPG enable for base_tpg (eg: non-NPIV) ports, and configfs_undepend_item() during TPG disable for base_tpg. This is done to ensure that any attempt to configfs rmdir a base_tpg with active NPIV ports will fail with -EBUSY, until all associated NPIV ports have been explicitly shutdown and base_tpg disabled. Note that the actual configfs_[un]depend_item() is done from seperate process context, as these are not intended to be called directly from configfs callbacks. Cc: Sawan Chandak Cc: Quinn Tran Cc: Saurav Kashyap Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index db43b289..5bdc440 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -941,15 +941,41 @@ static ssize_t tcm_qla2xxx_tpg_show_enable( atomic_read(&tpg->lport_tpg_enabled)); } +static void tcm_qla2xxx_depend_tpg(struct work_struct *work) +{ + struct tcm_qla2xxx_tpg *base_tpg = container_of(work, + struct tcm_qla2xxx_tpg, tpg_base_work); + struct se_portal_group *se_tpg = &base_tpg->se_tpg; + struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; + + if (!configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys, + &se_tpg->tpg_group.cg_item)) { + atomic_set(&base_tpg->lport_tpg_enabled, 1); + qlt_enable_vha(base_vha); + } + complete(&base_tpg->tpg_base_comp); +} + +static void tcm_qla2xxx_undepend_tpg(struct work_struct *work) +{ + struct tcm_qla2xxx_tpg *base_tpg = container_of(work, + struct tcm_qla2xxx_tpg, tpg_base_work); + struct se_portal_group *se_tpg = &base_tpg->se_tpg; + struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; + + if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) { + atomic_set(&base_tpg->lport_tpg_enabled, 0); + configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys, + &se_tpg->tpg_group.cg_item); + } + complete(&base_tpg->tpg_base_comp); +} + static ssize_t tcm_qla2xxx_tpg_store_enable( struct se_portal_group *se_tpg, const char *page, size_t count) { - struct se_wwn *se_wwn = se_tpg->se_tpg_wwn; - struct tcm_qla2xxx_lport *lport = container_of(se_wwn, - struct tcm_qla2xxx_lport, lport_wwn); - struct scsi_qla_host *vha = lport->qla_vha; struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); unsigned long op; @@ -964,19 +990,28 @@ static ssize_t tcm_qla2xxx_tpg_store_enable( pr_err("Illegal value for tpg_enable: %lu\n", op); return -EINVAL; } - if (op) { - atomic_set(&tpg->lport_tpg_enabled, 1); - qlt_enable_vha(vha); + if (atomic_read(&tpg->lport_tpg_enabled)) + return -EEXIST; + + INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_depend_tpg); } else { - if (!vha->vha_tgt.qla_tgt) { - pr_err("struct qla_hw_data *vha->vha_tgt.qla_tgt is NULL\n"); - return -ENODEV; - } - atomic_set(&tpg->lport_tpg_enabled, 0); - qlt_stop_phase1(vha->vha_tgt.qla_tgt); + if (!atomic_read(&tpg->lport_tpg_enabled)) + return count; + + INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_undepend_tpg); } + init_completion(&tpg->tpg_base_comp); + schedule_work(&tpg->tpg_base_work); + wait_for_completion(&tpg->tpg_base_comp); + if (op) { + if (!atomic_read(&tpg->lport_tpg_enabled)) + return -ENODEV; + } else { + if (atomic_read(&tpg->lport_tpg_enabled)) + return -EPERM; + } return count; } @@ -1703,6 +1738,9 @@ static int tcm_qla2xxx_lport_register_npiv_cb(struct scsi_qla_host *base_vha, struct scsi_qla_host *npiv_vha; struct tcm_qla2xxx_lport *lport = (struct tcm_qla2xxx_lport *)target_lport_ptr; + struct tcm_qla2xxx_lport *base_lport = + (struct tcm_qla2xxx_lport *)base_vha->vha_tgt.target_lport_ptr; + struct tcm_qla2xxx_tpg *base_tpg; struct fc_vport_identifiers vport_id; if (!qla_tgt_mode_enabled(base_vha)) { @@ -1710,6 +1748,13 @@ static int tcm_qla2xxx_lport_register_npiv_cb(struct scsi_qla_host *base_vha, return -EPERM; } + if (!base_lport || !base_lport->tpg_1 || + !atomic_read(&base_lport->tpg_1->lport_tpg_enabled)) { + pr_err("qla2xxx base_lport or tpg_1 not available\n"); + return -EPERM; + } + base_tpg = base_lport->tpg_1; + memset(&vport_id, 0, sizeof(vport_id)); vport_id.port_name = npiv_wwpn; vport_id.node_name = npiv_wwnn; @@ -1728,7 +1773,6 @@ static int tcm_qla2xxx_lport_register_npiv_cb(struct scsi_qla_host *base_vha, npiv_vha = (struct scsi_qla_host *)vport->dd_data; npiv_vha->vha_tgt.target_lport_ptr = target_lport_ptr; lport->qla_vha = npiv_vha; - scsi_host_get(npiv_vha->host); return 0; } diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 275d8b9..a90966d 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -43,6 +43,9 @@ struct tcm_qla2xxx_tpg { struct tcm_qla2xxx_tpg_attrib tpg_attrib; /* Returned by tcm_qla2xxx_make_tpg() */ struct se_portal_group se_tpg; + /* Items for dealing with configfs_depend_item */ + struct completion tpg_base_comp; + struct work_struct tpg_base_work; }; struct tcm_qla2xxx_fc_loopid { -- cgit v0.10.2 From 84197a36e9d78213da17b96fb838afcca4e150ea Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 30 Jan 2014 09:52:21 -0800 Subject: tcm_qla2xxx: Fix NAA formatted name for NPIV WWPNs This patch fixes the NAA formatted name used by EVPD=0x83 device identifer to reflect the proper NPIV enabled WWPN. Cc: Sawan Chandak Cc: Quinn Tran Cc: Saurav Kashyap Cc: Giridhar Malavali Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 5bdc440..788c4fe 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -182,20 +182,6 @@ static int tcm_qla2xxx_npiv_parse_wwn( return 0; } -static ssize_t tcm_qla2xxx_npiv_format_wwn(char *buf, size_t len, - u64 wwpn, u64 wwnn) -{ - u8 b[8], b2[8]; - - put_unaligned_be64(wwpn, b); - put_unaligned_be64(wwnn, b2); - return snprintf(buf, len, - "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x," - "%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x", - b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7], - b2[0], b2[1], b2[2], b2[3], b2[4], b2[5], b2[6], b2[7]); -} - static char *tcm_qla2xxx_npiv_get_fabric_name(void) { return "qla2xxx_npiv"; @@ -227,15 +213,6 @@ static char *tcm_qla2xxx_get_fabric_wwn(struct se_portal_group *se_tpg) return lport->lport_naa_name; } -static char *tcm_qla2xxx_npiv_get_fabric_wwn(struct se_portal_group *se_tpg) -{ - struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, - struct tcm_qla2xxx_tpg, se_tpg); - struct tcm_qla2xxx_lport *lport = tpg->lport; - - return &lport->lport_npiv_name[0]; -} - static u16 tcm_qla2xxx_get_tag(struct se_portal_group *se_tpg) { struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, @@ -1811,8 +1788,6 @@ static struct se_wwn *tcm_qla2xxx_npiv_make_lport( } lport->lport_npiv_wwpn = npiv_wwpn; lport->lport_npiv_wwnn = npiv_wwnn; - tcm_qla2xxx_npiv_format_wwn(&lport->lport_npiv_name[0], - TCM_QLA2XXX_NAMELEN, npiv_wwpn, npiv_wwnn); sprintf(lport->lport_naa_name, "naa.%016llx", (unsigned long long) npiv_wwpn); ret = tcm_qla2xxx_init_lport(lport); @@ -1921,7 +1896,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = { static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = { .get_fabric_name = tcm_qla2xxx_npiv_get_fabric_name, .get_fabric_proto_ident = tcm_qla2xxx_get_fabric_proto_ident, - .tpg_get_wwn = tcm_qla2xxx_npiv_get_fabric_wwn, + .tpg_get_wwn = tcm_qla2xxx_get_fabric_wwn, .tpg_get_tag = tcm_qla2xxx_get_tag, .tpg_get_default_depth = tcm_qla2xxx_get_default_depth, .tpg_get_pr_transport_id = tcm_qla2xxx_get_pr_transport_id, diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index a90966d..33aaac8 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -4,8 +4,6 @@ #define TCM_QLA2XXX_VERSION "v0.1" /* length of ASCII WWPNs including pad */ #define TCM_QLA2XXX_NAMELEN 32 -/* lenth of ASCII NPIV 'WWPN+WWNN' including pad */ -#define TCM_QLA2XXX_NPIV_NAMELEN 66 #include "qla_target.h" @@ -65,8 +63,6 @@ struct tcm_qla2xxx_lport { char lport_name[TCM_QLA2XXX_NAMELEN]; /* ASCII formatted naa WWPN for VPD page 83 etc */ char lport_naa_name[TCM_QLA2XXX_NAMELEN]; - /* ASCII formatted WWPN+WWNN for NPIV FC Target Lport */ - char lport_npiv_name[TCM_QLA2XXX_NPIV_NAMELEN]; /* map for fc_port pointers in 24-bit FC Port ID space */ struct btree_head32 lport_fcport_map; /* vmalloc-ed memory for fc_port pointers for 16-bit FC loop ID */ -- cgit v0.10.2 From 8e5780fdeef7dc490b3f0b3a62704593721fa4f3 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Feb 2014 13:24:50 -0800 Subject: x86, AVX-512: AVX-512 Feature Detection AVX-512 is an extention of AVX2. Its spec can be found at: http://download-software.intel.com/sites/default/files/managed/71/2e/319433-017.pdf This patch detects AVX-512 features by CPUID. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1392931491-33237-1-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin Cc: # hw enabling diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index e099f95..5f12968 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -217,9 +217,13 @@ #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ #define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */ +#define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */ #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512PF (9*32+26) /* AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER (9*32+27) /* AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD (9*32+28) /* AVX-512 Conflict Detection */ /* * BUG word(s) -- cgit v0.10.2 From c2bc11f10a39527cd1bb252097b5525664560956 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Feb 2014 13:24:51 -0800 Subject: x86, AVX-512: Enable AVX-512 States Context Switch This patch enables Opmask, ZMM_Hi256, and Hi16_ZMM AVX-512 states for xstate context switch. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1392931491-33237-2-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin Cc: # hw enabling diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 5547389..6c1d741 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -6,11 +6,14 @@ #define XSTATE_CPUID 0x0000000d -#define XSTATE_FP 0x1 -#define XSTATE_SSE 0x2 -#define XSTATE_YMM 0x4 -#define XSTATE_BNDREGS 0x8 -#define XSTATE_BNDCSR 0x10 +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 +#define XSTATE_BNDREGS 0x8 +#define XSTATE_BNDCSR 0x10 +#define XSTATE_OPMASK 0x20 +#define XSTATE_ZMM_Hi256 0x40 +#define XSTATE_Hi16_ZMM 0x80 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) @@ -23,7 +26,8 @@ #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) /* Supported features which support lazy state saving */ -#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) +#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ + | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) /* Supported features which require eager state saving */ #define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) -- cgit v0.10.2 From 896dc5064083063981954f142e43548765199792 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 20 Jan 2014 10:31:54 +0800 Subject: x86, acpi: Fix bug in associating hot-added CPUs with corresponding NUMA node Current ACPI cpu hotplug driver fails to associate hot-added CPUs with corresponding NUMA node when doing socket online. The code path to associate CPU with NUMA node is as below: acpi_processor_add() ->acpi_processor_get_info() ->acpi_processor_hotadd_init() ->acpi_map_lsapic() ->_acpi_map_lsapic() ->acpi_map_cpu2node() cpu_subsys_online() ->try_online_node() ->node_set_online() When doing socket online, a new NUMA node is introduced in addition to hot-added CPU and memory device. And the new NUMA node is marked as online when onlining hot-added CPUs through sysfs interface /sys/devices/system/cpu/cpuxx/online. On the other hand, acpi_map_cpu2node() will only build the CPU to node map if corresponding NUMA node is already online, so it always fails to associate hot-added CPUs with corresponding NUMA node because the NUMA node is still in offline state. For the fix, we could safely remove the "node_online(node)" check in function acpi_map_cpu2node() because it's only called for hot-added CPUs by acpi_processor_hotadd_init(). Signed-off-by: Jiang Liu Link: http://lkml.kernel.org/r/1390185115-26850-1-git-send-email-jiang.liu@linux.intel.com Acked-by: Rafael J. Wysocki Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1dac942..9f46f2b 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -613,10 +613,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) int nid; nid = acpi_get_node(handle); - if (nid == -1 || !node_online(nid)) - return; - set_apicid_to_node(physid, nid); - numa_set_node(cpu, nid); + if (nid != -1) { + set_apicid_to_node(physid, nid); + numa_set_node(cpu, nid); + } #endif } -- cgit v0.10.2 From 423edb6fce67133d4524513954af943a75bb6ef5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 10 Feb 2014 13:15:28 +0100 Subject: s390/compat: fix sys_sched_getattr compat wrapper Fix stupid typo. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 59c8efc..0248949 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1421,5 +1421,5 @@ ENTRY(sys_sched_setattr_wrapper) ENTRY(sys_sched_getattr_wrapper) lgfr %r2,%r2 # pid_t llgtr %r3,%r3 # const char __user * - llgfr %r3,%r3 # unsigned int + llgfr %r4,%r4 # unsigned int jg sys_sched_getattr -- cgit v0.10.2 From 5ec6d4918a45952e99b1b36c93372d79d6927c57 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 18 Feb 2014 19:47:17 +0100 Subject: s390/pci/dma: use correct segment boundary size The boundary size for iommu_area_alloc() is currently set to a constant value. This is wrong, we shouldn't use a constant value but rather the return value of dma_get_seg_boundary(), since a device driver can override the default. Reviewed-by: Sebastian Ott Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 60c11a6..f91c031 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -206,11 +206,13 @@ static void dma_cleanup_tables(struct zpci_dev *zdev) zdev->dma_table = NULL; } -static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, unsigned long start, - int size) +static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, + unsigned long start, int size) { - unsigned long boundary_size = 0x1000000; + unsigned long boundary_size; + boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1, + PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, 0, boundary_size, 0); } -- cgit v0.10.2 From 9955e8d15f53e53540aaed7bcef640142e65e900 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Wed, 19 Feb 2014 17:43:04 +0100 Subject: s390/cio: Fix missing subchannels after CHPID configure on Performing a Channel-Path configure on operation on a Channel-Path ID (CHPID) does not trigger a scan for subchannels that might have become available through that CHPID. As a result, some subchannels and associated I/O devices might be missing. Fix this by adding the missing scan. This problem was introduced by commit c820de39, "[S390] cio: Rework css driver.", but wasn't noticed earlier because the machine usually also generates a Channel-Report-Word when the first CHPID of a subchannel is configured on, resulting in a separate scan for that subchannel. The problem only becomes apparent when this first CHPID is not working properly and additional working CHPIDs are subsequently configured on without any effect on the availability of the affected subchannel. Reviewed-by: Sebastian Ott Signed-off-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index f6b9188..9f0ea6c 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -610,6 +610,7 @@ void chsc_chp_online(struct chp_id chpid) css_wait_for_slow_path(); for_each_subchannel_staged(__s390_process_res_acc, NULL, &link); + css_schedule_reprobe(); } } -- cgit v0.10.2 From 2253e8d79237c69086ded391e6767afe16972527 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 27 Jan 2014 13:26:10 +0100 Subject: s390/cio: fix driver callback initialization for ccw consoles ccw consoles are in use before they can be properly registered with the driver core. For devices which are in use by a device driver we rely on the ccw_device's pointer to the driver callbacks to be valid. For ccw consoles this pointer is NULL until they are registered later during boot and we dereferenced this pointer. This worked by chance on 64 bit builds (cdev->drv was NULL but the optional callback cdev->drv->path_event was also NULL by coincidence) and was unnoticed until we received reports about boot failures on 31 bit systems. Fix it by initializing the driver pointer for ccw consoles. Cc: # 3.10+ Reported-by: Mike Frysinger Reported-by: Heiko Carstens Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index f201af8..31b5ca8 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -219,7 +219,7 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define to_ccwdev(n) container_of(n, struct ccw_device, dev) #define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) -extern struct ccw_device *ccw_device_probe_console(void); +extern struct ccw_device *ccw_device_probe_console(struct ccw_driver *); extern void ccw_device_wait_idle(struct ccw_device *); extern int ccw_device_force_console(struct ccw_device *); diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index eb5d227..bb86494 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -922,7 +922,7 @@ static int __init con3215_init(void) raw3215_freelist = req; } - cdev = ccw_device_probe_console(); + cdev = ccw_device_probe_console(&raw3215_ccw_driver); if (IS_ERR(cdev)) return -ENODEV; diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index 699fd3e..bb6b0df 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -576,7 +576,6 @@ static struct console con3270 = { static int __init con3270_init(void) { - struct ccw_device *cdev; struct raw3270 *rp; void *cbuf; int i; @@ -591,10 +590,7 @@ con3270_init(void) cpcmd("TERM AUTOCR OFF", NULL, 0, NULL); } - cdev = ccw_device_probe_console(); - if (IS_ERR(cdev)) - return -ENODEV; - rp = raw3270_setup_console(cdev); + rp = raw3270_setup_console(); if (IS_ERR(rp)) return PTR_ERR(rp); diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 2cdec21..de2c048 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -776,16 +776,24 @@ raw3270_setup_device(struct ccw_device *cdev, struct raw3270 *rp, char *ascebc) } #ifdef CONFIG_TN3270_CONSOLE +/* Tentative definition - see below for actual definition. */ +static struct ccw_driver raw3270_ccw_driver; + /* * Setup 3270 device configured as console. */ -struct raw3270 __init *raw3270_setup_console(struct ccw_device *cdev) +struct raw3270 __init *raw3270_setup_console(void) { + struct ccw_device *cdev; unsigned long flags; struct raw3270 *rp; char *ascebc; int rc; + cdev = ccw_device_probe_console(&raw3270_ccw_driver); + if (IS_ERR(cdev)) + return ERR_CAST(cdev); + rp = kzalloc(sizeof(struct raw3270), GFP_KERNEL | GFP_DMA); ascebc = kzalloc(256, GFP_KERNEL); rc = raw3270_setup_device(cdev, rp, ascebc); diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 7b73ff8..359276a 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -190,7 +190,7 @@ raw3270_put_view(struct raw3270_view *view) wake_up(&raw3270_wait_queue); } -struct raw3270 *raw3270_setup_console(struct ccw_device *cdev); +struct raw3270 *raw3270_setup_console(void); void raw3270_wait_cons_dev(struct raw3270 *); /* Notifier for device addition/removal */ diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e9d7835..4283dd3 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1609,7 +1609,7 @@ out_unlock: return rc; } -struct ccw_device *ccw_device_probe_console(void) +struct ccw_device *ccw_device_probe_console(struct ccw_driver *drv) { struct io_subchannel_private *io_priv; struct ccw_device *cdev; @@ -1631,6 +1631,7 @@ struct ccw_device *ccw_device_probe_console(void) kfree(io_priv); return cdev; } + cdev->drv = drv; set_io_private(sch, io_priv); ret = ccw_device_console_enable(cdev, sch); if (ret) { -- cgit v0.10.2 From 1e5320960510d6d6f2cbdc7ed33df9791283b7ea Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 27 Jan 2014 13:28:10 +0100 Subject: s390/cio: reorder initialization of ccw consoles Drivers for ccw consoles use ccw_device_probe_console to receive an initialized ccw device which is already enabled for interrupts. After that the device driver does the initialization of its private data. This can race with unsolicited interrupts which can happen once the device is enabled for interrupts. Split ccw_device_probe_console into ccw_device_create_console and ccw_device_enable_console and reorder the initialization of the ccw console drivers. While at it mark these functions as __init. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 31b5ca8..a9c2c06 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -219,7 +219,9 @@ extern void ccw_device_get_id(struct ccw_device *, struct ccw_dev_id *); #define to_ccwdev(n) container_of(n, struct ccw_device, dev) #define to_ccwdrv(n) container_of(n, struct ccw_driver, driver) -extern struct ccw_device *ccw_device_probe_console(struct ccw_driver *); +extern struct ccw_device *ccw_device_create_console(struct ccw_driver *); +extern void ccw_device_destroy_console(struct ccw_device *); +extern int ccw_device_enable_console(struct ccw_device *); extern void ccw_device_wait_idle(struct ccw_device *); extern int ccw_device_force_console(struct ccw_device *); diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index bb86494..5af7f0b 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -922,7 +922,7 @@ static int __init con3215_init(void) raw3215_freelist = req; } - cdev = ccw_device_probe_console(&raw3215_ccw_driver); + cdev = ccw_device_create_console(&raw3215_ccw_driver); if (IS_ERR(cdev)) return -ENODEV; @@ -932,6 +932,12 @@ static int __init con3215_init(void) cdev->handler = raw3215_irq; raw->flags |= RAW3215_FIXED; + if (ccw_device_enable_console(cdev)) { + ccw_device_destroy_console(cdev); + raw3215_free_info(raw); + raw3215[0] = NULL; + return -ENODEV; + } /* Request the console irq */ if (raw3215_startup(raw) != 0) { diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index de2c048..041c65b 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -790,7 +790,7 @@ struct raw3270 __init *raw3270_setup_console(void) char *ascebc; int rc; - cdev = ccw_device_probe_console(&raw3270_ccw_driver); + cdev = ccw_device_create_console(&raw3270_ccw_driver); if (IS_ERR(cdev)) return ERR_CAST(cdev); @@ -800,6 +800,13 @@ struct raw3270 __init *raw3270_setup_console(void) if (rc) return ERR_PTR(rc); set_bit(RAW3270_FLAGS_CONSOLE, &rp->flags); + + rc = ccw_device_enable_console(cdev); + if (rc) { + ccw_device_destroy_console(cdev); + return ERR_PTR(rc); + } + spin_lock_irqsave(get_ccwdev_lock(rp->cdev), flags); do { __raw3270_reset_device(rp); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 4283dd3..da43199 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1572,11 +1572,14 @@ out: } #ifdef CONFIG_CCW_CONSOLE -static int ccw_device_console_enable(struct ccw_device *cdev, - struct subchannel *sch) +int __init ccw_device_enable_console(struct ccw_device *cdev) { + struct subchannel *sch = to_subchannel(cdev->dev.parent); int rc; + if (!cdev->drv || !cdev->handler) + return -EINVAL; + io_subchannel_init_fields(sch); rc = cio_commit_config(sch); if (rc) @@ -1609,12 +1612,11 @@ out_unlock: return rc; } -struct ccw_device *ccw_device_probe_console(struct ccw_driver *drv) +struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv) { struct io_subchannel_private *io_priv; struct ccw_device *cdev; struct subchannel *sch; - int ret; sch = cio_probe_console(); if (IS_ERR(sch)) @@ -1633,17 +1635,20 @@ struct ccw_device *ccw_device_probe_console(struct ccw_driver *drv) } cdev->drv = drv; set_io_private(sch, io_priv); - ret = ccw_device_console_enable(cdev, sch); - if (ret) { - set_io_private(sch, NULL); - put_device(&sch->dev); - put_device(&cdev->dev); - kfree(io_priv); - return ERR_PTR(ret); - } return cdev; } +void __init ccw_device_destroy_console(struct ccw_device *cdev) +{ + struct subchannel *sch = to_subchannel(cdev->dev.parent); + struct io_subchannel_private *io_priv = to_io_private(sch); + + set_io_private(sch, NULL); + put_device(&sch->dev); + put_device(&cdev->dev); + kfree(io_priv); +} + /** * ccw_device_wait_idle() - busy wait for device to become idle * @cdev: ccw device -- cgit v0.10.2 From 137a14f434705a366cc94b2b32f2488c975863ad Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 27 Jan 2014 13:29:15 +0100 Subject: s390/cio: fix irq stats for early interrupts on ccw consoles Interrupts which happen on ccw consoles prior to their registration with the driver core are not accounted to the respective device driver. Fix this by setting the proper interrupt class during initialization of ccw consoles. Reviewed-by: Peter Oberparleiter Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index da43199..d8d9b5b 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1571,6 +1571,18 @@ out: return rc; } +static void ccw_device_set_int_class(struct ccw_device *cdev) +{ + struct ccw_driver *cdrv = cdev->drv; + + /* Note: we interpret class 0 in this context as an uninitialized + * field since it translates to a non-I/O interrupt class. */ + if (cdrv->int_class != 0) + cdev->private->int_class = cdrv->int_class; + else + cdev->private->int_class = IRQIO_CIO; +} + #ifdef CONFIG_CCW_CONSOLE int __init ccw_device_enable_console(struct ccw_device *cdev) { @@ -1635,6 +1647,7 @@ struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv) } cdev->drv = drv; set_io_private(sch, io_priv); + ccw_device_set_int_class(cdev); return cdev; } @@ -1732,15 +1745,8 @@ ccw_device_probe (struct device *dev) int ret; cdev->drv = cdrv; /* to let the driver call _set_online */ - /* Note: we interpret class 0 in this context as an uninitialized - * field since it translates to a non-I/O interrupt class. */ - if (cdrv->int_class != 0) - cdev->private->int_class = cdrv->int_class; - else - cdev->private->int_class = IRQIO_CIO; - + ccw_device_set_int_class(cdev); ret = cdrv->probe ? cdrv->probe(cdev) : -ENODEV; - if (ret) { cdev->drv = NULL; cdev->private->int_class = IRQIO_CIO; -- cgit v0.10.2 From cfa785e623577cdad2aa721acb23bd3a95eced9a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 22 Jan 2014 14:49:30 +0100 Subject: s390/uaccess: normalize order of parameters of indirect uaccess function calls For some unknown reason the indirect uaccess functions on s390 implement a different parameter order than what is usual. e.g.: unsigned long copy_to_user(void *to, const void *from, unsigned long n); vs. size_t (*copy_to_user)(size_t n, void __user * to, const void *from); Let's get rid of this confusing parameter reordering. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 79330af..7319963 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -93,12 +93,12 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) #define ARCH_HAS_SEARCH_EXTABLE struct uaccess_ops { - size_t (*copy_from_user)(size_t, const void __user *, void *); - size_t (*copy_to_user)(size_t, void __user *, const void *); - size_t (*copy_in_user)(size_t, void __user *, const void __user *); - size_t (*clear_user)(size_t, void __user *); - size_t (*strnlen_user)(size_t, const char __user *); - size_t (*strncpy_from_user)(size_t, const char __user *, char *); + size_t (*copy_from_user)(void *, const void __user *, size_t); + size_t (*copy_to_user)(void __user *, const void *, size_t); + size_t (*copy_in_user)(void __user *, const void __user *, size_t); + size_t (*clear_user)(void __user *, size_t); + size_t (*strnlen_user)(const char __user *, size_t); + size_t (*strncpy_from_user)(char *, const char __user *, size_t); int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old); int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new); }; @@ -109,15 +109,15 @@ extern struct uaccess_ops uaccess_pt; extern int __handle_fault(unsigned long, unsigned long, int); -static inline int __put_user_fn(size_t size, void __user *ptr, void *x) +static inline int __put_user_fn(void *x, void __user *ptr, size_t size) { - size = uaccess.copy_to_user(size, ptr, x); + size = uaccess.copy_to_user(ptr, x, size); return size ? -EFAULT : size; } -static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) +static inline int __get_user_fn(void *x, const void __user *ptr, size_t size) { - size = uaccess.copy_from_user(size, ptr, x); + size = uaccess.copy_from_user(x, ptr, size); return size ? -EFAULT : size; } @@ -135,8 +135,8 @@ static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) case 2: \ case 4: \ case 8: \ - __pu_err = __put_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __pu_err = __put_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ break; \ default: \ __put_user_bad(); \ @@ -161,29 +161,29 @@ extern int __put_user_bad(void) __attribute__((noreturn)); switch (sizeof(*(ptr))) { \ case 1: { \ unsigned char __x; \ - __gu_err = __get_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 2: { \ unsigned short __x; \ - __gu_err = __get_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 4: { \ unsigned int __x; \ - __gu_err = __get_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 8: { \ unsigned long long __x; \ - __gu_err = __get_user_fn(sizeof (*(ptr)), \ - ptr, &__x); \ + __gu_err = __get_user_fn(&__x, ptr, \ + sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ @@ -222,7 +222,7 @@ extern int __get_user_bad(void) __attribute__((noreturn)); static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - return uaccess.copy_to_user(n, to, from); + return uaccess.copy_to_user(to, from, n); } #define __copy_to_user_inatomic __copy_to_user @@ -268,7 +268,7 @@ copy_to_user(void __user *to, const void *from, unsigned long n) static inline unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n) { - return uaccess.copy_from_user(n, from, to); + return uaccess.copy_from_user(to, from, n); } extern void copy_from_user_overflow(void) @@ -309,7 +309,7 @@ copy_from_user(void *to, const void __user *from, unsigned long n) static inline unsigned long __must_check __copy_in_user(void __user *to, const void __user *from, unsigned long n) { - return uaccess.copy_in_user(n, to, from); + return uaccess.copy_in_user(to, from, n); } static inline unsigned long __must_check @@ -326,14 +326,14 @@ static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) { might_fault(); - return uaccess.strncpy_from_user(count, src, dst); + return uaccess.strncpy_from_user(dst, src, count); } static inline unsigned long strnlen_user(const char __user * src, unsigned long n) { might_fault(); - return uaccess.strnlen_user(n, src); + return uaccess.strnlen_user(src, n); } /** @@ -359,14 +359,14 @@ strnlen_user(const char __user * src, unsigned long n) static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) { - return uaccess.clear_user(n, to); + return uaccess.clear_user(to, n); } static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_fault(); - return uaccess.clear_user(n, to); + return uaccess.clear_user(to, n); } extern int copy_to_user_real(void __user *dest, void *src, size_t count); diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 4b7993b..95123f5 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -26,7 +26,7 @@ #define SLR "slgr" #endif -static size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) +static size_t copy_from_user_mvcos(void *x, const void __user *ptr, size_t size) { register unsigned long reg0 asm("0") = 0x81UL; unsigned long tmp1, tmp2; @@ -65,7 +65,7 @@ static size_t copy_from_user_mvcos(size_t size, const void __user *ptr, void *x) return size; } -static size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) +static size_t copy_to_user_mvcos(void __user *ptr, const void *x, size_t size) { register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; @@ -94,8 +94,8 @@ static size_t copy_to_user_mvcos(size_t size, void __user *ptr, const void *x) return size; } -static size_t copy_in_user_mvcos(size_t size, void __user *to, - const void __user *from) +static size_t copy_in_user_mvcos(void __user *to, const void __user *from, + size_t size) { register unsigned long reg0 asm("0") = 0x810081UL; unsigned long tmp1, tmp2; @@ -117,7 +117,7 @@ static size_t copy_in_user_mvcos(size_t size, void __user *to, return size; } -static size_t clear_user_mvcos(size_t size, void __user *to) +static size_t clear_user_mvcos(void __user *to, size_t size) { register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; @@ -145,7 +145,7 @@ static size_t clear_user_mvcos(size_t size, void __user *to) return size; } -static size_t strnlen_user_mvcos(size_t count, const char __user *src) +static size_t strnlen_user_mvcos(const char __user *src, size_t count) { size_t done, len, offset, len_str; char buf[256]; @@ -155,7 +155,7 @@ static size_t strnlen_user_mvcos(size_t count, const char __user *src) offset = (size_t)src & ~PAGE_MASK; len = min(256UL, PAGE_SIZE - offset); len = min(count - done, len); - if (copy_from_user_mvcos(len, src, buf)) + if (copy_from_user_mvcos(buf, src, len)) return 0; len_str = strnlen(buf, len); done += len_str; @@ -164,8 +164,8 @@ static size_t strnlen_user_mvcos(size_t count, const char __user *src) return done + 1; } -static size_t strncpy_from_user_mvcos(size_t count, const char __user *src, - char *dst) +static size_t strncpy_from_user_mvcos(char *dst, const char __user *src, + size_t count) { size_t done, len, offset, len_str; @@ -175,7 +175,7 @@ static size_t strncpy_from_user_mvcos(size_t count, const char __user *src, do { offset = (size_t)src & ~PAGE_MASK; len = min(count - done, PAGE_SIZE - offset); - if (copy_from_user_mvcos(len, src, dst)) + if (copy_from_user_mvcos(dst, src, len)) return -EFAULT; len_str = strnlen(dst, len); done += len_str; diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 61ebcc9..2fa696b 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -22,7 +22,7 @@ #define SLR "slgr" #endif -static size_t strnlen_kernel(size_t count, const char __user *src) +static size_t strnlen_kernel(const char __user *src, size_t count) { register unsigned long reg0 asm("0") = 0UL; unsigned long tmp1, tmp2; @@ -42,8 +42,8 @@ static size_t strnlen_kernel(size_t count, const char __user *src) return count; } -static size_t copy_in_kernel(size_t count, void __user *to, - const void __user *from) +static size_t copy_in_kernel(void __user *to, const void __user *from, + size_t count) { unsigned long tmp1; @@ -211,26 +211,26 @@ fault: return 0; } -static size_t copy_from_user_pt(size_t n, const void __user *from, void *to) +static size_t copy_from_user_pt(void *to, const void __user *from, size_t n) { size_t rc; if (segment_eq(get_fs(), KERNEL_DS)) - return copy_in_kernel(n, (void __user *) to, from); + return copy_in_kernel((void __user *) to, from, n); rc = __user_copy_pt((unsigned long) from, to, n, 0); if (unlikely(rc)) memset(to + n - rc, 0, rc); return rc; } -static size_t copy_to_user_pt(size_t n, void __user *to, const void *from) +static size_t copy_to_user_pt(void __user *to, const void *from, size_t n) { if (segment_eq(get_fs(), KERNEL_DS)) - return copy_in_kernel(n, to, (void __user *) from); + return copy_in_kernel(to, (void __user *) from, n); return __user_copy_pt((unsigned long) to, (void *) from, n, 1); } -static size_t clear_user_pt(size_t n, void __user *to) +static size_t clear_user_pt(void __user *to, size_t n) { void *zpage = (void *) empty_zero_page; long done, size, ret; @@ -242,7 +242,7 @@ static size_t clear_user_pt(size_t n, void __user *to) else size = n - done; if (segment_eq(get_fs(), KERNEL_DS)) - ret = copy_in_kernel(n, to, (void __user *) zpage); + ret = copy_in_kernel(to, (void __user *) zpage, n); else ret = __user_copy_pt((unsigned long) to, zpage, size, 1); done += size; @@ -253,7 +253,7 @@ static size_t clear_user_pt(size_t n, void __user *to) return 0; } -static size_t strnlen_user_pt(size_t count, const char __user *src) +static size_t strnlen_user_pt(const char __user *src, size_t count) { unsigned long uaddr = (unsigned long) src; struct mm_struct *mm = current->mm; @@ -263,7 +263,7 @@ static size_t strnlen_user_pt(size_t count, const char __user *src) if (unlikely(!count)) return 0; if (segment_eq(get_fs(), KERNEL_DS)) - return strnlen_kernel(count, src); + return strnlen_kernel(src, count); if (!mm) return 0; done = 0; @@ -289,8 +289,8 @@ fault: goto retry; } -static size_t strncpy_from_user_pt(size_t count, const char __user *src, - char *dst) +static size_t strncpy_from_user_pt(char *dst, const char __user *src, + size_t count) { size_t done, len, offset, len_str; @@ -301,7 +301,7 @@ static size_t strncpy_from_user_pt(size_t count, const char __user *src, offset = (size_t)src & ~PAGE_MASK; len = min(count - done, PAGE_SIZE - offset); if (segment_eq(get_fs(), KERNEL_DS)) { - if (copy_in_kernel(len, (void __user *) dst, src)) + if (copy_in_kernel((void __user *) dst, src, len)) return -EFAULT; } else { if (__user_copy_pt((unsigned long) src, dst, len, 0)) @@ -315,8 +315,8 @@ static size_t strncpy_from_user_pt(size_t count, const char __user *src, return done; } -static size_t copy_in_user_pt(size_t n, void __user *to, - const void __user *from) +static size_t copy_in_user_pt(void __user *to, const void __user *from, + size_t n) { struct mm_struct *mm = current->mm; unsigned long offset_max, uaddr, done, size, error_code; @@ -326,7 +326,7 @@ static size_t copy_in_user_pt(size_t n, void __user *to, int write_user; if (segment_eq(get_fs(), KERNEL_DS)) - return copy_in_kernel(n, to, from); + return copy_in_kernel(to, from, n); if (!mm) return n; done = 0; -- cgit v0.10.2 From 4f41c2b4567dbfb7ff93e5c552b869e2865bcd9d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 23 Jan 2014 11:18:36 +0100 Subject: s390/uaccess: get rid of indirect function calls There are only two uaccess variants on s390 left: the version that is used if the mvcos instruction is available, and the page table walk variant. So there is no need for expensive indirect function calls. By default the mvcos variant will be called. If the mvcos instruction is not available it will call the page table walk variant. For minimal performance impact the "if (mvcos_is_available)" is implemented with a jump label, which will be a six byte nop on machines with mvcos. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 51bcaa0..fda46bd 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -5,7 +5,10 @@ #include #include -static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval); +int __futex_atomic_op_inuser(int op, u32 __user *uaddr, int oparg, int *old); + +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -17,7 +20,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) oparg = 1 << oparg; pagefault_disable(); - ret = uaccess.futex_atomic_op(op, uaddr, oparg, &oldval); + ret = __futex_atomic_op_inuser(op, uaddr, oparg, &oldval); pagefault_enable(); if (!ret) { @@ -34,10 +37,4 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) -{ - return uaccess.futex_atomic_cmpxchg(uval, uaddr, oldval, newval); -} - #endif /* _ASM_S390_FUTEX_H */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 7319963..49885a5 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -92,33 +92,58 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) #define ARCH_HAS_SORT_EXTABLE #define ARCH_HAS_SEARCH_EXTABLE -struct uaccess_ops { - size_t (*copy_from_user)(void *, const void __user *, size_t); - size_t (*copy_to_user)(void __user *, const void *, size_t); - size_t (*copy_in_user)(void __user *, const void __user *, size_t); - size_t (*clear_user)(void __user *, size_t); - size_t (*strnlen_user)(const char __user *, size_t); - size_t (*strncpy_from_user)(char *, const char __user *, size_t); - int (*futex_atomic_op)(int op, u32 __user *, int oparg, int *old); - int (*futex_atomic_cmpxchg)(u32 *, u32 __user *, u32 old, u32 new); -}; +int __handle_fault(unsigned long, unsigned long, int); -extern struct uaccess_ops uaccess; -extern struct uaccess_ops uaccess_mvcos; -extern struct uaccess_ops uaccess_pt; +/** + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + */ +size_t __must_check __copy_from_user(void *to, const void __user *from, + size_t n); + +/** + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + */ +unsigned long __must_check __copy_to_user(void __user *to, const void *from, + unsigned long n); -extern int __handle_fault(unsigned long, unsigned long, int); +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user static inline int __put_user_fn(void *x, void __user *ptr, size_t size) { - size = uaccess.copy_to_user(ptr, x, size); - return size ? -EFAULT : size; + size = __copy_to_user(ptr, x, size); + return size ? -EFAULT : 0; } static inline int __get_user_fn(void *x, const void __user *ptr, size_t size) { - size = uaccess.copy_from_user(x, ptr, size); - return size ? -EFAULT : size; + size = __copy_from_user(x, ptr, size); + return size ? -EFAULT : 0; } /* @@ -152,7 +177,7 @@ static inline int __get_user_fn(void *x, const void __user *ptr, size_t size) }) -extern int __put_user_bad(void) __attribute__((noreturn)); +int __put_user_bad(void) __attribute__((noreturn)); #define __get_user(x, ptr) \ ({ \ @@ -200,35 +225,12 @@ extern int __put_user_bad(void) __attribute__((noreturn)); __get_user(x, ptr); \ }) -extern int __get_user_bad(void) __attribute__((noreturn)); +int __get_user_bad(void) __attribute__((noreturn)); #define __put_user_unaligned __put_user #define __get_user_unaligned __get_user /** - * __copy_to_user: - Copy a block of data into user space, with less checking. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from kernel space to user space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ -static inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) -{ - return uaccess.copy_to_user(to, from, n); -} - -#define __copy_to_user_inatomic __copy_to_user -#define __copy_from_user_inatomic __copy_from_user - -/** * copy_to_user: - Copy a block of data into user space. * @to: Destination address, in user space. * @from: Source address, in kernel space. @@ -248,30 +250,7 @@ copy_to_user(void __user *to, const void *from, unsigned long n) return __copy_to_user(to, from, n); } -/** - * __copy_from_user: - Copy a block of data from user space, with less checking. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from user space to kernel space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - */ -static inline unsigned long __must_check -__copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return uaccess.copy_from_user(to, from, n); -} - -extern void copy_from_user_overflow(void) +void copy_from_user_overflow(void) #ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS __compiletime_warning("copy_from_user() buffer size is not provably correct") #endif @@ -306,11 +285,8 @@ copy_from_user(void *to, const void __user *from, unsigned long n) return __copy_from_user(to, from, n); } -static inline unsigned long __must_check -__copy_in_user(void __user *to, const void __user *from, unsigned long n) -{ - return uaccess.copy_in_user(to, from, n); -} +unsigned long __must_check +__copy_in_user(void __user *to, const void __user *from, unsigned long n); static inline unsigned long __must_check copy_in_user(void __user *to, const void __user *from, unsigned long n) @@ -322,18 +298,22 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n) /* * Copy a null terminated string from userspace. */ + +long __strncpy_from_user(char *dst, const char __user *src, long count); + static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) { might_fault(); - return uaccess.strncpy_from_user(dst, src, count); + return __strncpy_from_user(dst, src, count); } -static inline unsigned long -strnlen_user(const char __user * src, unsigned long n) +size_t __must_check __strnlen_user(const char __user *src, size_t count); + +static inline size_t strnlen_user(const char __user *src, size_t n) { might_fault(); - return uaccess.strnlen_user(src, n); + return __strnlen_user(src, n); } /** @@ -355,21 +335,15 @@ strnlen_user(const char __user * src, unsigned long n) /* * Zero Userspace */ +size_t __must_check __clear_user(void __user *to, size_t size); -static inline unsigned long __must_check -__clear_user(void __user *to, unsigned long n) -{ - return uaccess.clear_user(to, n); -} - -static inline unsigned long __must_check -clear_user(void __user *to, unsigned long n) +static inline size_t __must_check clear_user(void __user *to, size_t n) { might_fault(); - return uaccess.clear_user(to, n); + return __clear_user(to, n); } -extern int copy_to_user_real(void __user *dest, void *src, size_t count); -extern int copy_from_user_real(void *dest, void __user *src, size_t count); +int copy_to_user_real(void __user *dest, void *src, size_t count); +int copy_from_user_real(void *dest, void __user *src, size_t count); #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 09e2f46..91ea009 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -47,7 +47,6 @@ #include #include -#include #include #include #include @@ -65,12 +64,6 @@ #include "entry.h" /* - * User copy operations. - */ -struct uaccess_ops uaccess; -EXPORT_SYMBOL(uaccess); - -/* * Machine setup.. */ unsigned int console_mode = 0; @@ -1009,8 +1002,6 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) &_edata; init_mm.brk = (unsigned long) &_end; - uaccess = MACHINE_HAS_MVCOS ? uaccess_mvcos : uaccess_pt; - parse_early_param(); detect_memory_layout(memory_chunk, memory_end); os_info_init(); diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index b068729..e3fffe1 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -2,8 +2,7 @@ # Makefile for s390-specific library files.. # -lib-y += delay.o string.o uaccess_pt.o find.o +lib-y += delay.o string.o uaccess_pt.o uaccess_mvcos.o find.o obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o obj-$(CONFIG_64BIT) += mem64.o -lib-$(CONFIG_64BIT) += uaccess_mvcos.o lib-$(CONFIG_SMP) += spinlock.o diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h index b1a2217..e5b9c924 100644 --- a/arch/s390/lib/uaccess.h +++ b/arch/s390/lib/uaccess.h @@ -6,7 +6,11 @@ #ifndef __ARCH_S390_LIB_UACCESS_H #define __ARCH_S390_LIB_UACCESS_H -extern int futex_atomic_op_pt(int, u32 __user *, int, int *); -extern int futex_atomic_cmpxchg_pt(u32 *, u32 __user *, u32, u32); +size_t copy_from_user_pt(void *to, const void __user *from, size_t n); +size_t copy_to_user_pt(void __user *to, const void *from, size_t n); +size_t copy_in_user_pt(void __user *to, const void __user *from, size_t n); +size_t clear_user_pt(void __user *to, size_t n); +size_t strnlen_user_pt(const char __user *src, size_t count); +size_t strncpy_from_user_pt(char *dst, const char __user *src, size_t count); #endif /* __ARCH_S390_LIB_UACCESS_H */ diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 95123f5..66f35e1 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -6,7 +6,9 @@ * Gerald Schaefer (gerald.schaefer@de.ibm.com) */ +#include #include +#include #include #include #include @@ -26,7 +28,10 @@ #define SLR "slgr" #endif -static size_t copy_from_user_mvcos(void *x, const void __user *ptr, size_t size) +static struct static_key have_mvcos = STATIC_KEY_INIT_TRUE; + +static inline size_t copy_from_user_mvcos(void *x, const void __user *ptr, + size_t size) { register unsigned long reg0 asm("0") = 0x81UL; unsigned long tmp1, tmp2; @@ -65,7 +70,16 @@ static size_t copy_from_user_mvcos(void *x, const void __user *ptr, size_t size) return size; } -static size_t copy_to_user_mvcos(void __user *ptr, const void *x, size_t size) +size_t __copy_from_user(void *to, const void __user *from, size_t n) +{ + if (static_key_true(&have_mvcos)) + return copy_from_user_mvcos(to, from, n); + return copy_from_user_pt(to, from, n); +} +EXPORT_SYMBOL(__copy_from_user); + +static inline size_t copy_to_user_mvcos(void __user *ptr, const void *x, + size_t size) { register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; @@ -94,8 +108,16 @@ static size_t copy_to_user_mvcos(void __user *ptr, const void *x, size_t size) return size; } -static size_t copy_in_user_mvcos(void __user *to, const void __user *from, - size_t size) +size_t __copy_to_user(void __user *to, const void *from, size_t n) +{ + if (static_key_true(&have_mvcos)) + return copy_to_user_mvcos(to, from, n); + return copy_to_user_pt(to, from, n); +} +EXPORT_SYMBOL(__copy_to_user); + +static inline size_t copy_in_user_mvcos(void __user *to, const void __user *from, + size_t size) { register unsigned long reg0 asm("0") = 0x810081UL; unsigned long tmp1, tmp2; @@ -117,7 +139,15 @@ static size_t copy_in_user_mvcos(void __user *to, const void __user *from, return size; } -static size_t clear_user_mvcos(void __user *to, size_t size) +size_t __copy_in_user(void __user *to, const void __user *from, size_t n) +{ + if (static_key_true(&have_mvcos)) + return copy_in_user_mvcos(to, from, n); + return copy_in_user_pt(to, from, n); +} +EXPORT_SYMBOL(__copy_in_user); + +static inline size_t clear_user_mvcos(void __user *to, size_t size) { register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; @@ -145,7 +175,15 @@ static size_t clear_user_mvcos(void __user *to, size_t size) return size; } -static size_t strnlen_user_mvcos(const char __user *src, size_t count) +size_t __clear_user(void __user *to, size_t size) +{ + if (static_key_true(&have_mvcos)) + return clear_user_mvcos(to, size); + return clear_user_pt(to, size); +} +EXPORT_SYMBOL(__clear_user); + +static inline size_t strnlen_user_mvcos(const char __user *src, size_t count) { size_t done, len, offset, len_str; char buf[256]; @@ -164,10 +202,18 @@ static size_t strnlen_user_mvcos(const char __user *src, size_t count) return done + 1; } -static size_t strncpy_from_user_mvcos(char *dst, const char __user *src, - size_t count) +size_t __strnlen_user(const char __user *src, size_t count) { - size_t done, len, offset, len_str; + if (static_key_true(&have_mvcos)) + return strnlen_user_mvcos(src, count); + return strnlen_user_pt(src, count); +} +EXPORT_SYMBOL(__strnlen_user); + +static inline size_t strncpy_from_user_mvcos(char *dst, const char __user *src, + size_t count) +{ + unsigned long done, len, offset, len_str; if (unlikely(!count)) return 0; @@ -185,13 +231,18 @@ static size_t strncpy_from_user_mvcos(char *dst, const char __user *src, return done; } -struct uaccess_ops uaccess_mvcos = { - .copy_from_user = copy_from_user_mvcos, - .copy_to_user = copy_to_user_mvcos, - .copy_in_user = copy_in_user_mvcos, - .clear_user = clear_user_mvcos, - .strnlen_user = strnlen_user_mvcos, - .strncpy_from_user = strncpy_from_user_mvcos, - .futex_atomic_op = futex_atomic_op_pt, - .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, -}; +long __strncpy_from_user(char *dst, const char __user *src, long count) +{ + if (static_key_true(&have_mvcos)) + return strncpy_from_user_mvcos(dst, src, count); + return strncpy_from_user_pt(dst, src, count); +} +EXPORT_SYMBOL(__strncpy_from_user); + +static int __init uaccess_init(void) +{ + if (!MACHINE_HAS_MVCOS) + static_key_slow_dec(&have_mvcos); + return 0; +} +early_initcall(uaccess_init); diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 2fa696b..b49c3a4 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -211,7 +211,7 @@ fault: return 0; } -static size_t copy_from_user_pt(void *to, const void __user *from, size_t n) +size_t copy_from_user_pt(void *to, const void __user *from, size_t n) { size_t rc; @@ -223,14 +223,14 @@ static size_t copy_from_user_pt(void *to, const void __user *from, size_t n) return rc; } -static size_t copy_to_user_pt(void __user *to, const void *from, size_t n) +size_t copy_to_user_pt(void __user *to, const void *from, size_t n) { if (segment_eq(get_fs(), KERNEL_DS)) return copy_in_kernel(to, (void __user *) from, n); return __user_copy_pt((unsigned long) to, (void *) from, n, 1); } -static size_t clear_user_pt(void __user *to, size_t n) +size_t clear_user_pt(void __user *to, size_t n) { void *zpage = (void *) empty_zero_page; long done, size, ret; @@ -253,7 +253,7 @@ static size_t clear_user_pt(void __user *to, size_t n) return 0; } -static size_t strnlen_user_pt(const char __user *src, size_t count) +size_t strnlen_user_pt(const char __user *src, size_t count) { unsigned long uaddr = (unsigned long) src; struct mm_struct *mm = current->mm; @@ -289,8 +289,7 @@ fault: goto retry; } -static size_t strncpy_from_user_pt(char *dst, const char __user *src, - size_t count) +size_t strncpy_from_user_pt(char *dst, const char __user *src, size_t count) { size_t done, len, offset, len_str; @@ -315,8 +314,7 @@ static size_t strncpy_from_user_pt(char *dst, const char __user *src, return done; } -static size_t copy_in_user_pt(void __user *to, const void __user *from, - size_t n) +size_t copy_in_user_pt(void __user *to, const void __user *from, size_t n) { struct mm_struct *mm = current->mm; unsigned long offset_max, uaddr, done, size, error_code; @@ -411,7 +409,7 @@ static int __futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) return ret; } -int futex_atomic_op_pt(int op, u32 __user *uaddr, int oparg, int *old) +int __futex_atomic_op_inuser(int op, u32 __user *uaddr, int oparg, int *old) { int ret; @@ -449,8 +447,8 @@ static int __futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, return ret; } -int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, - u32 oldval, u32 newval) +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { int ret; @@ -471,14 +469,3 @@ int futex_atomic_cmpxchg_pt(u32 *uval, u32 __user *uaddr, put_page(virt_to_page(uaddr)); return ret; } - -struct uaccess_ops uaccess_pt = { - .copy_from_user = copy_from_user_pt, - .copy_to_user = copy_to_user_pt, - .copy_in_user = copy_in_user_pt, - .clear_user = clear_user_pt, - .strnlen_user = strnlen_user_pt, - .strncpy_from_user = strncpy_from_user_pt, - .futex_atomic_op = futex_atomic_op_pt, - .futex_atomic_cmpxchg = futex_atomic_cmpxchg_pt, -}; -- cgit v0.10.2 From 211deca6bf413560b562d69748ebc4df5d80d65e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jan 2014 12:51:27 +0100 Subject: s390/uaccess: consistent types The types 'size_t' and 'unsigned long' have been used randomly for the uaccess functions. This looks rather confusing. So let's change all functions to use unsigned long instead and get rid of size_t in order to have a consistent interface. The only exception is strncpy_from_user() which uses 'long' since it may return a signed value (-EFAULT). Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 49885a5..2710b41 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -111,8 +111,8 @@ int __handle_fault(unsigned long, unsigned long, int); * If some data could not be copied, this function will pad the copied * data to the requested size using zero bytes. */ -size_t __must_check __copy_from_user(void *to, const void __user *from, - size_t n); +unsigned long __must_check __copy_from_user(void *to, const void __user *from, + unsigned long n); /** * __copy_to_user: - Copy a block of data into user space, with less checking. @@ -134,13 +134,13 @@ unsigned long __must_check __copy_to_user(void __user *to, const void *from, #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user -static inline int __put_user_fn(void *x, void __user *ptr, size_t size) +static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { size = __copy_to_user(ptr, x, size); return size ? -EFAULT : 0; } -static inline int __get_user_fn(void *x, const void __user *ptr, size_t size) +static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { size = __copy_from_user(x, ptr, size); return size ? -EFAULT : 0; @@ -308,9 +308,9 @@ strncpy_from_user(char *dst, const char __user *src, long count) return __strncpy_from_user(dst, src, count); } -size_t __must_check __strnlen_user(const char __user *src, size_t count); +unsigned long __must_check __strnlen_user(const char __user *src, unsigned long count); -static inline size_t strnlen_user(const char __user *src, size_t n) +static inline unsigned long strnlen_user(const char __user *src, unsigned long n) { might_fault(); return __strnlen_user(src, n); @@ -335,15 +335,15 @@ static inline size_t strnlen_user(const char __user *src, size_t n) /* * Zero Userspace */ -size_t __must_check __clear_user(void __user *to, size_t size); +unsigned long __must_check __clear_user(void __user *to, unsigned long size); -static inline size_t __must_check clear_user(void __user *to, size_t n) +static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { might_fault(); return __clear_user(to, n); } -int copy_to_user_real(void __user *dest, void *src, size_t count); -int copy_from_user_real(void *dest, void __user *src, size_t count); +int copy_to_user_real(void __user *dest, void *src, unsigned long count); +int copy_from_user_real(void *dest, void __user *src, unsigned long count); #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/lib/uaccess.h b/arch/s390/lib/uaccess.h index e5b9c924..c7e0e81f 100644 --- a/arch/s390/lib/uaccess.h +++ b/arch/s390/lib/uaccess.h @@ -6,11 +6,11 @@ #ifndef __ARCH_S390_LIB_UACCESS_H #define __ARCH_S390_LIB_UACCESS_H -size_t copy_from_user_pt(void *to, const void __user *from, size_t n); -size_t copy_to_user_pt(void __user *to, const void *from, size_t n); -size_t copy_in_user_pt(void __user *to, const void __user *from, size_t n); -size_t clear_user_pt(void __user *to, size_t n); -size_t strnlen_user_pt(const char __user *src, size_t count); -size_t strncpy_from_user_pt(char *dst, const char __user *src, size_t count); +unsigned long copy_from_user_pt(void *to, const void __user *from, unsigned long n); +unsigned long copy_to_user_pt(void __user *to, const void *from, unsigned long n); +unsigned long copy_in_user_pt(void __user *to, const void __user *from, unsigned long n); +unsigned long clear_user_pt(void __user *to, unsigned long n); +unsigned long strnlen_user_pt(const char __user *src, unsigned long count); +long strncpy_from_user_pt(char *dst, const char __user *src, long count); #endif /* __ARCH_S390_LIB_UACCESS_H */ diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 66f35e1..8c01f3a 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -30,8 +30,8 @@ static struct static_key have_mvcos = STATIC_KEY_INIT_TRUE; -static inline size_t copy_from_user_mvcos(void *x, const void __user *ptr, - size_t size) +static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, + unsigned long size) { register unsigned long reg0 asm("0") = 0x81UL; unsigned long tmp1, tmp2; @@ -70,7 +70,7 @@ static inline size_t copy_from_user_mvcos(void *x, const void __user *ptr, return size; } -size_t __copy_from_user(void *to, const void __user *from, size_t n) +unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { if (static_key_true(&have_mvcos)) return copy_from_user_mvcos(to, from, n); @@ -78,8 +78,8 @@ size_t __copy_from_user(void *to, const void __user *from, size_t n) } EXPORT_SYMBOL(__copy_from_user); -static inline size_t copy_to_user_mvcos(void __user *ptr, const void *x, - size_t size) +static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, + unsigned long size) { register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; @@ -108,7 +108,7 @@ static inline size_t copy_to_user_mvcos(void __user *ptr, const void *x, return size; } -size_t __copy_to_user(void __user *to, const void *from, size_t n) +unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { if (static_key_true(&have_mvcos)) return copy_to_user_mvcos(to, from, n); @@ -116,8 +116,8 @@ size_t __copy_to_user(void __user *to, const void *from, size_t n) } EXPORT_SYMBOL(__copy_to_user); -static inline size_t copy_in_user_mvcos(void __user *to, const void __user *from, - size_t size) +static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, + unsigned long size) { register unsigned long reg0 asm("0") = 0x810081UL; unsigned long tmp1, tmp2; @@ -139,7 +139,7 @@ static inline size_t copy_in_user_mvcos(void __user *to, const void __user *from return size; } -size_t __copy_in_user(void __user *to, const void __user *from, size_t n) +unsigned long __copy_in_user(void __user *to, const void __user *from, unsigned long n) { if (static_key_true(&have_mvcos)) return copy_in_user_mvcos(to, from, n); @@ -147,7 +147,7 @@ size_t __copy_in_user(void __user *to, const void __user *from, size_t n) } EXPORT_SYMBOL(__copy_in_user); -static inline size_t clear_user_mvcos(void __user *to, size_t size) +static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { register unsigned long reg0 asm("0") = 0x810000UL; unsigned long tmp1, tmp2; @@ -175,7 +175,7 @@ static inline size_t clear_user_mvcos(void __user *to, size_t size) return size; } -size_t __clear_user(void __user *to, size_t size) +unsigned long __clear_user(void __user *to, unsigned long size) { if (static_key_true(&have_mvcos)) return clear_user_mvcos(to, size); @@ -183,14 +183,15 @@ size_t __clear_user(void __user *to, size_t size) } EXPORT_SYMBOL(__clear_user); -static inline size_t strnlen_user_mvcos(const char __user *src, size_t count) +static inline unsigned long strnlen_user_mvcos(const char __user *src, + unsigned long count) { - size_t done, len, offset, len_str; + unsigned long done, len, offset, len_str; char buf[256]; done = 0; do { - offset = (size_t)src & ~PAGE_MASK; + offset = (unsigned long)src & ~PAGE_MASK; len = min(256UL, PAGE_SIZE - offset); len = min(count - done, len); if (copy_from_user_mvcos(buf, src, len)) @@ -202,7 +203,7 @@ static inline size_t strnlen_user_mvcos(const char __user *src, size_t count) return done + 1; } -size_t __strnlen_user(const char __user *src, size_t count) +unsigned long __strnlen_user(const char __user *src, unsigned long count) { if (static_key_true(&have_mvcos)) return strnlen_user_mvcos(src, count); @@ -210,16 +211,16 @@ size_t __strnlen_user(const char __user *src, size_t count) } EXPORT_SYMBOL(__strnlen_user); -static inline size_t strncpy_from_user_mvcos(char *dst, const char __user *src, - size_t count) +static inline long strncpy_from_user_mvcos(char *dst, const char __user *src, + long count) { unsigned long done, len, offset, len_str; - if (unlikely(!count)) + if (unlikely(count <= 0)) return 0; done = 0; do { - offset = (size_t)src & ~PAGE_MASK; + offset = (unsigned long)src & ~PAGE_MASK; len = min(count - done, PAGE_SIZE - offset); if (copy_from_user_mvcos(dst, src, len)) return -EFAULT; diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index b49c3a4..8d39760 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -22,7 +22,7 @@ #define SLR "slgr" #endif -static size_t strnlen_kernel(const char __user *src, size_t count) +static unsigned long strnlen_kernel(const char __user *src, unsigned long count) { register unsigned long reg0 asm("0") = 0UL; unsigned long tmp1, tmp2; @@ -42,8 +42,8 @@ static size_t strnlen_kernel(const char __user *src, size_t count) return count; } -static size_t copy_in_kernel(void __user *to, const void __user *from, - size_t count) +static unsigned long copy_in_kernel(void __user *to, const void __user *from, + unsigned long count) { unsigned long tmp1; @@ -146,8 +146,8 @@ static unsigned long follow_table(struct mm_struct *mm, #endif /* CONFIG_64BIT */ -static __always_inline size_t __user_copy_pt(unsigned long uaddr, void *kptr, - size_t n, int write_user) +static inline unsigned long __user_copy_pt(unsigned long uaddr, void *kptr, + unsigned long n, int write_user) { struct mm_struct *mm = current->mm; unsigned long offset, done, size, kaddr; @@ -189,8 +189,7 @@ fault: * Do DAT for user address by page table walk, return kernel address. * This function needs to be called with current->mm->page_table_lock held. */ -static __always_inline unsigned long __dat_user_addr(unsigned long uaddr, - int write) +static inline unsigned long __dat_user_addr(unsigned long uaddr, int write) { struct mm_struct *mm = current->mm; unsigned long kaddr; @@ -211,9 +210,9 @@ fault: return 0; } -size_t copy_from_user_pt(void *to, const void __user *from, size_t n) +unsigned long copy_from_user_pt(void *to, const void __user *from, unsigned long n) { - size_t rc; + unsigned long rc; if (segment_eq(get_fs(), KERNEL_DS)) return copy_in_kernel((void __user *) to, from, n); @@ -223,17 +222,17 @@ size_t copy_from_user_pt(void *to, const void __user *from, size_t n) return rc; } -size_t copy_to_user_pt(void __user *to, const void *from, size_t n) +unsigned long copy_to_user_pt(void __user *to, const void *from, unsigned long n) { if (segment_eq(get_fs(), KERNEL_DS)) return copy_in_kernel(to, (void __user *) from, n); return __user_copy_pt((unsigned long) to, (void *) from, n, 1); } -size_t clear_user_pt(void __user *to, size_t n) +unsigned long clear_user_pt(void __user *to, unsigned long n) { void *zpage = (void *) empty_zero_page; - long done, size, ret; + unsigned long done, size, ret; done = 0; do { @@ -253,12 +252,12 @@ size_t clear_user_pt(void __user *to, size_t n) return 0; } -size_t strnlen_user_pt(const char __user *src, size_t count) +unsigned long strnlen_user_pt(const char __user *src, unsigned long count) { unsigned long uaddr = (unsigned long) src; struct mm_struct *mm = current->mm; unsigned long offset, done, len, kaddr; - size_t len_str; + unsigned long len_str; if (unlikely(!count)) return 0; @@ -289,15 +288,15 @@ fault: goto retry; } -size_t strncpy_from_user_pt(char *dst, const char __user *src, size_t count) +long strncpy_from_user_pt(char *dst, const char __user *src, long count) { - size_t done, len, offset, len_str; + unsigned long done, len, offset, len_str; - if (unlikely(!count)) + if (unlikely(count <= 0)) return 0; done = 0; do { - offset = (size_t)src & ~PAGE_MASK; + offset = (unsigned long)src & ~PAGE_MASK; len = min(count - done, PAGE_SIZE - offset); if (segment_eq(get_fs(), KERNEL_DS)) { if (copy_in_kernel((void __user *) dst, src, len)) @@ -314,7 +313,8 @@ size_t strncpy_from_user_pt(char *dst, const char __user *src, size_t count) return done; } -size_t copy_in_user_pt(void __user *to, const void __user *from, size_t n) +unsigned long copy_in_user_pt(void __user *to, const void __user *from, + unsigned long n) { struct mm_struct *mm = current->mm; unsigned long offset_max, uaddr, done, size, error_code; diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index d1e0e0c..efe8ad0 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -128,7 +128,7 @@ void memcpy_absolute(void *dest, void *src, size_t count) /* * Copy memory from kernel (real) to user (virtual) */ -int copy_to_user_real(void __user *dest, void *src, size_t count) +int copy_to_user_real(void __user *dest, void *src, unsigned long count) { int offs = 0, size, rc; char *buf; @@ -154,7 +154,7 @@ out: /* * Copy memory from user (virtual) to kernel (real) */ -int copy_from_user_real(void *dest, void __user *src, size_t count) +int copy_from_user_real(void *dest, void __user *src, unsigned long count) { int offs = 0, size, rc; char *buf; -- cgit v0.10.2 From ca04ddbf537d30b25f6e240b70f19be35fac4313 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Jan 2014 13:03:42 +0100 Subject: s390/setup: get rid of MACHINE_HAS_MVCOS machine flag MACHINE_HAS_MVCOS is used exactly once when the machine is brought up. There is no need to cache the flag in the machine_flags. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 94cfbe4..406f3a1 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -59,7 +59,6 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, #define MACHINE_FLAG_DIAG44 (1UL << 4) #define MACHINE_FLAG_IDTE (1UL << 5) #define MACHINE_FLAG_DIAG9C (1UL << 6) -#define MACHINE_FLAG_MVCOS (1UL << 7) #define MACHINE_FLAG_KVM (1UL << 8) #define MACHINE_FLAG_ESOP (1UL << 9) #define MACHINE_FLAG_EDAT1 (1UL << 10) @@ -85,7 +84,6 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, #define MACHINE_HAS_IDTE (0) #define MACHINE_HAS_DIAG44 (1) #define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG) -#define MACHINE_HAS_MVCOS (0) #define MACHINE_HAS_EDAT1 (0) #define MACHINE_HAS_EDAT2 (0) #define MACHINE_HAS_LPP (0) @@ -98,7 +96,6 @@ void create_mem_hole(struct mem_chunk mem_chunk[], unsigned long addr, #define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) #define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) #define MACHINE_HAS_MVPG (1) -#define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS) #define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1) #define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2) #define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index fca20b5..6b59443 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -380,8 +380,6 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; if (test_facility(3)) S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; - if (test_facility(27)) - S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; if (test_facility(40)) S390_lowcore.machine_flags |= MACHINE_FLAG_LPP; if (test_facility(50) && test_facility(73)) diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index 8c01f3a..e2685ff 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "uaccess.h" @@ -242,7 +243,7 @@ EXPORT_SYMBOL(__strncpy_from_user); static int __init uaccess_init(void) { - if (!MACHINE_HAS_MVCOS) + if (IS_ENABLED(CONFIG_32BIT) || !test_facility(27)) static_key_slow_dec(&have_mvcos); return 0; } -- cgit v0.10.2 From 7385d0a550813c912094c5df22aeeb463712300e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Jan 2014 10:09:11 +0100 Subject: s390/uaccess: remove dead kernel parameter 'user_mode=' Remove another leftover from the time when we supported running user space in either home or primary address space. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 91ea009..f70f248 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -287,14 +287,6 @@ static int __init parse_vmalloc(char *arg) } early_param("vmalloc", parse_vmalloc); -static int __init early_parse_user_mode(char *p) -{ - if (!p || strcmp(p, "primary") == 0) - return 0; - return 1; -} -early_param("user_mode", early_parse_user_mode); - void *restart_stack __attribute__((__section__(".data"))); static void __init setup_lowcore(void) -- cgit v0.10.2 From 56f15e518cfdc732bd4e4da90e0c9cf2fc4e7c1b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 27 Jan 2014 10:25:39 +0100 Subject: s390/uaccess: introduce 'uaccesspt' kernel parameter The uaccesspt kernel parameter allows to enforce using the uaccess page table walk variant. This is mainly for debugging purposes, so this mode can also be enabled on machines which support the mvcos instruction. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/lib/uaccess_mvcos.c b/arch/s390/lib/uaccess_mvcos.c index e2685ff..ae97b8d 100644 --- a/arch/s390/lib/uaccess_mvcos.c +++ b/arch/s390/lib/uaccess_mvcos.c @@ -241,9 +241,22 @@ long __strncpy_from_user(char *dst, const char __user *src, long count) } EXPORT_SYMBOL(__strncpy_from_user); +/* + * The uaccess page tabe walk variant can be enforced with the "uaccesspt" + * kernel parameter. This is mainly for debugging purposes. + */ +static int force_uaccess_pt __initdata; + +static int __init parse_uaccess_pt(char *__unused) +{ + force_uaccess_pt = 1; + return 0; +} +early_param("uaccesspt", parse_uaccess_pt); + static int __init uaccess_init(void) { - if (IS_ENABLED(CONFIG_32BIT) || !test_facility(27)) + if (IS_ENABLED(CONFIG_32BIT) || force_uaccess_pt || !test_facility(27)) static_key_slow_dec(&have_mvcos); return 0; } -- cgit v0.10.2 From a53efe5ff88d0283bae8a2c2fa066d0fff31dc91 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 26 Oct 2012 17:17:44 +0200 Subject: sched/mm: call finish_arch_post_lock_switch in idle_task_exit and use_mm The finish_arch_post_lock_switch is called at the end of the task switch after all locks have been released. In concept it is paired with the switch_mm function, but the current code only does the call in finish_task_switch. Add the call to idle_task_exit and use_mm. One use case for the additional calls is s390 which will use finish_arch_post_lock_switch to wait for the completion of TLB flush operations. Signed-off-by: Martin Schwidefsky diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131e..4b0739c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4692,8 +4692,10 @@ void idle_task_exit(void) BUG_ON(cpu_online(smp_processor_id())); - if (mm != &init_mm) + if (mm != &init_mm) { switch_mm(mm, &init_mm, current); + finish_arch_post_lock_switch(); + } mmdrop(mm); } diff --git a/mm/mmu_context.c b/mm/mmu_context.c index 8a8cd02..f802c2d 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -31,6 +31,9 @@ void use_mm(struct mm_struct *mm) tsk->mm = mm; switch_mm(active_mm, mm, tsk); task_unlock(tsk); +#ifdef finish_arch_post_lock_switch + finish_arch_post_lock_switch(); +#endif if (active_mm != mm) mmdrop(active_mm); -- cgit v0.10.2 From 53e857f30867918b3618d8e18902e63291946ef4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 10 Sep 2012 13:00:09 +0200 Subject: s390/mm,tlb: race of lazy TLB flush vs. recreation of TLB entries Git commit 050eef364ad70059 "[S390] fix tlb flushing vs. concurrent /proc accesses" introduced the attach counter to avoid using the mm_users value to decide between IPTE for every PTE and lazy TLB flushing with IDTE. That fixed the problem with mm_users but it introduced another subtle race, fortunately one that is very hard to hit. The background is the requirement of the architecture that a valid PTE may not be changed while it can be used concurrently by another cpu. The decision between IPTE and lazy TLB flushing needs to be done while the PTE is still valid. Now if the virtual cpu is temporarily stopped after the decision to use lazy TLB flushing but before the invalid bit of the PTE has been set, another cpu can attach the mm, find that flush_mm is set, do the IDTE, return to userspace, and recreate a TLB that uses the PTE in question. When the first, stopped cpu continues it will change the PTE while it is attached on another cpu. The first cpu will do another IDTE shortly after the modification of the PTE which makes the race window quite short. To fix this race the CPU that wants to attach the address space of a user space thread needs to wait for the end of the PTE modification. The number of concurrent TLB flushers for an mm is tracked in the upper 16 bits of the attach_count and finish_arch_post_lock_switch is used to wait for the end of the flush operation if required. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 5d1f950..38149b6 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -48,13 +48,42 @@ static inline void update_mm(struct mm_struct *mm, struct task_struct *tsk) static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); - update_mm(next, tsk); + int cpu = smp_processor_id(); + + if (prev == next) + return; + if (atomic_inc_return(&next->context.attach_count) >> 16) { + /* Delay update_mm until all TLB flushes are done. */ + set_tsk_thread_flag(tsk, TIF_TLB_WAIT); + } else { + cpumask_set_cpu(cpu, mm_cpumask(next)); + update_mm(next, tsk); + if (next->context.flush_mm) + /* Flush pending TLBs */ + __tlb_flush_mm(next); + } atomic_dec(&prev->context.attach_count); WARN_ON(atomic_read(&prev->context.attach_count) < 0); - atomic_inc(&next->context.attach_count); - /* Check for TLBs not flushed yet */ - __tlb_flush_mm_lazy(next); +} + +#define finish_arch_post_lock_switch finish_arch_post_lock_switch +static inline void finish_arch_post_lock_switch(void) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + if (!test_tsk_thread_flag(tsk, TIF_TLB_WAIT)) + return; + preempt_disable(); + clear_tsk_thread_flag(tsk, TIF_TLB_WAIT); + while (atomic_read(&mm->context.attach_count) >> 16) + cpu_relax(); + + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + update_mm(mm, tsk); + if (mm->context.flush_mm) + __tlb_flush_mm(mm); + preempt_enable(); } #define enter_lazy_tlb(mm,tsk) do { } while (0) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2204400..fc4bb82 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1034,30 +1034,41 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, static inline void __ptep_ipte(unsigned long address, pte_t *ptep) { - if (!(pte_val(*ptep) & _PAGE_INVALID)) { + unsigned long pto = (unsigned long) ptep; + #ifndef CONFIG_64BIT - /* pto must point to the start of the segment table */ - pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00); -#else - /* ipte in zarch mode can do the math */ - pte_t *pto = ptep; + /* pto in ESA mode must point to the start of the segment table */ + pto &= 0x7ffffc00; #endif - asm volatile( - " ipte %2,%3" - : "=m" (*ptep) : "m" (*ptep), - "a" (pto), "a" (address)); - } + /* Invalidation + global TLB flush for the pte */ + asm volatile( + " ipte %2,%3" + : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address)); +} + +static inline void ptep_flush_direct(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + if (pte_val(*ptep) & _PAGE_INVALID) + return; + __ptep_ipte(address, ptep); } static inline void ptep_flush_lazy(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - int active = (mm == current->active_mm) ? 1 : 0; + int active, count; - if (atomic_read(&mm->context.attach_count) > active) - __ptep_ipte(address, ptep); - else + if (pte_val(*ptep) & _PAGE_INVALID) + return; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pte_val(*ptep) |= _PAGE_INVALID; mm->context.flush_mm = 1; + } else + __ptep_ipte(address, ptep); + atomic_sub(0x10000, &mm->context.attach_count); } #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG @@ -1074,7 +1085,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, } pte = *ptep; - __ptep_ipte(addr, ptep); + ptep_flush_direct(vma->vm_mm, addr, ptep); young = pte_young(pte); pte = pte_mkold(pte); @@ -1145,7 +1156,6 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, pte = *ptep; ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) |= _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1182,7 +1192,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, } pte = *ptep; - __ptep_ipte(address, ptep); + ptep_flush_direct(vma->vm_mm, address, ptep); pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(vma->vm_mm)) { @@ -1263,7 +1273,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); } - __ptep_ipte(address, ptep); + ptep_flush_direct(vma->vm_mm, address, ptep); if (mm_has_pgste(vma->vm_mm)) { pgste_set_pte(ptep, entry); @@ -1447,12 +1457,16 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd) static inline void pmdp_flush_lazy(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - int active = (mm == current->active_mm) ? 1 : 0; + int active, count; - if ((atomic_read(&mm->context.attach_count) & 0xffff) > active) - __pmd_idte(address, pmdp); - else + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; mm->context.flush_mm = 1; + } else + __pmd_idte(address, pmdp); + atomic_sub(0x10000, &mm->context.attach_count); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 10e0fcd..d2e53d6 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -81,6 +81,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_TLB_WAIT 4 /* wait for TLB flush completion */ #define TIF_PER_TRAP 6 /* deliver sigtrap on return to user */ #define TIF_MCCK_PENDING 7 /* machine check handling is pending */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ @@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NOTIFY_RESUME (1< Date: Wed, 17 Apr 2013 13:59:32 +0200 Subject: mm: add support for discard of unused ptes In a virtualized environment and given an appropriate interface the guest can mark pages as unused while they are free (for the s390 implementation see git commit 45e576b1c3d00206 "guest page hinting light"). For the host the unused state is a property of the pte. This patch adds the primitive 'pte_unused' and code to the host swap out handler so that pages marked as unused by all mappers are not swapped out but discarded instead, thus saving one IO for swap out and potentially another one for swap in. [ Martin Schwidefsky: patch reordering and simplification ] Signed-off-by: Konstantin Weitz Signed-off-by: Martin Schwidefsky diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 34c7bdc..1ec08c1 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -193,6 +193,19 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) } #endif +#ifndef __HAVE_ARCH_PTE_UNUSED +/* + * Some architectures provide facilities to virtualization guests + * so that they can flag allocated pages as unused. This allows the + * host to transparently reclaim unused pages. This function returns + * whether the pte's page is unused. + */ +static inline int pte_unused(pte_t pte) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PMD_SAME #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) diff --git a/mm/rmap.c b/mm/rmap.c index d9d4231..9056a1f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1165,6 +1165,16 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } set_pte_at(mm, address, pte, swp_entry_to_pte(make_hwpoison_entry(page))); + } else if (pte_unused(pteval)) { + /* + * The guest indicated that the page content is of no + * interest anymore. Simply discard the pte, vmscan + * will take care of the rest. + */ + if (PageAnon(page)) + dec_mm_counter(mm, MM_ANONPAGES); + else + dec_mm_counter(mm, MM_FILEPAGES); } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(page) }; pte_t swp_pte; -- cgit v0.10.2 From b31288fa83b2bcc8834e1e208e9526b8bd5ce361 Mon Sep 17 00:00:00 2001 From: Konstantin Weitz Date: Wed, 17 Apr 2013 17:36:29 +0200 Subject: s390/kvm: support collaborative memory management This patch enables Collaborative Memory Management (CMM) for kvm on s390. CMM allows the guest to inform the host about page usage (see arch/s390/mm/cmm.c). The host uses this information to avoid swapping in unused pages in the page fault handler. Further, a CPU provided list of unused invalid pages is processed to reclaim swap space of not yet accessed unused pages. [ Martin Schwidefsky: patch reordering and cleanup ] Signed-off-by: Konstantin Weitz Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index eef3dd3..9bf95bb 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -106,7 +106,9 @@ struct kvm_s390_sie_block { __u64 gbea; /* 0x0180 */ __u8 reserved188[24]; /* 0x0188 */ __u32 fac; /* 0x01a0 */ - __u8 reserved1a4[68]; /* 0x01a4 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[40]; /* 0x01c0 */ __u64 itdba; /* 0x01e8 */ __u8 reserved1f0[16]; /* 0x01f0 */ } __attribute__((packed)); @@ -155,6 +157,7 @@ struct kvm_vcpu_stat { u32 instruction_stsi; u32 instruction_stfl; u32 instruction_tprot; + u32 instruction_essa; u32 instruction_sigp_sense; u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index fc4bb82..a7dd672 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -229,6 +229,7 @@ extern unsigned long MODULES_END; #define _PAGE_READ 0x010 /* SW pte read bit */ #define _PAGE_WRITE 0x020 /* SW pte write bit */ #define _PAGE_SPECIAL 0x040 /* SW associated with special page */ +#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ @@ -394,6 +395,12 @@ extern unsigned long MODULES_END; #endif /* CONFIG_64BIT */ +/* Guest Page State used for virtualization */ +#define _PGSTE_GPS_ZERO 0x0000000080000000UL +#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL +#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL +#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL + /* * A user page table pointer has the space-switch-event bit, the * private-space-control bit and the storage-alteration-event-control @@ -617,6 +624,14 @@ static inline int pte_none(pte_t pte) return pte_val(pte) == _PAGE_INVALID; } +static inline int pte_swap(pte_t pte) +{ + /* Bit pattern: (pte & 0x603) == 0x402 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | + _PAGE_TYPE | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_TYPE); +} + static inline int pte_file(pte_t pte) { /* Bit pattern: (pte & 0x601) == 0x600 */ @@ -821,6 +836,7 @@ unsigned long gmap_translate(unsigned long address, struct gmap *); unsigned long __gmap_fault(unsigned long address, struct gmap *); unsigned long gmap_fault(unsigned long address, struct gmap *); void gmap_discard(unsigned long from, unsigned long to, struct gmap *); +void __gmap_zap(unsigned long address, struct gmap *); void gmap_register_ipte_notifier(struct gmap_notifier *); void gmap_unregister_ipte_notifier(struct gmap_notifier *); @@ -852,6 +868,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; pgste_set_key(ptep, pgste, entry); pgste_set_pte(ptep, entry); pgste_set_unlock(ptep, pgste); @@ -881,6 +898,12 @@ static inline int pte_young(pte_t pte) return (pte_val(pte) & _PAGE_YOUNG) != 0; } +#define __HAVE_ARCH_PTE_UNUSED +static inline int pte_unused(pte_t pte) +{ + return pte_val(pte) & _PAGE_UNUSED; +} + /* * pgd/pmd/pte modification functions */ @@ -1196,6 +1219,9 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(vma->vm_mm)) { + if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == + _PGSTE_GPS_USAGE_UNUSED) + pte_val(pte) |= _PAGE_UNUSED; pgste = pgste_update_all(&pte, pgste); pgste_set_unlock(ptep, pgste); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e0676f3..10b5db3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -68,6 +68,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, { "instruction_stsch", VCPU_STAT(instruction_stsch) }, { "instruction_chsc", VCPU_STAT(instruction_chsc) }, + { "instruction_essa", VCPU_STAT(instruction_essa) }, { "instruction_stsi", VCPU_STAT(instruction_stsi) }, { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, @@ -283,7 +284,11 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); + if (vcpu->arch.sie_block->cbrlo) + __free_page(__pfn_to_page( + vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT)); free_page((unsigned long)(vcpu->arch.sie_block)); + kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu); } @@ -390,6 +395,8 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + struct page *cbrl; + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM | CPUSTAT_STOPPED | @@ -401,6 +408,14 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb2 = 8; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) vfacilities; + if (kvm_enabled_cmma()) { + cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (cbrl) { + vcpu->arch.sie_block->ecb2 |= 0x80; + vcpu->arch.sie_block->ecb2 &= ~0x08; + vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl); + } + } hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, (unsigned long) vcpu); @@ -761,6 +776,16 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) return rc; } +bool kvm_enabled_cmma(void) +{ + if (!MACHINE_IS_LPAR) + return false; + /* only enable for z10 and later */ + if (!MACHINE_HAS_EDAT1) + return false; + return true; +} + static int __vcpu_run(struct kvm_vcpu *vcpu) { int rc, exit_reason; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f9559b0..564514f4 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -156,6 +156,8 @@ void s390_vcpu_block(struct kvm_vcpu *vcpu); void s390_vcpu_unblock(struct kvm_vcpu *vcpu); void exit_sie(struct kvm_vcpu *vcpu); void exit_sie_sync(struct kvm_vcpu *vcpu); +/* are we going to support cmma? */ +bool kvm_enabled_cmma(void); /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 75beea6..aacb6b1 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -636,8 +636,49 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return 0; } +static int handle_essa(struct kvm_vcpu *vcpu) +{ + /* entries expected to be 1FF */ + int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3; + unsigned long *cbrlo, cbrle; + struct gmap *gmap; + int i; + + VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries); + gmap = vcpu->arch.gmap; + vcpu->stat.instruction_essa++; + if (!kvm_enabled_cmma() || !vcpu->arch.sie_block->cbrlo) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + + if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) + return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + + if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + /* Rewind PSW to repeat the ESSA instruction */ + vcpu->arch.sie_block->gpsw.addr = + __rewind_psw(vcpu->arch.sie_block->gpsw, 4); + vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */ + cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo); + down_read(&gmap->mm->mmap_sem); + for (i = 0; i < entries; ++i) { + cbrle = cbrlo[i]; + if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE)) + /* invalid entry */ + break; + /* try to free backing */ + __gmap_zap(cbrle, gmap); + } + up_read(&gmap->mm->mmap_sem); + if (i < entries) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + return 0; +} + static const intercept_handler_t b9_handlers[256] = { [0x8d] = handle_epsw, + [0xab] = handle_essa, [0xaf] = handle_pfmf, }; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3584ed9..9e2b470 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -594,6 +595,82 @@ unsigned long gmap_fault(unsigned long address, struct gmap *gmap) } EXPORT_SYMBOL_GPL(gmap_fault); +static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) { + struct page *page = migration_entry_to_page(entry); + + if (PageAnon(page)) + dec_mm_counter(mm, MM_ANONPAGES); + else + dec_mm_counter(mm, MM_FILEPAGES); + } + free_swap_and_cache(entry); +} + +/** + * The mm->mmap_sem lock must be held + */ +static void gmap_zap_unused(struct mm_struct *mm, unsigned long address) +{ + unsigned long ptev, pgstev; + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep, pte; + + ptep = get_locked_pte(mm, address, &ptl); + if (unlikely(!ptep)) + return; + pte = *ptep; + if (!pte_swap(pte)) + goto out_pte; + /* Zap unused and logically-zero pages */ + pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); + ptev = pte_val(pte); + if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || + ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { + gmap_zap_swap_entry(pte_to_swp_entry(pte), mm); + pte_clear(mm, address, ptep); + } + pgste_set_unlock(ptep, pgste); +out_pte: + pte_unmap_unlock(*ptep, ptl); +} + +/* + * this function is assumed to be called with mmap_sem held + */ +void __gmap_zap(unsigned long address, struct gmap *gmap) +{ + unsigned long *table, *segment_ptr; + unsigned long segment, pgstev, ptev; + struct gmap_pgtable *mp; + struct page *page; + + segment_ptr = gmap_table_walk(address, gmap); + if (IS_ERR(segment_ptr)) + return; + segment = *segment_ptr; + if (segment & _SEGMENT_ENTRY_INVALID) + return; + page = pfn_to_page(segment >> PAGE_SHIFT); + mp = (struct gmap_pgtable *) page->index; + address = mp->vmaddr | (address & ~PMD_MASK); + /* Page table is present */ + table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN); + table = table + ((address >> 12) & 0xff); + pgstev = table[PTRS_PER_PTE]; + ptev = table[0]; + /* quick check, checked again with locks held */ + if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || + ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) + gmap_zap_unused(gmap->mm, address); +} +EXPORT_SYMBOL_GPL(__gmap_zap); + void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) { -- cgit v0.10.2 From deedabb2b4a68a63351a949b1abcf73fc97eb406 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 21 May 2013 17:29:52 +0200 Subject: s390/kvm: set guest page states to stable on re-ipl The guest page state needs to be reset to stable for all pages on initial program load via diagnose 0x308. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index e1408dd..14d43c7 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -22,6 +22,7 @@ unsigned long *page_table_alloc(struct mm_struct *, unsigned long); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *); +void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 8216c0e..6f9cfa5 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -13,6 +13,7 @@ #include #include +#include #include #include "kvm-s390.h" #include "trace.h" @@ -86,9 +87,11 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) switch (subcode) { case 3: vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; + page_table_reset_pgste(current->mm, 0, TASK_SIZE); break; case 4: vcpu->run->s390_reset_flags = 0; + page_table_reset_pgste(current->mm, 0, TASK_SIZE); break; default: return -EOPNOTSUPP; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9e2b470..9c26b7a 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -879,6 +879,78 @@ static inline void page_table_free_pgste(unsigned long *table) __free_page(page); } +static inline unsigned long page_table_reset_pte(struct mm_struct *mm, + pmd_t *pmd, unsigned long addr, unsigned long end) +{ + pte_t *start_pte, *pte; + spinlock_t *ptl; + pgste_t pgste; + + start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl); + pte = start_pte; + do { + pgste = pgste_get_lock(pte); + pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_set_unlock(pte, pgste); + } while (pte++, addr += PAGE_SIZE, addr != end); + pte_unmap_unlock(start_pte, ptl); + + return addr; +} + +static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, + pud_t *pud, unsigned long addr, unsigned long end) +{ + unsigned long next; + pmd_t *pmd; + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + if (pmd_none_or_clear_bad(pmd)) + continue; + next = page_table_reset_pte(mm, pmd, addr, next); + } while (pmd++, addr = next, addr != end); + + return addr; +} + +static inline unsigned long page_table_reset_pud(struct mm_struct *mm, + pgd_t *pgd, unsigned long addr, unsigned long end) +{ + unsigned long next; + pud_t *pud; + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + if (pud_none_or_clear_bad(pud)) + continue; + next = page_table_reset_pmd(mm, pud, addr, next); + } while (pud++, addr = next, addr != end); + + return addr; +} + +void page_table_reset_pgste(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + unsigned long addr, next; + pgd_t *pgd; + + addr = start; + down_read(&mm->mmap_sem); + pgd = pgd_offset(mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) + continue; + next = page_table_reset_pud(mm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); + up_read(&mm->mmap_sem); +} +EXPORT_SYMBOL(page_table_reset_pgste); + int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq) { -- cgit v0.10.2 From db85eaeb52637eb91a7bbc70f6684f5563b983e9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 12 Feb 2014 12:43:40 +0100 Subject: s390/bitops: fix comment Fix some numbers in the comments describing the layout of the bit maps. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 6e6ad06..ec5ef89 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -13,9 +13,9 @@ * * The bitop functions are defined to work on unsigned longs, so for an * s390x system the bits end up numbered: - * |63..............0|127............64|191...........128|255...........196| + * |63..............0|127............64|191...........128|255...........192| * and on s390: - * |31.....0|63....31|95....64|127...96|159..128|191..160|223..192|255..224| + * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224| * * There are a few little-endian macros used mostly for filesystem * bitmaps, these work on similar bit arrays layouts, but @@ -30,7 +30,7 @@ * on an s390x system the bits are numbered: * |0..............63|64............127|128...........191|192...........255| * and on s390: - * |0.....31|31....63|64....95|96...127|128..159|160..191|192..223|224..255| + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| * * The main difference is that bit 0-63 (64b) or 0-31 (32b) in the bit * number field needs to be reversed compared to the LSB0 encoded bit @@ -304,7 +304,7 @@ static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr) * On an s390x system the bits are numbered: * |0..............63|64............127|128...........191|192...........255| * and on s390: - * |0.....31|31....63|64....95|96...127|128..159|160..191|192..223|224..255| + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| */ unsigned long find_first_bit_inv(const unsigned long *addr, unsigned long size); unsigned long find_next_bit_inv(const unsigned long *addr, unsigned long size, diff --git a/arch/s390/lib/find.c b/arch/s390/lib/find.c index 620d34d..922003c 100644 --- a/arch/s390/lib/find.c +++ b/arch/s390/lib/find.c @@ -4,7 +4,7 @@ * On s390x the bits are numbered: * |0..............63|64............127|128...........191|192...........255| * and on s390: - * |0.....31|31....63|64....95|96...127|128..159|160..191|192..223|224..255| + * |0.....31|32....63|64....95|96...127|128..159|160..191|192..223|224..255| * * The reason for this bit numbering is the fact that the hardware sets bits * in a bitmap starting at bit 0 (MSB) and we don't want to scan the bitmap -- cgit v0.10.2 From 634391ace193d00c59a691e9fc227b0f8942bad7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 13 Feb 2014 13:53:33 +0100 Subject: mm: mask bits from pmd in pmd_lockptr/pmd_huge_pte The pmd pointer passed to pmd_lockptr/pmd_huge_pte can point to any entry in a pmd table. With USE_SPLIT_PMD_PTLOCKS==1 the code uses virt_to_page to get a struct page for the pmd table. The virt_to_page function automatically masks the lower PAGE_SHIFT bits from the address. But if the size of a pmd table is larger than PAGE_SIZE the additional bits are not removed from the pmd address and the wrong page struct is used. Fix this by explicitely masking the offset in the pmd table from the pmd pointer. Signed-off-by: Martin Schwidefsky diff --git a/include/linux/mm.h b/include/linux/mm.h index f28f46e..d354a72 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1477,9 +1477,15 @@ static inline void pgtable_page_dtor(struct page *page) #if USE_SPLIT_PMD_PTLOCKS +static struct page *pmd_to_page(pmd_t *pmd) +{ + unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + return virt_to_page((void *)((unsigned long) pmd & mask)); +} + static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return ptlock_ptr(virt_to_page(pmd)); + return ptlock_ptr(pmd_to_page(pmd)); } static inline bool pgtable_pmd_page_ctor(struct page *page) @@ -1498,7 +1504,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page) ptlock_free(page); } -#define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte) +#define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte) #else -- cgit v0.10.2 From ec66ad66a0de87866be347b5ecc83bd46427f53b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 12 Feb 2014 14:16:18 +0100 Subject: s390/mm: enable split page table lock for PMD level Add the pgtable_pmd_page_ctor/pgtable_pmd_page_dtor calls to the pmd allocation and free functions and enable ARCH_ENABLE_SPLIT_PMD_PTLOCK for 64 bit. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 65a0775..86db5a8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -415,6 +415,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG config ARCH_ENABLE_MEMORY_HOTREMOVE def_bool y +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + def_bool y + depends on 64BIT + config FORCE_MAX_ZONEORDER int default "9" diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 14d43c7..884017c 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -92,11 +92,22 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { unsigned long *table = crst_table_alloc(mm); - if (table) - crst_table_init(table, _SEGMENT_ENTRY_EMPTY); + + if (!table) + return NULL; + crst_table_init(table, _SEGMENT_ENTRY_EMPTY); + if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + crst_table_free(mm, table); + return NULL; + } return (pmd_t *) table; } -#define pmd_free(mm, pmd) crst_table_free(mm, (unsigned long *) pmd) + +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ + pgtable_pmd_page_dtor(virt_to_page(pmd)); + crst_table_free(mm, (unsigned long *) pmd); +} static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 9c26b7a..f8b58a7 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1397,7 +1397,7 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, { struct list_head *lh = (struct list_head *) pgtable; - assert_spin_locked(&mm->page_table_lock); + assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ if (!pmd_huge_pte(mm, pmdp)) @@ -1413,7 +1413,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) pgtable_t pgtable; pte_t *ptep; - assert_spin_locked(&mm->page_table_lock); + assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ pgtable = pmd_huge_pte(mm, pmdp); -- cgit v0.10.2 From fe7c30a420761654777d3cc15412fc7626407e93 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 13 Feb 2014 13:02:32 +0100 Subject: s390/airq: add support for irq ranges Add airq_iv_alloc and airq_iv_free to allocate and free consecutive ranges of irqs from the interrupt vector. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 4bbb595..bd93ff6 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -44,11 +44,21 @@ struct airq_iv { struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); void airq_iv_release(struct airq_iv *iv); -unsigned long airq_iv_alloc_bit(struct airq_iv *iv); -void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit); +unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num); +void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num); unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, unsigned long end); +static inline unsigned long airq_iv_alloc_bit(struct airq_iv *iv) +{ + return airq_iv_alloc(iv, 1); +} + +static inline void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit) +{ + airq_iv_free(iv, bit, 1); +} + static inline unsigned long airq_iv_end(struct airq_iv *iv) { return iv->end; diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index f055df0..445564c 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -186,55 +186,71 @@ void airq_iv_release(struct airq_iv *iv) EXPORT_SYMBOL(airq_iv_release); /** - * airq_iv_alloc_bit - allocate an irq bit from an interrupt vector + * airq_iv_alloc - allocate irq bits from an interrupt vector * @iv: pointer to an interrupt vector structure + * @num: number of consecutive irq bits to allocate * - * Returns the bit number of the allocated irq, or -1UL if no bit - * is available or the AIRQ_IV_ALLOC flag has not been specified + * Returns the bit number of the first irq in the allocated block of irqs, + * or -1UL if no bit is available or the AIRQ_IV_ALLOC flag has not been + * specified */ -unsigned long airq_iv_alloc_bit(struct airq_iv *iv) +unsigned long airq_iv_alloc(struct airq_iv *iv, unsigned long num) { - unsigned long bit; + unsigned long bit, i; - if (!iv->avail) + if (!iv->avail || num == 0) return -1UL; spin_lock(&iv->lock); bit = find_first_bit_inv(iv->avail, iv->bits); - if (bit < iv->bits) { - clear_bit_inv(bit, iv->avail); - if (bit >= iv->end) - iv->end = bit + 1; - } else + while (bit + num <= iv->bits) { + for (i = 1; i < num; i++) + if (!test_bit_inv(bit + i, iv->avail)) + break; + if (i >= num) { + /* Found a suitable block of irqs */ + for (i = 0; i < num; i++) + clear_bit_inv(bit + i, iv->avail); + if (bit + num >= iv->end) + iv->end = bit + num + 1; + break; + } + bit = find_next_bit_inv(iv->avail, iv->bits, bit + i + 1); + } + if (bit + num > iv->bits) bit = -1UL; spin_unlock(&iv->lock); return bit; } -EXPORT_SYMBOL(airq_iv_alloc_bit); +EXPORT_SYMBOL(airq_iv_alloc); /** - * airq_iv_free_bit - free an irq bit of an interrupt vector + * airq_iv_free - free irq bits of an interrupt vector * @iv: pointer to interrupt vector structure - * @bit: number of the irq bit to free + * @bit: number of the first irq bit to free + * @num: number of consecutive irq bits to free */ -void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit) +void airq_iv_free(struct airq_iv *iv, unsigned long bit, unsigned long num) { - if (!iv->avail) + unsigned long i; + + if (!iv->avail || num == 0) return; spin_lock(&iv->lock); - /* Clear (possibly left over) interrupt bit */ - clear_bit_inv(bit, iv->vector); - /* Make the bit position available again */ - set_bit_inv(bit, iv->avail); - if (bit == iv->end - 1) { + for (i = 0; i < num; i++) { + /* Clear (possibly left over) interrupt bit */ + clear_bit_inv(bit + i, iv->vector); + /* Make the bit positions available again */ + set_bit_inv(bit + i, iv->avail); + } + if (bit + num >= iv->end) { /* Find new end of bit-field */ - while (--iv->end > 0) - if (!test_bit_inv(iv->end - 1, iv->avail)) - break; + while (iv->end > 0 && !test_bit_inv(iv->end - 1, iv->avail)) + iv->end--; } spin_unlock(&iv->lock); } -EXPORT_SYMBOL(airq_iv_free_bit); +EXPORT_SYMBOL(airq_iv_free); /** * airq_iv_scan - scan interrupt vector for non-zero bits -- cgit v0.10.2 From f7e1e65d29636d050cdde0770b9544572959a67d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 17 Feb 2014 11:16:10 +0100 Subject: s390: improve debug feature usage The maximum usable buffer size of the s390 debug feature (when using the sprintf_view) is 11 * sizeof(long) (1 pointer for the format string + 10 arguments). When a larger buffer size is specified the additional memory is unused and wasted per debug entry. So reducing the buffer size to its maximum (or to the actual buffer size used) will make more precious debug feature space usable. For pci_msg, chsc_msg, and cio_crw we use the additional usable dbf space to reduce the number of allocated pages. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 75c69b4..c5c6684 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -139,7 +139,7 @@ void zpci_debug_exit_device(struct zpci_dev *zdev) int __init zpci_debug_init(void) { /* event trace buffer */ - pci_debug_msg_id = debug_register("pci_msg", 16, 1, 16 * sizeof(long)); + pci_debug_msg_id = debug_register("pci_msg", 8, 1, 8 * sizeof(long)); if (!pci_debug_msg_id) return -EINVAL; debug_register_view(pci_debug_msg_id, &debug_sprintf_view); diff --git a/drivers/s390/cio/chsc_sch.c b/drivers/s390/cio/chsc_sch.c index 7b29d0b..1d3661a 100644 --- a/drivers/s390/cio/chsc_sch.c +++ b/drivers/s390/cio/chsc_sch.c @@ -173,8 +173,7 @@ static struct css_driver chsc_subchannel_driver = { static int __init chsc_init_dbfs(void) { - chsc_debug_msg_id = debug_register("chsc_msg", 16, 1, - 16 * sizeof(long)); + chsc_debug_msg_id = debug_register("chsc_msg", 8, 1, 4 * sizeof(long)); if (!chsc_debug_msg_id) goto out; debug_register_view(chsc_debug_msg_id, &debug_sprintf_view); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 8ee88c4..97c48b3 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -54,7 +54,7 @@ debug_info_t *cio_debug_crw_id; */ static int __init cio_debug_init(void) { - cio_debug_msg_id = debug_register("cio_msg", 16, 1, 16 * sizeof(long)); + cio_debug_msg_id = debug_register("cio_msg", 16, 1, 11 * sizeof(long)); if (!cio_debug_msg_id) goto out_unregister; debug_register_view(cio_debug_msg_id, &debug_sprintf_view); @@ -64,7 +64,7 @@ static int __init cio_debug_init(void) goto out_unregister; debug_register_view(cio_debug_trace_id, &debug_hex_ascii_view); debug_set_level(cio_debug_trace_id, 2); - cio_debug_crw_id = debug_register("cio_crw", 16, 1, 16 * sizeof(long)); + cio_debug_crw_id = debug_register("cio_crw", 8, 1, 8 * sizeof(long)); if (!cio_debug_crw_id) goto out_unregister; debug_register_view(cio_debug_crw_id, &debug_sprintf_view); diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index c3a83df..bd5d4ab 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -33,8 +33,8 @@ struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS] = { /* N P A M L V H */ [QETH_DBF_SETUP] = {"qeth_setup", 8, 1, 8, 5, &debug_hex_ascii_view, NULL}, - [QETH_DBF_MSG] = {"qeth_msg", - 8, 1, 128, 3, &debug_sprintf_view, NULL}, + [QETH_DBF_MSG] = {"qeth_msg", 8, 1, 11 * sizeof(long), 3, + &debug_sprintf_view, NULL}, [QETH_DBF_CTRL] = {"qeth_control", 8, 1, QETH_DBF_CTRL_LEN, 5, &debug_hex_ascii_view, NULL}, }; -- cgit v0.10.2 From 322986ca64d170abb343763640763f3d7933bb69 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 17 Feb 2014 15:16:30 +0100 Subject: s390: add some drivers/subsystems to the MAINTAINERS file Add entries for s390/cio, s390/dasd and s390/pci. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky diff --git a/MAINTAINERS b/MAINTAINERS index 0dba50b..a177f74 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7372,10 +7372,26 @@ W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported F: arch/s390/ F: drivers/s390/ -F: block/partitions/ibm.c F: Documentation/s390/ F: Documentation/DocBook/s390* +S390 COMMON I/O LAYER +M: Sebastian Ott +M: Peter Oberparleiter +L: linux-s390@vger.kernel.org +W: http://www.ibm.com/developerworks/linux/linux390/ +S: Supported +F: drivers/s390/cio/ + +S390 DASD DRIVER +M: Stefan Weinhuber +M: Stefan Haberland +L: linux-s390@vger.kernel.org +W: http://www.ibm.com/developerworks/linux/linux390/ +S: Supported +F: drivers/s390/block/dasd* +F: block/partitions/ibm.c + S390 NETWORK DRIVERS M: Ursula Braun M: Frank Blaschka @@ -7385,6 +7401,15 @@ W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported F: drivers/s390/net/ +S390 PCI SUBSYSTEM +M: Sebastian Ott +M: Gerald Schaefer +L: linux-s390@vger.kernel.org +W: http://www.ibm.com/developerworks/linux/linux390/ +S: Supported +F: arch/s390/pci/ +F: drivers/pci/hotplug/s390_pci_hpc.c + S390 ZCRYPT DRIVER M: Ingo Tuchscherer M: linux390@de.ibm.com -- cgit v0.10.2 From c60666bd2224a32606364fdbc620e4cb9446a1d1 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Fri, 21 Feb 2014 16:23:35 +0800 Subject: ALSA: hda/realtek - Add more entry for enable HP mute led More HP machine need mute led support. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6eb903c..0b4ea6c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4319,6 +4319,53 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x1973, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x1983, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x218b, "HP", ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED), + /* ALC282 */ + SND_PCI_QUIRK(0x103c, 0x220f, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2213, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2266, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2267, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2268, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2269, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x226a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x226b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x227a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x227b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x229e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22a0, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22b2, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22b7, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22bf, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c0, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c1, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c2, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22cd, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22ce, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22cf, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22d0, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + /* ALC290 */ + SND_PCI_QUIRK(0x103c, 0x2260, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2261, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2262, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2263, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2264, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2265, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x227d, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x227e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x227f, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2280, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2281, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2282, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x2289, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x228a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x228b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x228c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x228e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c5, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c6, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c7, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c8, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c3, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x22c4, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK_VENDOR(0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), -- cgit v0.10.2 From b3634575930857724d5c3987a2739b0637999b4e Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 18 Feb 2014 17:02:54 +0100 Subject: ARM: 7980/1: kernel: improve error message when LPAE config doesn't match CPU Currently, when the kernel is configured with LPAE support, but the CPU doesn't support it, the error message is fairly cryptic: Error: unrecognized/unsupported processor variant (0x561f5811). This messages is normally shown when there is an issue when comparing the processor ID (CP15 0, c0, c0) with the values/masks described in proc-v7.S. However, the same message is displayed when LPAE support is enabled in the kernel configuration, but not available in the CPU, after looking at ID_MMFR0 (CP15 0, c0, c1, 4). Having the same error message is highly misleading. This commit improves this by showing a different error message when this situation occurs: Error: Kernel with LPAE support, but CPU does not support LPAE. Signed-off-by: Thomas Petazzoni Signed-off-by: Russell King diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 47cd974..c96ecac 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -177,6 +177,18 @@ __lookup_processor_type_data: .long __proc_info_end .size __lookup_processor_type_data, . - __lookup_processor_type_data +__error_lpae: +#ifdef CONFIG_DEBUG_LL + adr r0, str_lpae + bl printascii + b __error +str_lpae: .asciz "\nError: Kernel with LPAE support, but CPU does not support LPAE.\n" +#else + b __error +#endif + .align +ENDPROC(__error_lpae) + __error_p: #ifdef CONFIG_DEBUG_LL adr r0, str_p1 diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 914616e..f5f381d 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -102,7 +102,7 @@ ENTRY(stext) and r3, r3, #0xf @ extract VMSA support cmp r3, #5 @ long-descriptor translation table format? THUMB( it lo ) @ force fixup-able long branch encoding - blo __error_p @ only classic page table format + blo __error_lpae @ only classic page table format #endif #ifndef CONFIG_XIP_KERNEL -- cgit v0.10.2 From fd694131bb033e7b1fd748498cd9a70e9c16420b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:36 -0800 Subject: blk-mq: remove blk_mq_alloc_rq There's only one caller, which is a straight wrapper and fits the naming scheme of the related functions a lot better. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index 1fa9dd1..1e585e3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -73,8 +73,8 @@ static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, set_bit(ctx->index_hw, hctx->ctx_map); } -static struct request *blk_mq_alloc_rq(struct blk_mq_hw_ctx *hctx, gfp_t gfp, - bool reserved) +static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, + gfp_t gfp, bool reserved) { struct request *rq; unsigned int tag; @@ -193,12 +193,6 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, ctx->rq_dispatched[rw_is_sync(rw_flags)]++; } -static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, - gfp_t gfp, bool reserved) -{ - return blk_mq_alloc_rq(hctx, gfp, reserved); -} - static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, int rw, gfp_t gfp, bool reserved) -- cgit v0.10.2 From feb71dae1f9e0aeb056f7f639a21e620d327fc66 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:37 -0800 Subject: blk-mq: merge blk_mq_insert_request and blk_mq_run_request It's almost identical to blk_mq_insert_request, so fold the two into one slightly more generic function by making the flush special case a bit smarted. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/block/blk-exec.c b/block/blk-exec.c index c68613b..dbf4502 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be resued after dying flag is set */ if (q->mq_ops) { - blk_mq_insert_request(q, rq, at_head, true); + blk_mq_insert_request(rq, at_head, true, false); return; } diff --git a/block/blk-flush.c b/block/blk-flush.c index 66e2b69..f598f79 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -137,7 +137,7 @@ static void mq_flush_run(struct work_struct *work) rq = container_of(work, struct request, mq_flush_work); memset(&rq->csd, 0, sizeof(rq->csd)); - blk_mq_run_request(rq, true, false); + blk_mq_insert_request(rq, false, true, false); } static bool blk_flush_queue_rq(struct request *rq) @@ -411,7 +411,7 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { if (q->mq_ops) { - blk_mq_run_request(rq, false, true); + blk_mq_insert_request(rq, false, false, true); } else list_add_tail(&rq->queuelist, &q->queue_head); return; diff --git a/block/blk-mq.c b/block/blk-mq.c index 1e585e3..2af8405 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -724,61 +724,28 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, blk_mq_add_timer(rq); } -void blk_mq_insert_request(struct request_queue *q, struct request *rq, - bool at_head, bool run_queue) +void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, + bool async) { + struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx, *current_ctx; + struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx; + + current_ctx = blk_mq_get_ctx(q); + if (!cpu_online(ctx->cpu)) + rq->mq_ctx = ctx = current_ctx; - ctx = rq->mq_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); - if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) && + !(rq->cmd_flags & (REQ_FLUSH_SEQ))) { blk_insert_flush(rq); } else { - current_ctx = blk_mq_get_ctx(q); - - if (!cpu_online(ctx->cpu)) { - ctx = current_ctx; - hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq->mq_ctx = ctx; - } spin_lock(&ctx->lock); __blk_mq_insert_request(hctx, rq, at_head); spin_unlock(&ctx->lock); - - blk_mq_put_ctx(current_ctx); } - if (run_queue) - __blk_mq_run_hw_queue(hctx); -} -EXPORT_SYMBOL(blk_mq_insert_request); - -/* - * This is a special version of blk_mq_insert_request to bypass FLUSH request - * check. Should only be used internally. - */ -void blk_mq_run_request(struct request *rq, bool run_queue, bool async) -{ - struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx, *current_ctx; - - current_ctx = blk_mq_get_ctx(q); - - ctx = rq->mq_ctx; - if (!cpu_online(ctx->cpu)) { - ctx = current_ctx; - rq->mq_ctx = ctx; - } - hctx = q->mq_ops->map_queue(q, ctx->cpu); - - /* ctx->cpu might be offline */ - spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, false); - spin_unlock(&ctx->lock); - blk_mq_put_ctx(current_ctx); if (run_queue) diff --git a/block/blk-mq.h b/block/blk-mq.h index ed0035c..72beba1 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -23,7 +23,6 @@ struct blk_mq_ctx { }; void __blk_mq_complete_request(struct request *rq); -void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 18ba8a6..ff28fe3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -121,8 +121,7 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request_queue *, struct request *, - bool, bool); +void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -- cgit v0.10.2 From d6a25b31315327eef7785b895c354cc45c3f3742 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:38 -0800 Subject: blk-mq: support partial I/O completions Add a new blk_mq_end_io_partial function to partially complete requests as needed by the SCSI layer. We do this by reusing blk_update_request to advance the bio instead of having a simplified version of it in the blk-mq code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index 2af8405..1b8b50d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -283,38 +283,10 @@ void blk_mq_free_request(struct request *rq) __blk_mq_free_request(hctx, ctx, rq); } -static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error) +bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes) { - if (error) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; - - if (unlikely(rq->cmd_flags & REQ_QUIET)) - set_bit(BIO_QUIET, &bio->bi_flags); - - /* don't actually finish bio if it's part of flush sequence */ - if (!(rq->cmd_flags & REQ_FLUSH_SEQ)) - bio_endio(bio, error); -} - -void blk_mq_end_io(struct request *rq, int error) -{ - struct bio *bio = rq->bio; - unsigned int bytes = 0; - - trace_block_rq_complete(rq->q, rq); - - while (bio) { - struct bio *next = bio->bi_next; - - bio->bi_next = NULL; - bytes += bio->bi_iter.bi_size; - blk_mq_bio_endio(rq, bio, error); - bio = next; - } - - blk_account_io_completion(rq, bytes); + if (blk_update_request(rq, error, blk_rq_bytes(rq))) + return true; blk_account_io_done(rq); @@ -322,8 +294,9 @@ void blk_mq_end_io(struct request *rq, int error) rq->end_io(rq, error); else blk_mq_free_request(rq); + return false; } -EXPORT_SYMBOL(blk_mq_end_io); +EXPORT_SYMBOL(blk_mq_end_io_partial); static void __blk_mq_complete_request_remote(void *data) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ff28fe3..2ff2e8d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -133,7 +133,13 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_ind struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); -void blk_mq_end_io(struct request *rq, int error); +bool blk_mq_end_io_partial(struct request *rq, int error, + unsigned int nr_bytes); +static inline void blk_mq_end_io(struct request *rq, int error) +{ + bool done = !blk_mq_end_io_partial(rq, error, blk_rq_bytes(rq)); + BUG_ON(!done); +} void blk_mq_complete_request(struct request *rq); -- cgit v0.10.2 From e9d6dca51823b94e1ca28cb5e9180701d4375d61 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 31 Jan 2014 05:38:21 +0400 Subject: xtensa: don't pass high memory to bootmem allocator This fixes panic when booting on machine with more than 128M memory passed from the bootloader. Signed-off-by: Max Filippov diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 479d753..aff108d 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -90,7 +90,7 @@ int __init mem_reserve(unsigned long start, unsigned long end, int must_exist) /* - * Initialize the bootmem system and give it all the memory we have available. + * Initialize the bootmem system and give it all low memory we have available. */ void __init bootmem_init(void) @@ -142,9 +142,14 @@ void __init bootmem_init(void) /* Add all remaining memory pieces into the bootmem map */ - for (i=0; i> PAGE_SHIFT < max_low_pfn) { + unsigned long end = min(max_low_pfn << PAGE_SHIFT, + sysmem.bank[i].end); + free_bootmem(sysmem.bank[i].start, + end - sysmem.bank[i].start); + } + } } -- cgit v0.10.2 From 8e9356c6146d0bb81a6ffb02eae522e57ff29662 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Fri, 7 Feb 2014 11:09:52 +0400 Subject: xtensa: fsf: drop nonexistent GPIO32 support The toolchain for xtensa FSF core never supported GPIO32, drop it on the linux side too. Reported-by: Fengguang Wu Signed-off-by: Max Filippov Acked-by: Baruch Siach diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index ba56e11..1cfb3d5 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -80,7 +80,6 @@ choice config XTENSA_VARIANT_FSF bool "fsf - default (not generic) configuration" select MMU - select HAVE_XTENSA_GPIO32 config XTENSA_VARIANT_DC232B bool "dc232b - Diamond 232L Standard Core Rev.B (LE)" diff --git a/arch/xtensa/variants/fsf/include/variant/tie.h b/arch/xtensa/variants/fsf/include/variant/tie.h index bf40201..244cdea 100644 --- a/arch/xtensa/variants/fsf/include/variant/tie.h +++ b/arch/xtensa/variants/fsf/include/variant/tie.h @@ -18,13 +18,6 @@ #define XCHAL_CP_MASK 0x00 /* bitmask of all CPs by ID */ #define XCHAL_CP_PORT_MASK 0x00 /* bitmask of only port CPs */ -/* Basic parameters of each coprocessor: */ -#define XCHAL_CP7_NAME "XTIOP" -#define XCHAL_CP7_IDENT XTIOP -#define XCHAL_CP7_SA_SIZE 0 /* size of state save area */ -#define XCHAL_CP7_SA_ALIGN 1 /* min alignment of save area */ -#define XCHAL_CP_ID_XTIOP 7 /* coprocessor ID (0..7) */ - /* Filler info for unassigned coprocessors, to simplify arrays etc: */ #define XCHAL_NCP_SA_SIZE 0 #define XCHAL_NCP_SA_ALIGN 1 @@ -42,6 +35,8 @@ #define XCHAL_CP5_SA_ALIGN 1 #define XCHAL_CP6_SA_SIZE 0 #define XCHAL_CP6_SA_ALIGN 1 +#define XCHAL_CP7_SA_SIZE 0 +#define XCHAL_CP7_SA_ALIGN 1 /* Save area for non-coprocessor optional and custom (TIE) state: */ #define XCHAL_NCP_SA_SIZE 0 -- cgit v0.10.2 From 4e3b4df839b550aec463945b10141c65c69f5135 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 9 Feb 2014 15:45:06 +0100 Subject: xtensa: no need to select USE_GENERIC_SMP_HELPERS Commit f615136c06a7 ("xtensa: add SMP support") added "select USE_GENERIC_SMP_HELPERS". But the Kconfig symbol USE_GENERIC_SMP_HELPERS was already removed in v3.13, so that select is a nop. Drop it. Signed-off-by: Paul Bolle Signed-off-by: Max Filippov diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 1cfb3d5..44f1152 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -134,7 +134,6 @@ config HAVE_SMP config SMP bool "Enable Symmetric multi-processing support" depends on HAVE_SMP - select USE_GENERIC_SMP_HELPERS select GENERIC_SMP_IDLE_THREAD help Enabled SMP Software; allows more than one CPU/CORE -- cgit v0.10.2 From bda8932d234aeaee870ac666e776a5ba03bb13a4 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 29 Jan 2014 06:20:46 +0400 Subject: xtensa: support common clock framework Signed-off-by: Max Filippov diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 44f1152..c87ae7c 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -20,6 +20,7 @@ config XTENSA select HAVE_FUNCTION_TRACER select HAVE_IRQ_TIME_ACCOUNTING select HAVE_PERF_EVENTS + select COMMON_CLK help Xtensa processors are 32-bit RISC machines designed by Tensilica primarily for embedded systems. These processors are both diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 7d12af1..84fe931 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -276,6 +277,7 @@ void __init early_init_devtree(void *params) static int __init xtensa_device_probe(void) { + of_clk_init(NULL); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } -- cgit v0.10.2 From cdc9af7ccfc26d35ff8a29dded2cc2c096c0fc1e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 29 Jan 2014 07:42:46 +0400 Subject: xtensa: xtfpga: use common clock framework With this change the board needs to set up single clock object, users of this clock will get correct frequency automatically. Signed-off-by: Max Filippov diff --git a/arch/xtensa/boot/dts/xtfpga.dtsi b/arch/xtensa/boot/dts/xtfpga.dtsi index 46b4f5e..d5ccbbb 100644 --- a/arch/xtensa/boot/dts/xtfpga.dtsi +++ b/arch/xtensa/boot/dts/xtfpga.dtsi @@ -35,6 +35,13 @@ interrupt-controller; }; + clocks { + osc: main-oscillator { + #clock-cells = <0>; + compatible = "fixed-clock"; + }; + }; + serial0: serial@fd050020 { device_type = "serial"; compatible = "ns16550a"; @@ -42,9 +49,7 @@ reg = <0xfd050020 0x20>; reg-shift = <2>; interrupts = <0 1>; /* external irq 0 */ - /* Filled in by platform_setup from FPGA register - * clock-frequency = <100000000>; - */ + clocks = <&osc>; }; enet0: ethoc@fd030000 { diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 8002278..0372913 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -135,11 +135,11 @@ static void __init update_local_mac(struct device_node *node) static int __init machine_setup(void) { - struct device_node *serial; + struct device_node *clock; struct device_node *eth = NULL; - for_each_compatible_node(serial, NULL, "ns16550a") - update_clock_frequency(serial); + for_each_node_by_name(clock, "main-oscillator") + update_clock_frequency(clock); if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc"))) update_local_mac(eth); -- cgit v0.10.2 From 2bc2fde63858322a942864bf6cb2d54f5aa33186 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 29 Jan 2014 07:53:44 +0400 Subject: xtensa: xtfpga: set ethoc clock frequency Connect xtfpga board ethernet MAC to the clock in the DTS. Set up MAC base frequency in the platform data in case of build w/o CONFIG_OF. Signed-off-by: Max Filippov diff --git a/arch/xtensa/boot/dts/xtfpga.dtsi b/arch/xtensa/boot/dts/xtfpga.dtsi index d5ccbbb..e7370b1 100644 --- a/arch/xtensa/boot/dts/xtfpga.dtsi +++ b/arch/xtensa/boot/dts/xtfpga.dtsi @@ -57,5 +57,6 @@ reg = <0xfd030000 0x4000 0xfd800000 0x4000>; interrupts = <1 1>; /* external irq 1 */ local-mac-address = [00 50 c2 13 6f 00]; + clocks = <&osc>; }; }; diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 0372913..57fd08b 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -290,6 +290,7 @@ static int __init xtavnet_init(void) * knows whether they set it correctly on the DIP switches. */ pr_info("XTFPGA: Ethernet MAC %pM\n", ethoc_pdata.hwaddr); + ethoc_pdata.eth_clkfreq = *(long *)XTFPGA_CLKFRQ_VADDR; return 0; } -- cgit v0.10.2 From f63b6d7555cd4064554b39da4d44c4cbbc9d6a4a Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 18 Feb 2014 16:00:05 +0400 Subject: xtensa: wire up sched_setattr and sched_getattr syscalls Signed-off-by: Max Filippov diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h index 51940fe..b939552 100644 --- a/arch/xtensa/include/uapi/asm/unistd.h +++ b/arch/xtensa/include/uapi/asm/unistd.h @@ -734,7 +734,12 @@ __SYSCALL(332, sys_finit_module, 3) #define __NR_accept4 333 __SYSCALL(333, sys_accept4, 4) -#define __NR_syscall_count 334 +#define __NR_sched_setattr 334 +__SYSCALL(334, sys_sched_setattr, 2) +#define __NR_sched_getattr 335 +__SYSCALL(335, sys_sched_getattr, 3) + +#define __NR_syscall_count 336 /* * sysxtensa syscall handler -- cgit v0.10.2 From eb7a59b2c888c2518ba2c9d0020343ca71aa9dee Mon Sep 17 00:00:00 2001 From: Michael wang Date: Thu, 20 Feb 2014 11:14:53 +0800 Subject: sched/fair: Reset se-depth when task switched to FAIR Sasha reported: [ 522.645288] BUG: unable to handle kernel NULL pointer dereference at ... [ 522.646271] IP: [] check_preempt_wakeup+0x11f/0x210 ... [ 522.650021] Call Trace: [ 522.650021] [ 522.650021] [] check_preempt_curr+0x3d/0xb0 [ 522.650021] [] ttwu_do_wakeup+0x18/0x130 ... which was caused by the se-depth changed during the time when task is not FAIR, and we will use the wrong depth value after it switched back to FAIR. This patch reset the depth at the time when task switched to FAIR, make sure that we always have the correct value when task is FAIR. Cc: Ingo Molnar Reported-by: Sasha Levin Tested-by: Sasha Levin Signed-off-by: Michael Wang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/5305732D.70001@linux.vnet.ibm.com Signed-off-by: Thomas Gleixner diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 235cfa7..280da89 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7317,7 +7317,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p) */ static void switched_to_fair(struct rq *rq, struct task_struct *p) { - if (!p->se.on_rq) + struct sched_entity *se = &p->se; +#ifdef CONFIG_FAIR_GROUP_SCHED + /* + * Since the real-depth could have been changed (only FAIR + * class maintain depth value), reset depth properly. + */ + se->depth = se->parent ? se->parent->depth + 1 : 0; +#endif + if (!se->on_rq) return; /* -- cgit v0.10.2 From 6e83125c6b151afa139c8852c099d6d92954fe3b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 11 Feb 2014 16:11:48 +0100 Subject: sched/fair: Remove idle_balance() declaration in sched.h Remove idle_balance() from the public life; also reduce some #ifdef clutter by folding the pick_next_task_fair() idle path into idle_balance(). Cc: mingo@kernel.org Reported-by: Daniel Lezcano Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140211151148.GP27965@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 280da89..40c758b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2374,13 +2374,13 @@ static inline void __update_group_entity_contrib(struct sched_entity *se) se->avg.load_avg_contrib >>= NICE_0_SHIFT; } } -#else +#else /* CONFIG_FAIR_GROUP_SCHED */ static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, int force_update) {} static inline void __update_tg_runnable_avg(struct sched_avg *sa, struct cfs_rq *cfs_rq) {} static inline void __update_group_entity_contrib(struct sched_entity *se) {} -#endif +#endif /* CONFIG_FAIR_GROUP_SCHED */ static inline void __update_task_entity_contrib(struct sched_entity *se) { @@ -2571,6 +2571,8 @@ void idle_exit_fair(struct rq *this_rq) update_rq_runnable_avg(this_rq, 0); } +static int idle_balance(struct rq *this_rq); + #else /* CONFIG_SMP */ static inline void update_entity_load_avg(struct sched_entity *se, @@ -2584,6 +2586,12 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, int sleep) {} static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) {} + +static inline int idle_balance(struct rq *rq) +{ + return 0; +} + #endif /* CONFIG_SMP */ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) @@ -4677,7 +4685,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev) struct sched_entity *se; struct task_struct *p; -again: __maybe_unused +again: #ifdef CONFIG_FAIR_GROUP_SCHED if (!cfs_rq->nr_running) goto idle; @@ -4775,18 +4783,8 @@ simple: return p; idle: -#ifdef CONFIG_SMP - idle_enter_fair(rq); - /* - * We must set idle_stamp _before_ calling idle_balance(), such that we - * measure the duration of idle_balance() as idle time. - */ - rq->idle_stamp = rq_clock(rq); - if (idle_balance(rq)) { /* drops rq->lock */ - rq->idle_stamp = 0; + if (idle_balance(rq)) /* drops rq->lock */ goto again; - } -#endif return NULL; } @@ -6634,7 +6632,7 @@ out: * idle_balance is called by schedule() if this_cpu is about to become * idle. Attempts to pull tasks from other CPUs. */ -int idle_balance(struct rq *this_rq) +static int idle_balance(struct rq *this_rq) { struct sched_domain *sd; int pulled_task = 0; @@ -6642,8 +6640,15 @@ int idle_balance(struct rq *this_rq) u64 curr_cost = 0; int this_cpu = this_rq->cpu; + idle_enter_fair(this_rq); + /* + * We must set idle_stamp _before_ calling idle_balance(), such that we + * measure the duration of idle_balance() as idle time. + */ + this_rq->idle_stamp = rq_clock(this_rq); + if (this_rq->avg_idle < sysctl_sched_migration_cost) - return 0; + goto out; /* * Drop the rq->lock, but keep IRQ/preempt disabled. @@ -6692,8 +6697,10 @@ int idle_balance(struct rq *this_rq) * While browsing the domains, we released the rq lock. * A task could have be enqueued in the meantime */ - if (this_rq->nr_running && !pulled_task) - return 1; + if (this_rq->nr_running && !pulled_task) { + pulled_task = 1; + goto out; + } if (pulled_task || time_after(jiffies, this_rq->next_balance)) { /* @@ -6706,6 +6713,10 @@ int idle_balance(struct rq *this_rq) if (curr_cost > this_rq->max_idle_balance_cost) this_rq->max_idle_balance_cost = curr_cost; +out: + if (pulled_task) + this_rq->idle_stamp = 0; + return pulled_task; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1bf34c2..92018f9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1163,17 +1163,10 @@ extern const struct sched_class idle_sched_class; extern void update_group_power(struct sched_domain *sd, int cpu); extern void trigger_load_balance(struct rq *rq); -extern int idle_balance(struct rq *this_rq); extern void idle_enter_fair(struct rq *this_rq); extern void idle_exit_fair(struct rq *this_rq); -#else /* CONFIG_SMP */ - -static inline void idle_balance(int cpu, struct rq *rq) -{ -} - #endif extern void sysrq_sched_debug_show(void); -- cgit v0.10.2 From 3f1d2a318171bf61850d4e5a72031271e5aada76 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Feb 2014 10:49:30 +0100 Subject: sched: Fix hotplug task migration Dan Carpenter reported: > kernel/sched/rt.c:1347 pick_next_task_rt() warn: variable dereferenced before check 'prev' (see line 1338) > kernel/sched/deadline.c:1011 pick_next_task_dl() warn: variable dereferenced before check 'prev' (see line 1005) Kirill also spotted that migrate_tasks() will have an instant NULL deref because pick_next_task() will immediately deref prev. Instead of fixing all the corner cases because migrate_tasks() can pass in a NULL prev task in the unlikely case of hot-un-plug, provide a fake task such that we can remove all the NULL checks from the far more common paths. A further problem; not previously spotted; is that because we pushed pre_schedule() and idle_balance() into pick_next_task() we now need to avoid those getting called and pulling more tasks on our dying CPU. We avoid pull_{dl,rt}_task() by setting fake_task.prio to MAX_PRIO+1. We also note that since we call pick_next_task() exactly the amount of times we have runnable tasks present, we should never land in idle_balance(). Fixes: 38033c37faab ("sched: Push down pre_schedule() and idle_balance()") Cc: Juri Lelli Cc: Ingo Molnar Cc: Steven Rostedt Reported-by: Kirill Tkhai Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140212094930.GB3545@laptop.programming.kicks-ass.net Signed-off-by: Thomas Gleixner diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fb9764f..49db434 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4681,6 +4681,22 @@ static void calc_load_migrate(struct rq *rq) atomic_long_add(delta, &calc_load_tasks); } +static void put_prev_task_fake(struct rq *rq, struct task_struct *prev) +{ +} + +static const struct sched_class fake_sched_class = { + .put_prev_task = put_prev_task_fake, +}; + +static struct task_struct fake_task = { + /* + * Avoid pull_{rt,dl}_task() + */ + .prio = MAX_PRIO + 1, + .sched_class = &fake_sched_class, +}; + /* * Migrate all tasks from the rq, sleeping tasks will be migrated by * try_to_wake_up()->select_task_rq(). @@ -4721,7 +4737,7 @@ static void migrate_tasks(unsigned int dead_cpu) if (rq->nr_running == 1) break; - next = pick_next_task(rq, NULL); + next = pick_next_task(rq, &fake_task); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index ed31ef6..bfeb84e 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1008,8 +1008,7 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) if (unlikely(!dl_rq->dl_nr_running)) return NULL; - if (prev) - prev->sched_class->put_prev_task(rq, prev); + put_prev_task(rq, prev); dl_se = pick_next_dl_entity(rq, dl_rq); BUG_ON(!dl_se); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 40c758b..e884e45 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4690,7 +4690,7 @@ again: if (!cfs_rq->nr_running) goto idle; - if (!prev || prev->sched_class != &fair_sched_class) + if (prev->sched_class != &fair_sched_class) goto simple; /* @@ -4766,8 +4766,7 @@ simple: if (!cfs_rq->nr_running) goto idle; - if (prev) - prev->sched_class->put_prev_task(rq, prev); + put_prev_task(rq, prev); do { se = pick_next_entity(cfs_rq, NULL); diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index f7d03af..53ff9e7 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -26,8 +26,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl static struct task_struct * pick_next_task_idle(struct rq *rq, struct task_struct *prev) { - if (prev) - prev->sched_class->put_prev_task(rq, prev); + put_prev_task(rq, prev); schedstat_inc(rq, sched_goidle); #ifdef CONFIG_SMP diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 72f9ec7..65c2d68 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1344,8 +1344,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) if (rt_rq_throttled(rt_rq)) return NULL; - if (prev) - prev->sched_class->put_prev_task(rq, prev); + put_prev_task(rq, prev); p = _pick_next_task_rt(rq); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 92018f9..d276147 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1147,6 +1147,11 @@ struct sched_class { #endif }; +static inline void put_prev_task(struct rq *rq, struct task_struct *prev) +{ + prev->sched_class->put_prev_task(rq, prev); +} + #define sched_class_highest (&stop_sched_class) #define for_each_class(class) \ for (class = sched_class_highest; class; class = class->next) diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index a4147c9..d6ce65d 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -31,8 +31,7 @@ pick_next_task_stop(struct rq *rq, struct task_struct *prev) if (!stop || !stop->on_rq) return NULL; - if (prev) - prev->sched_class->put_prev_task(rq, prev); + put_prev_task(rq, prev); stop->se.exec_start = rq_clock_task(rq); -- cgit v0.10.2 From dc87734106bb6e97c92d8bd81f261fb71976ec2c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Feb 2014 15:47:29 +0100 Subject: sched: Remove some #ifdeffery Remove a few gratuitous #ifdefs in pick_next_task*(). Cc: Ingo Molnar Cc: Steven Rostedt Cc: Juri Lelli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-nnzddp5c4fijyzzxxrwlxghf@git.kernel.org Signed-off-by: Thomas Gleixner diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index bfeb84e..3185b77 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -214,6 +214,16 @@ static inline int has_pushable_dl_tasks(struct rq *rq) static int push_dl_task(struct rq *rq); +static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) +{ + return dl_task(prev); +} + +static inline void set_post_schedule(struct rq *rq) +{ + rq->post_schedule = has_pushable_dl_tasks(rq); +} + #else static inline @@ -236,6 +246,19 @@ void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { } +static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev) +{ + return false; +} + +static inline int pull_dl_task(struct rq *rq) +{ + return 0; +} + +static inline void set_post_schedule(struct rq *rq) +{ +} #endif /* CONFIG_SMP */ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags); @@ -1000,10 +1023,8 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) dl_rq = &rq->dl; -#ifdef CONFIG_SMP - if (dl_task(prev)) + if (need_pull_dl_task(rq, prev)) pull_dl_task(rq); -#endif if (unlikely(!dl_rq->dl_nr_running)) return NULL; @@ -1024,9 +1045,7 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) start_hrtick_dl(rq, p); #endif -#ifdef CONFIG_SMP - rq->post_schedule = has_pushable_dl_tasks(rq); -#endif /* CONFIG_SMP */ + set_post_schedule(rq); return p; } diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 53ff9e7..1f37258 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -29,9 +29,7 @@ pick_next_task_idle(struct rq *rq, struct task_struct *prev) put_prev_task(rq, prev); schedstat_inc(rq, sched_goidle); -#ifdef CONFIG_SMP idle_enter_fair(rq); -#endif return rq->idle; } @@ -50,10 +48,8 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags) static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { -#ifdef CONFIG_SMP idle_exit_fair(rq); rq_last_tick_reset(rq); -#endif } static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 65c2d68..3e488ca 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -231,6 +231,12 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) static int pull_rt_task(struct rq *this_rq); +static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) +{ + /* Try to pull RT tasks here if we lower this rq's prio */ + return rq->rt.highest_prio.curr > prev->prio; +} + static inline int rt_overloaded(struct rq *rq) { return atomic_read(&rq->rd->rto_count); @@ -317,6 +323,15 @@ static inline int has_pushable_tasks(struct rq *rq) return !plist_head_empty(&rq->rt.pushable_tasks); } +static inline void set_post_schedule(struct rq *rq) +{ + /* + * We detect this state here so that we can avoid taking the RQ + * lock again later if there is no need to push + */ + rq->post_schedule = has_pushable_tasks(rq); +} + static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) { plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); @@ -361,6 +376,19 @@ void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { } +static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) +{ + return false; +} + +static inline int pull_rt_task(struct rq *this_rq) +{ + return 0; +} + +static inline void set_post_schedule(struct rq *rq) +{ +} #endif /* CONFIG_SMP */ static inline int on_rt_rq(struct sched_rt_entity *rt_se) @@ -1332,11 +1360,8 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) struct task_struct *p; struct rt_rq *rt_rq = &rq->rt; -#ifdef CONFIG_SMP - /* Try to pull RT tasks here if we lower this rq's prio */ - if (rq->rt.highest_prio.curr > prev->prio) + if (need_pull_rt_task(rq, prev)) pull_rt_task(rq); -#endif if (!rt_rq->rt_nr_running) return NULL; @@ -1352,13 +1377,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) if (p) dequeue_pushable_task(rq, p); -#ifdef CONFIG_SMP - /* - * We detect this state here so that we can avoid taking the RQ - * lock again later if there is no need to push - */ - rq->post_schedule = has_pushable_tasks(rq); -#endif + set_post_schedule(rq); return p; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d276147..caf4abd 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1172,6 +1172,11 @@ extern void trigger_load_balance(struct rq *rq); extern void idle_enter_fair(struct rq *this_rq); extern void idle_exit_fair(struct rq *this_rq); +#else + +static inline void idle_enter_fair(struct rq *rq) { } +static inline void idle_exit_fair(struct rq *rq) { } + #endif extern void sysrq_sched_debug_show(void); -- cgit v0.10.2 From cd578abb24aa67ce468c427d3356c08ea32cf768 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 11 Feb 2014 16:01:16 +0100 Subject: perf/x86: Warn to early_printk() in case irq_work is too slow On Mon, Feb 10, 2014 at 08:45:16AM -0800, Dave Hansen wrote: > The reason I coded this up was that NMIs were firing off so fast that > nothing else was getting a chance to run. With this patch, at least the > printk() would come out and I'd have some idea what was going on. It will start spewing to early_printk() (which is a lot nicer to use from NMI context too) when it fails to queue the IRQ-work because its already enqueued. It does have the false-positive for when two CPUs trigger the warn concurrently, but that should be rare and some extra clutter on the early printk shouldn't be a problem. Cc: hpa@zytor.com Cc: tglx@linutronix.de Cc: dzickus@redhat.com Cc: Dave Hansen Cc: mingo@kernel.org Fixes: 6a02ad66b2c4 ("perf/x86: Push the duration-logging printk() to IRQ context") Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140211150116.GO27965@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index add13c8..19ae05d 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -32,7 +32,7 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } -void irq_work_queue(struct irq_work *work); +bool irq_work_queue(struct irq_work *work); void irq_work_run(void); void irq_work_sync(struct irq_work *work); diff --git a/kernel/events/core.c b/kernel/events/core.c index 2067cbb..45e5543 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -243,7 +243,7 @@ static void perf_duration_warn(struct irq_work *w) printk_ratelimited(KERN_WARNING "perf interrupt took too long (%lld > %lld), lowering " "kernel.perf_event_max_sample_rate to %d\n", - avg_local_sample_len, allowed_ns, + avg_local_sample_len, allowed_ns >> 1, sysctl_perf_event_sample_rate); } @@ -283,7 +283,12 @@ void perf_sample_event_took(u64 sample_len_ns) update_perf_cpu_limits(); - irq_work_queue(&perf_duration_work); + if (!irq_work_queue(&perf_duration_work)) { + early_printk("perf interrupt took too long (%lld > %lld), lowering " + "kernel.perf_event_max_sample_rate to %d\n", + avg_local_sample_len, allowed_ns >> 1, + sysctl_perf_event_sample_rate); + } } static atomic64_t perf_event_id; diff --git a/kernel/irq_work.c b/kernel/irq_work.c index 55fcce6..a82170e 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -61,11 +61,11 @@ void __weak arch_irq_work_raise(void) * * Can be re-enqueued while the callback is still in progress. */ -void irq_work_queue(struct irq_work *work) +bool irq_work_queue(struct irq_work *work) { /* Only queue if not already pending */ if (!irq_work_claim(work)) - return; + return false; /* Queue the entry and raise the IPI if needed. */ preempt_disable(); @@ -83,6 +83,8 @@ void irq_work_queue(struct irq_work *work) } preempt_enable(); + + return true; } EXPORT_SYMBOL_GPL(irq_work_queue); -- cgit v0.10.2 From 411cf180fa00521f9bfb1d022e3ebf059a2d299f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:07 +0100 Subject: perf/x86/uncore: fix initialization of cpumask On certain processors, the uncore PMU boxes may only be msr-bsed or PCI-based. But in both cases, the cpumask, suggesting on which CPUs to monitor to get full coverage of the particular PMU, must be created. However with the current code base, the cpumask was only created on processor which had at least one MSR-based uncore PMU. This patch removes that restriction and ensures the cpumask is created even when there is no msr-based PMU. For instance, on SNB client where only a PCI-based memory controller PMU is supported. Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-2-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 29c2487..fe4255b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -3764,7 +3764,7 @@ static void __init uncore_cpu_setup(void *dummy) static int __init uncore_cpu_init(void) { - int ret, cpu, max_cores; + int ret, max_cores; max_cores = boot_cpu_data.x86_max_cores; switch (boot_cpu_data.x86_model) { @@ -3808,29 +3808,6 @@ static int __init uncore_cpu_init(void) if (ret) return ret; - get_online_cpus(); - - for_each_online_cpu(cpu) { - int i, phys_id = topology_physical_package_id(cpu); - - for_each_cpu(i, &uncore_cpu_mask) { - if (phys_id == topology_physical_package_id(i)) { - phys_id = -1; - break; - } - } - if (phys_id < 0) - continue; - - uncore_cpu_prepare(cpu, phys_id); - uncore_event_init_cpu(cpu); - } - on_each_cpu(uncore_cpu_setup, NULL, 1); - - register_cpu_notifier(&uncore_cpu_nb); - - put_online_cpus(); - return 0; } @@ -3859,6 +3836,41 @@ static int __init uncore_pmus_register(void) return 0; } +static void uncore_cpumask_init(void) +{ + int cpu; + + /* + * ony invoke once from msr or pci init code + */ + if (!cpumask_empty(&uncore_cpu_mask)) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) { + int i, phys_id = topology_physical_package_id(cpu); + + for_each_cpu(i, &uncore_cpu_mask) { + if (phys_id == topology_physical_package_id(i)) { + phys_id = -1; + break; + } + } + if (phys_id < 0) + continue; + + uncore_cpu_prepare(cpu, phys_id); + uncore_event_init_cpu(cpu); + } + on_each_cpu(uncore_cpu_setup, NULL, 1); + + register_cpu_notifier(&uncore_cpu_nb); + + put_online_cpus(); +} + + static int __init intel_uncore_init(void) { int ret; @@ -3877,6 +3889,7 @@ static int __init intel_uncore_init(void) uncore_pci_exit(); goto fail; } + uncore_cpumask_init(); uncore_pmus_register(); return 0; -- cgit v0.10.2 From d64b25b6a08d95e973160689320504faed62be4a Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:08 +0100 Subject: perf/x86/uncore: add ability to customize pmu callbacks This patch enables custom struct pmu callbacks per uncore PMU types. This feature may be used to simplify counter setup for certain uncore PMUs which have free running counters for instance. It becomes possible to bypass the event scheduling phase of the configuration. Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-3-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index fe4255b..e6f32b3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -3271,16 +3271,21 @@ static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) { int ret; - pmu->pmu = (struct pmu) { - .attr_groups = pmu->type->attr_groups, - .task_ctx_nr = perf_invalid_context, - .event_init = uncore_pmu_event_init, - .add = uncore_pmu_event_add, - .del = uncore_pmu_event_del, - .start = uncore_pmu_event_start, - .stop = uncore_pmu_event_stop, - .read = uncore_pmu_event_read, - }; + if (!pmu->type->pmu) { + pmu->pmu = (struct pmu) { + .attr_groups = pmu->type->attr_groups, + .task_ctx_nr = perf_invalid_context, + .event_init = uncore_pmu_event_init, + .add = uncore_pmu_event_add, + .del = uncore_pmu_event_del, + .start = uncore_pmu_event_start, + .stop = uncore_pmu_event_stop, + .read = uncore_pmu_event_read, + }; + } else { + pmu->pmu = *pmu->type->pmu; + pmu->pmu.attr_groups = pmu->type->attr_groups; + } if (pmu->type->num_boxes == 1) { if (strlen(pmu->type->name) > 0) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index a80ab71..77dc9a5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -440,6 +440,7 @@ struct intel_uncore_type { struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; const struct attribute_group *attr_groups[4]; + struct pmu *pmu; /* for custom pmu ops */ }; #define pmu_group attr_groups[0] -- cgit v0.10.2 From ae58faf90050ea72c289f62c4bc9a5bb8ae7f0bd Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:09 +0100 Subject: perf/x86/uncore: add PCI ids for SNB/IVB/HSW IMC This patch adds the PCI ids for the Intel SandyBridge, IvyBridge, Haswell Client memory controller (IMC). Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-4-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 97fbecd..7399e6a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2531,6 +2531,9 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 +#define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC 0x0154 +#define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 #define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 -- cgit v0.10.2 From 79859cce5a551c3bcd8d36a6287f9d83568ccedb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:10 +0100 Subject: perf/x86/uncore: make hrtimer timeout configurable per box This patch makes the hrtimer timeout configurable per PMU box. Not all counters have necessarily the same width and rate, thus the default timeout of 60s may need to be adjusted. This patch adds box->hrtimer_duration. It is set to default when the box is allocated. It can be overriden when the box is initialized. Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-5-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index e6f32b3..ea823b8 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -2798,14 +2798,14 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) local_irq_restore(flags); - hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL)); + hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); return HRTIMER_RESTART; } static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) { __hrtimer_start_range_ns(&box->hrtimer, - ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0, + ns_to_ktime(box->hrtimer_duration), 0, HRTIMER_MODE_REL_PINNED, 0); } @@ -2839,6 +2839,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, box->cpu = -1; box->phys_id = -1; + /* set default hrtimer timeout */ + box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; + return box; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 77dc9a5..7efd298 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -489,6 +489,7 @@ struct intel_uncore_box { u64 tags[UNCORE_PMC_IDX_MAX]; struct pci_dev *pci_dev; struct intel_uncore_pmu *pmu; + u64 hrtimer_duration; /* hrtimer timeout for this box */ struct hrtimer hrtimer; struct list_head list; struct intel_uncore_extra_reg shared_regs[0]; -- cgit v0.10.2 From 001e413f7e7a4a68dc1c3231f72b5be173939c8f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:11 +0100 Subject: perf/x86/uncore: move uncore_event_to_box() and uncore_pmu_to_box() Move a couple of functions around to avoid forward declarations when we add code later on. Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-6-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index ea823b8..acbbdde 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -66,6 +66,42 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); +static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct intel_uncore_pmu, pmu); +} + +static struct intel_uncore_box * +uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) +{ + struct intel_uncore_box *box; + + box = *per_cpu_ptr(pmu->box, cpu); + if (box) + return box; + + raw_spin_lock(&uncore_box_lock); + list_for_each_entry(box, &pmu->box_list, list) { + if (box->phys_id == topology_physical_package_id(cpu)) { + atomic_inc(&box->refcnt); + *per_cpu_ptr(pmu->box, cpu) = box; + break; + } + } + raw_spin_unlock(&uncore_box_lock); + + return *per_cpu_ptr(pmu->box, cpu); +} + +static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) +{ + /* + * perf core schedules event on the basis of cpu, uncore events are + * collected by one of the cpus inside a physical package. + */ + return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); +} + static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) { u64 count; @@ -2845,42 +2881,6 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, return box; } -static struct intel_uncore_box * -uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) -{ - struct intel_uncore_box *box; - - box = *per_cpu_ptr(pmu->box, cpu); - if (box) - return box; - - raw_spin_lock(&uncore_box_lock); - list_for_each_entry(box, &pmu->box_list, list) { - if (box->phys_id == topology_physical_package_id(cpu)) { - atomic_inc(&box->refcnt); - *per_cpu_ptr(pmu->box, cpu) = box; - break; - } - } - raw_spin_unlock(&uncore_box_lock); - - return *per_cpu_ptr(pmu->box, cpu); -} - -static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) -{ - return container_of(event->pmu, struct intel_uncore_pmu, pmu); -} - -static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) -{ - /* - * perf core schedules event on the basis of cpu, uncore events are - * collected by one of the cpus inside a physical package. - */ - return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); -} - static int uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp) { -- cgit v0.10.2 From b9e1ab6d4c0582cad97699285a6b3cf992251b00 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:12 +0100 Subject: perf/x86/uncore: add SNB/IVB/HSW client uncore memory controller support This patch adds a new uncore PMU for Intel SNB/IVB/HSW client CPUs. It adds the Integrated Memory Controller (IMC) PMU. This new PMU provides a set of events to measure memory bandwidth utilization. The IMC on those processor is PCI-space based. This patch exposes a new uncore PMU on those processor: uncore_imc Two new events are defined: - name: data_reads - code: 0x1 - unit: 64 bytes - number of full cacheline read requests to the IMC - name: data_writes - code: 0x2 - unit: 64 bytes - number of full cacheline write requests to the IMC Documentation available at: http://software.intel.com/en-us/articles/monitoring-integrated-memory-controller-requests-in-the-2nd-3rd-and-4th-generation-intel Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-7-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index acbbdde..66ce5e5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -66,6 +66,11 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); +static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); +static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); +static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); +static void uncore_pmu_event_read(struct perf_event *event); + static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) { return container_of(event->pmu, struct intel_uncore_pmu, pmu); @@ -1667,6 +1672,344 @@ static struct intel_uncore_type *snb_msr_uncores[] = { &snb_uncore_cbox, NULL, }; + +enum { + SNB_PCI_UNCORE_IMC, +}; + +static struct uncore_event_desc snb_uncore_imc_events[] = { + INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), + INTEL_UNCORE_EVENT_DESC(data_reads.scale, "64"), + INTEL_UNCORE_EVENT_DESC(data_reads.unit, "bytes"), + + INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), + INTEL_UNCORE_EVENT_DESC(data_writes.scale, "64"), + INTEL_UNCORE_EVENT_DESC(data_writes.unit, "bytes"), + + { /* end: all zeroes */ }, +}; + +#define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff +#define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 + +/* page size multiple covering all config regs */ +#define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 + +#define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 +#define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 +#define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 +#define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 +#define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE + +static struct attribute *snb_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group snb_uncore_imc_format_group = { + .name = "format", + .attrs = snb_uncore_imc_formats_attr, +}; + +static void snb_uncore_imc_init_box(struct intel_uncore_box *box) +{ + struct pci_dev *pdev = box->pci_dev; + u32 addr_lo, addr_hi; + resource_size_t addr; + + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &addr_lo); + addr = addr_lo; + +#ifdef CONFIG_PHYS_ADDR_T_64BIT + pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET+4, &addr_hi); + addr = ((resource_size_t)addr_hi << 32) | addr_lo; +#endif + + addr &= ~(PAGE_SIZE - 1); + + box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); +} + +static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) +{} + +static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) +{} + +static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event) +{} + +static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) +{} + +static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); +} + +/* + * custom event_init() function because we define our own fixed, free + * running counters, so we do not want to conflict with generic uncore + * logic. Also simplifies processing + */ +static int snb_uncore_imc_event_init(struct perf_event *event) +{ + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + struct hw_perf_event *hwc = &event->hw; + u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; + int idx, base; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + pmu = uncore_event_to_pmu(event); + /* no device found for this pmu */ + if (pmu->func_id < 0) + return -ENOENT; + + /* Sampling not supported yet */ + if (hwc->sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + event->attr.sample_period) /* no sampling */ + return -EINVAL; + + /* + * Place all uncore events for a particular physical package + * onto a single cpu + */ + if (event->cpu < 0) + return -EINVAL; + + /* check only supported bits are set */ + if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) + return -EINVAL; + + box = uncore_pmu_to_box(pmu, event->cpu); + if (!box || box->cpu < 0) + return -EINVAL; + + event->cpu = box->cpu; + + event->hw.idx = -1; + event->hw.last_tag = ~0ULL; + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + /* + * check event is known (whitelist, determines counter) + */ + switch (cfg) { + case SNB_UNCORE_PCI_IMC_DATA_READS: + base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; + idx = UNCORE_PMC_IDX_FIXED; + break; + case SNB_UNCORE_PCI_IMC_DATA_WRITES: + base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; + idx = UNCORE_PMC_IDX_FIXED + 1; + break; + default: + return -EINVAL; + } + + /* must be done before validate_group */ + event->hw.event_base = base; + event->hw.config = cfg; + event->hw.idx = idx; + + /* no group validation needed, we have free running counters */ + + return 0; +} + +static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event) +{ + return 0; +} + +static void snb_uncore_imc_event_start(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + u64 count; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + box->n_active++; + + list_add_tail(&event->active_entry, &box->active_list); + + count = snb_uncore_imc_read_counter(box, event); + local64_set(&event->hw.prev_count, count); + + if (box->n_active == 1) + uncore_pmu_start_hrtimer(box); +} + +static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + box->n_active--; + + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + + list_del(&event->active_entry); + + if (box->n_active == 0) + uncore_pmu_cancel_hrtimer(box); + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + uncore_perf_event_update(box, event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int snb_uncore_imc_event_add(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + struct hw_perf_event *hwc = &event->hw; + + if (!box) + return -ENODEV; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; + + snb_uncore_imc_event_start(event, 0); + + box->n_events++; + + return 0; +} + +static void snb_uncore_imc_event_del(struct perf_event *event, int flags) +{ + struct intel_uncore_box *box = uncore_event_to_box(event); + int i; + + snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); + + for (i = 0; i < box->n_events; i++) { + if (event == box->event_list[i]) { + --box->n_events; + break; + } + } +} + +static int snb_pci2phy_map_init(int devid) +{ + struct pci_dev *dev = NULL; + int bus; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); + if (!dev) + return -ENOTTY; + + bus = dev->bus->number; + + pcibus_to_physid[bus] = 0; + + pci_dev_put(dev); + + return 0; +} + +static struct pmu snb_uncore_imc_pmu = { + .task_ctx_nr = perf_invalid_context, + .event_init = snb_uncore_imc_event_init, + .add = snb_uncore_imc_event_add, + .del = snb_uncore_imc_event_del, + .start = snb_uncore_imc_event_start, + .stop = snb_uncore_imc_event_stop, + .read = uncore_pmu_event_read, +}; + +static struct intel_uncore_ops snb_uncore_imc_ops = { + .init_box = snb_uncore_imc_init_box, + .enable_box = snb_uncore_imc_enable_box, + .disable_box = snb_uncore_imc_disable_box, + .disable_event = snb_uncore_imc_disable_event, + .enable_event = snb_uncore_imc_enable_event, + .hw_config = snb_uncore_imc_hw_config, + .read_counter = snb_uncore_imc_read_counter, +}; + +static struct intel_uncore_type snb_uncore_imc = { + .name = "imc", + .num_counters = 2, + .num_boxes = 1, + .fixed_ctr_bits = 32, + .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, + .event_descs = snb_uncore_imc_events, + .format_group = &snb_uncore_imc_format_group, + .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE, + .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, + .ops = &snb_uncore_imc_ops, + .pmu = &snb_uncore_imc_pmu, +}; + +static struct intel_uncore_type *snb_pci_uncores[] = { + [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, + NULL, +}; + +static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, +}; + +static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, +}; + +static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, +}; + +static struct pci_driver snb_uncore_pci_driver = { + .name = "snb_uncore", + .id_table = snb_uncore_pci_ids, +}; + +static struct pci_driver ivb_uncore_pci_driver = { + .name = "ivb_uncore", + .id_table = ivb_uncore_pci_ids, +}; + +static struct pci_driver hsw_uncore_pci_driver = { + .name = "hsw_uncore", + .id_table = hsw_uncore_pci_ids, +}; + /* end of Sandy Bridge uncore support */ /* Nehalem uncore support */ @@ -3501,6 +3844,28 @@ static int __init uncore_pci_init(void) pci_uncores = ivt_pci_uncores; uncore_pci_driver = &ivt_uncore_pci_driver; break; + case 42: /* Sandy Bridge */ + ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC); + if (ret) + return ret; + pci_uncores = snb_pci_uncores; + uncore_pci_driver = &snb_uncore_pci_driver; + break; + case 58: /* Ivy Bridge */ + ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC); + if (ret) + return ret; + pci_uncores = snb_pci_uncores; + uncore_pci_driver = &ivb_uncore_pci_driver; + break; + case 60: /* Haswell */ + case 69: /* Haswell Celeron */ + ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC); + if (ret) + return ret; + pci_uncores = snb_pci_uncores; + uncore_pci_driver = &hsw_uncore_pci_driver; + break; default: return 0; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 7efd298..fbf45a0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -492,6 +492,7 @@ struct intel_uncore_box { u64 hrtimer_duration; /* hrtimer timeout for this box */ struct hrtimer hrtimer; struct list_head list; + void *io_addr; struct intel_uncore_extra_reg shared_regs[0]; }; -- cgit v0.10.2 From ced2efb099f4f24ea7030c22b064e5ddd65cd764 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:13 +0100 Subject: perf/x86/uncore: add hrtimer to SNB uncore IMC PMU This patch is needed because that PMU uses 32-bit free running counters with no interrupt capabilities. On SNB/IVB/HSW, we used 20GB/s theoretical peak to calculate the hrtimer timeout necessary to avoid missing an overflow. That delay is set to 5s to be on the cautious side. The SNB IMC uses free running counters, which are handled via pseudo fixed counters. The SNB IMC PMU implementation supports an arbitrary number of events, because the counters are read-only. Therefore it is not possible to track active counters. Instead we put active events on a linked list which is then used by the hrtimer handler to update the SW counts. Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-8-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 66ce5e5..7760aee 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1728,6 +1728,7 @@ static void snb_uncore_imc_init_box(struct intel_uncore_box *box) addr &= ~(PAGE_SIZE - 1); box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); + box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; } static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) @@ -3160,6 +3161,7 @@ again: static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) { struct intel_uncore_box *box; + struct perf_event *event; unsigned long flags; int bit; @@ -3172,6 +3174,14 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) */ local_irq_save(flags); + /* + * handle boxes with an active event list as opposed to active + * counters + */ + list_for_each_entry(event, &box->active_list, active_entry) { + uncore_perf_event_update(box, event); + } + for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) uncore_perf_event_update(box, box->events[bit]); @@ -3221,6 +3231,8 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, /* set default hrtimer timeout */ box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; + INIT_LIST_HEAD(&box->active_list); + return box; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index fbf45a0..90236f0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -6,6 +6,7 @@ #define UNCORE_PMU_NAME_LEN 32 #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) +#define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC) #define UNCORE_FIXED_EVENT 0xff #define UNCORE_PMC_IDX_MAX_GENERIC 8 @@ -492,6 +493,7 @@ struct intel_uncore_box { u64 hrtimer_duration; /* hrtimer timeout for this box */ struct hrtimer hrtimer; struct list_head list; + struct list_head active_list; void *io_addr; struct intel_uncore_extra_reg shared_regs[0]; }; -- cgit v0.10.2 From e9d9768824a18212712ae3afbebd5bfef05176f4 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 11 Feb 2014 16:20:14 +0100 Subject: perf/x86/uncore: use MiB unit for events for SNB/IVB/HSW IMC This patch makes perf use Mebibytes to display the counts of uncore_imc/data_reads/ and uncore_imc/data_writes. 1MiB = 1024*1024 bytes. Cc: mingo@elte.hu Cc: acme@redhat.com Cc: ak@linux.intel.com Cc: zheng.z.yan@intel.com Cc: peterz@infradead.org Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392132015-14521-9-git-send-email-eranian@google.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 7760aee..de5c568 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1679,12 +1679,12 @@ enum { static struct uncore_event_desc snb_uncore_imc_events[] = { INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), - INTEL_UNCORE_EVENT_DESC(data_reads.scale, "64"), - INTEL_UNCORE_EVENT_DESC(data_reads.unit, "bytes"), + INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), - INTEL_UNCORE_EVENT_DESC(data_writes.scale, "64"), - INTEL_UNCORE_EVENT_DESC(data_writes.unit, "bytes"), + INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), + INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), { /* end: all zeroes */ }, }; -- cgit v0.10.2 From deff82e688a278eb4b822fd616c05fc62907bb71 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 17 Feb 2014 22:30:58 +0100 Subject: ARM: OMAP2+: Add support for thumb mode on DT booted N900 Without enabling the workaround for ARM errata 430973 thumb compiled userland crashes randomly on the Nokia N900. Signed-off-by: Sebastian Reichel Reviewed-by: Pavel Machek Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 3d5b24d..0cc710d 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -22,6 +22,8 @@ #include "common-board-devices.h" #include "dss-common.h" #include "control.h" +#include "omap-secure.h" +#include "soc.h" struct pdata_init { const char *compatible; @@ -169,6 +171,22 @@ static void __init am3517_evm_legacy_init(void) omap_ctrl_writel(v, AM35XX_CONTROL_IP_SW_RESET); omap_ctrl_readl(AM35XX_CONTROL_IP_SW_RESET); /* OCP barrier */ } + +static void __init nokia_n900_legacy_init(void) +{ + hsmmc2_internal_input_clk(); + + if (omap_type() == OMAP2_DEVICE_TYPE_SEC) { + if (IS_ENABLED(CONFIG_ARM_ERRATA_430973)) { + pr_info("RX-51: Enabling ARM errata 430973 workaround\n"); + /* set IBE to 1 */ + rx51_secure_update_aux_cr(BIT(6), 0); + } else { + pr_warning("RX-51: Not enabling ARM errata 430973 workaround\n"); + pr_warning("Thumb binaries may crash randomly without this workaround\n"); + } + } +} #endif /* CONFIG_ARCH_OMAP3 */ #ifdef CONFIG_ARCH_OMAP4 @@ -259,7 +277,7 @@ struct of_dev_auxdata omap_auxdata_lookup[] __initdata = { static struct pdata_init pdata_quirks[] __initdata = { #ifdef CONFIG_ARCH_OMAP3 { "compulab,omap3-sbc-t3730", omap3_sbc_t3730_legacy_init, }, - { "nokia,omap3-n900", hsmmc2_internal_input_clk, }, + { "nokia,omap3-n900", nokia_n900_legacy_init, }, { "nokia,omap3-n9", hsmmc2_internal_input_clk, }, { "nokia,omap3-n950", hsmmc2_internal_input_clk, }, { "isee,omap3-igep0020", omap3_igep0020_legacy_init, }, -- cgit v0.10.2 From 865da01cd98d67518befe854a71e432d894db279 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 17 Feb 2014 13:22:55 +0530 Subject: ARM: OMAP: Kill warning in CPUIDLE code with !CONFIG_SMP for non SMP build, NR_CPUS is 1 and hence the code complains with below warnings. arch/arm/mach-omap2/cpuidle44xx.c:207:8: warning: array subscript is above array bounds [-Warray-bounds] arch/arm/mach-omap2/cpuidle44xx.c:212:11: warning: array subscript is above array bounds [-Warray-bounds] Kill it by making array size fixed. Acked-by: Nishanth Menon Signed-off-by: Santosh Shilimkar Signed-off-by: Mugunthan V N Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index 4c158c8..01fc710 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -23,6 +23,8 @@ #include "prm.h" #include "clockdomain.h" +#define MAX_CPUS 2 + /* Machine specific information */ struct idle_statedata { u32 cpu_state; @@ -48,11 +50,11 @@ static struct idle_statedata omap4_idle_data[] = { }, }; -static struct powerdomain *mpu_pd, *cpu_pd[NR_CPUS]; -static struct clockdomain *cpu_clkdm[NR_CPUS]; +static struct powerdomain *mpu_pd, *cpu_pd[MAX_CPUS]; +static struct clockdomain *cpu_clkdm[MAX_CPUS]; static atomic_t abort_barrier; -static bool cpu_done[NR_CPUS]; +static bool cpu_done[MAX_CPUS]; static struct idle_statedata *state_ptr = &omap4_idle_data[0]; /* Private functions */ -- cgit v0.10.2 From eec70897d81bb2913e22c6792ea2739faf59c3e1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 20 Feb 2014 00:52:01 +0000 Subject: bio-integrity: Drop bio_integrity_verify BUG_ON in post bip->bip_iter world Given that bip->bip_iter.bi_size is decremented after bio_advance() -> bio_integrity_advance() is called, the BUG_ON() in bio_integrity_verify() ends up tripping in v3.14-rc1 code with the advent of immutable biovecs in: commit d57a5f7c6605f15f3b5134837e68b448a7cea88e Author: Kent Overstreet Date: Sat Nov 23 17:20:16 2013 -0800 bio-integrity: Convert to bvec_iter Given that there is no easy way to ascertain the original bi_size value, go ahead and drop this BUG_ON(). Reported-by: Sagi Grimberg Reported-by: Akinobu Mita Acked-by: Martin K. Petersen Cc: Kent Overstreet Signed-off-by: Nicholas Bellinger Signed-off-by: Jens Axboe diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 0129b78..4f70f38 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -458,11 +458,10 @@ static int bio_integrity_verify(struct bio *bio) struct blk_integrity_exchg bix; struct bio_vec *bv; sector_t sector = bio->bi_integrity->bip_iter.bi_sector; - unsigned int sectors, total, ret; + unsigned int sectors, ret = 0; void *prot_buf = bio->bi_integrity->bip_buf; int i; - ret = total = 0; bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; @@ -484,8 +483,6 @@ static int bio_integrity_verify(struct bio *bio) sectors = bv->bv_len / bi->sector_size; sector += sectors; prot_buf += sectors * bi->tuple_size; - total += sectors * bi->tuple_size; - BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); kunmap_atomic(kaddr); } -- cgit v0.10.2 From 0dc83bd30b0bf5410c0933cfbbf8853248eff0a9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 11:19:04 +0100 Subject: Revert "writeback: do not sync data dirtied after sync start" This reverts commit c4a391b53a72d2df4ee97f96f78c1d5971b47489. Dave Chinner has reported the commit may cause some inodes to be left out from sync(2). This is because we can call redirty_tail() for some inode (which sets i_dirtied_when to current time) after sync(2) has started or similarly requeue_inode() can set i_dirtied_when to current time if writeback had to skip some pages. The real problem is in the functions clobbering i_dirtied_when but fixing that isn't trivial so revert is a safer choice for now. CC: stable@vger.kernel.org # >= 3.13 Signed-off-by: Jan Kara diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e0259a1..d754e3c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -40,18 +40,13 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; - /* - * Write only inodes dirtied before this time. Don't forget to set - * older_than_this_is_set when you set this. - */ - unsigned long older_than_this; + unsigned long *older_than_this; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ - unsigned int older_than_this_is_set:1; enum wb_reason reason; /* why was writeback initiated? */ struct list_head list; /* pending work list */ @@ -252,10 +247,10 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; - WARN_ON_ONCE(!work->older_than_this_is_set); while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (inode_dirtied_after(inode, work->older_than_this)) + if (work->older_than_this && + inode_dirtied_after(inode, *work->older_than_this)) break; list_move(&inode->i_wb_list, &tmp); moved++; @@ -742,8 +737,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .sync_mode = WB_SYNC_NONE, .range_cyclic = 1, .reason = reason, - .older_than_this = jiffies, - .older_than_this_is_set = 1, }; spin_lock(&wb->list_lock); @@ -802,13 +795,12 @@ static long wb_writeback(struct bdi_writeback *wb, { unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; + unsigned long oldest_jif; struct inode *inode; long progress; - if (!work->older_than_this_is_set) { - work->older_than_this = jiffies; - work->older_than_this_is_set = 1; - } + oldest_jif = jiffies; + work->older_than_this = &oldest_jif; spin_lock(&wb->list_lock); for (;;) { @@ -842,10 +834,10 @@ static long wb_writeback(struct bdi_writeback *wb, * safe. */ if (work->for_kupdate) { - work->older_than_this = jiffies - + oldest_jif = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) - work->older_than_this = jiffies; + oldest_jif = jiffies; trace_writeback_start(wb->bdi, work); if (list_empty(&wb->b_io)) @@ -1357,21 +1349,18 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages - * @sb: the superblock - * @older_than_this: timestamp + * @sb: the superblock * * This function writes and waits on any dirty inode belonging to this - * superblock that has been dirtied before given timestamp. + * super_block. */ -void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this) +void sync_inodes_sb(struct super_block *sb) { DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { .sb = sb, .sync_mode = WB_SYNC_ALL, .nr_pages = LONG_MAX, - .older_than_this = older_than_this, - .older_than_this_is_set = 1, .range_cyclic = 0, .done = &done, .reason = WB_REASON_SYNC, diff --git a/fs/sync.c b/fs/sync.c index e8ba024..b28d1dd 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -27,11 +27,10 @@ * wait == 1 case since in that case write_inode() functions do * sync_dirty_buffer() and thus effectively write one block at a time. */ -static int __sync_filesystem(struct super_block *sb, int wait, - unsigned long start) +static int __sync_filesystem(struct super_block *sb, int wait) { if (wait) - sync_inodes_sb(sb, start); + sync_inodes_sb(sb); else writeback_inodes_sb(sb, WB_REASON_SYNC); @@ -48,7 +47,6 @@ static int __sync_filesystem(struct super_block *sb, int wait, int sync_filesystem(struct super_block *sb) { int ret; - unsigned long start = jiffies; /* * We need to be protected against the filesystem going from @@ -62,17 +60,17 @@ int sync_filesystem(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - ret = __sync_filesystem(sb, 0, start); + ret = __sync_filesystem(sb, 0); if (ret < 0) return ret; - return __sync_filesystem(sb, 1, start); + return __sync_filesystem(sb, 1); } EXPORT_SYMBOL_GPL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - sync_inodes_sb(sb, *((unsigned long *)arg)); + sync_inodes_sb(sb); } static void sync_fs_one_sb(struct super_block *sb, void *arg) @@ -104,10 +102,9 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) SYSCALL_DEFINE0(sync) { int nowait = 0, wait = 1; - unsigned long start = jiffies; wakeup_flusher_threads(0, WB_REASON_SYNC); - iterate_supers(sync_inodes_one_sb, &start); + iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f317488..d971f49 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -913,7 +913,7 @@ xfs_flush_inodes( struct super_block *sb = mp->m_super; if (down_read_trylock(&sb->s_umount)) { - sync_inodes_sb(sb, jiffies); + sync_inodes_sb(sb); up_read(&sb->s_umount); } } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fc0e432..021b8a3 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -97,7 +97,7 @@ void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this); +void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index c7bbbe7..464ea82 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -287,11 +287,11 @@ TRACE_EVENT(writeback_queue_io, __field(int, reason) ), TP_fast_assign( - unsigned long older_than_this = work->older_than_this; + unsigned long *older_than_this = work->older_than_this; strncpy(__entry->name, dev_name(wb->bdi->dev), 32); - __entry->older = older_than_this; + __entry->older = older_than_this ? *older_than_this : 0; __entry->age = older_than_this ? - (jiffies - older_than_this) * 1000 / HZ : -1; + (jiffies - *older_than_this) * 1000 / HZ : -1; __entry->moved = moved; __entry->reason = work->reason; ), -- cgit v0.10.2 From 9fe7ed474956944443eec57c5f75be12e10da84e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 20 Feb 2014 11:32:49 +0800 Subject: tipc: remove all enabled flags from all tipc components When tipc module is inserted, many tipc components are initialized one by one. During the initialization period, if one of them is failed, tipc_core_stop() will be called to stop all components whatever corresponding components are created or not. To avoid to release uncreated ones, relevant components have to add necessary enabled flags indicating whether they are created or not. But in the initialization stage, if one component is unsuccessfully created, we will just destroy successfully created components before the failed component instead of all components. All enabled flags defined in components, in turn, become redundant. Additionally it's also unnecessary to identify whether table.types is NULL in tipc_nametbl_stop() because name stable has been definitely created successfully when tipc_nametbl_stop() is called. Cc: Jon Maloy Cc: Erik Hugne Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: David S. Miller diff --git a/net/tipc/core.c b/net/tipc/core.c index f9e88d8..cfd9cc1 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -122,30 +122,59 @@ static void tipc_core_stop(void) */ static int tipc_core_start(void) { - int res; + int err; get_random_bytes(&tipc_random, sizeof(tipc_random)); - res = tipc_handler_start(); - if (!res) - res = tipc_ref_table_init(tipc_max_ports, tipc_random); - if (!res) - res = tipc_nametbl_init(); - if (!res) - res = tipc_netlink_start(); - if (!res) - res = tipc_socket_init(); - if (!res) - res = tipc_register_sysctl(); - if (!res) - res = tipc_subscr_start(); - if (!res) - res = tipc_cfg_init(); - if (res) { - tipc_handler_stop(); - tipc_core_stop(); - } - return res; + err = tipc_handler_start(); + if (err) + goto out_handler; + + err = tipc_ref_table_init(tipc_max_ports, tipc_random); + if (err) + goto out_reftbl; + + err = tipc_nametbl_init(); + if (err) + goto out_nametbl; + + err = tipc_netlink_start(); + if (err) + goto out_netlink; + + err = tipc_socket_init(); + if (err) + goto out_socket; + + err = tipc_register_sysctl(); + if (err) + goto out_sysctl; + + err = tipc_subscr_start(); + if (err) + goto out_subscr; + + err = tipc_cfg_init(); + if (err) + goto out_cfg; + + return 0; +out_cfg: + tipc_subscr_stop(); +out_subscr: + tipc_unregister_sysctl(); +out_sysctl: + tipc_socket_stop(); +out_socket: + tipc_netlink_stop(); +out_netlink: + tipc_nametbl_stop(); +out_nametbl: + tipc_ref_table_stop(); +out_reftbl: + tipc_handler_stop(); +out_handler: + return err; } static int __init tipc_init(void) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 92a1533..48302be 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -945,9 +945,6 @@ void tipc_nametbl_stop(void) { u32 i; - if (!table.types) - return; - /* Verify name table is empty, then release it */ write_lock_bh(&tipc_nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 9f72a63..3aaf73d 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -83,8 +83,6 @@ static struct genl_ops tipc_genl_ops[] = { }, }; -static int tipc_genl_family_registered; - int tipc_netlink_start(void) { int res; @@ -94,16 +92,10 @@ int tipc_netlink_start(void) pr_err("Failed to register netlink interface\n"); return res; } - - tipc_genl_family_registered = 1; return 0; } void tipc_netlink_stop(void) { - if (!tipc_genl_family_registered) - return; - genl_unregister_family(&tipc_genl_family); - tipc_genl_family_registered = 0; } diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 2a2a938..de3d593 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -126,9 +126,6 @@ int tipc_ref_table_init(u32 requested_size, u32 start) */ void tipc_ref_table_stop(void) { - if (!tipc_ref_table.entries) - return; - vfree(tipc_ref_table.entries); tipc_ref_table.entries = NULL; } diff --git a/net/tipc/server.c b/net/tipc/server.c index b635ca3..3739797 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -573,7 +573,6 @@ int tipc_server_start(struct tipc_server *s) kmem_cache_destroy(s->rcvbuf_cache); return ret; } - s->enabled = 1; return ret; } @@ -583,10 +582,6 @@ void tipc_server_stop(struct tipc_server *s) int total = 0; int id; - if (!s->enabled) - return; - - s->enabled = 0; spin_lock_bh(&s->idr_lock); for (id = 0; total < s->idr_in_use; id++) { con = idr_find(&s->conn_idr, id); diff --git a/net/tipc/server.h b/net/tipc/server.h index 98b23f2..be817b0 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -56,7 +56,6 @@ * @name: server name * @imp: message importance * @type: socket type - * @enabled: identify whether server is launched or not */ struct tipc_server { struct idr conn_idr; @@ -74,7 +73,6 @@ struct tipc_server { const char name[TIPC_SERVER_NAME_LEN]; int imp; int type; - int enabled; }; int tipc_conn_sendmsg(struct tipc_server *s, int conid, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index aab4948..a4cf274 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -70,8 +70,6 @@ static const struct proto_ops msg_ops; static struct proto tipc_proto; static struct proto tipc_proto_kern; -static int sockets_enabled; - /* * Revised TIPC socket locking policy: * @@ -2027,8 +2025,6 @@ int tipc_socket_init(void) proto_unregister(&tipc_proto); goto out; } - - sockets_enabled = 1; out: return res; } @@ -2038,10 +2034,6 @@ int tipc_socket_init(void) */ void tipc_socket_stop(void) { - if (!sockets_enabled) - return; - - sockets_enabled = 0; sock_unregister(tipc_family_ops.family); proto_unregister(&tipc_proto); } -- cgit v0.10.2 From 970122fdf4b2d79c708022f2fdc0ab3840311d87 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 20 Feb 2014 11:32:50 +0800 Subject: tipc: make bearer set up in module insertion stage Accidentally a side effect is involved by commit 6e967adf7(tipc: relocate common functions from media to bearer). Now tipc stack handler of receiving packets from netdevices as well as netdevice notification handler are registered when bearer is enabled rather than tipc module initialization stage, but the two handlers are both unregistered in tipc module exit phase. If tipc module is inserted and then immediately removed, the following warning message will appear: "dev_remove_pack: ffffffffa0380940 not found" This is because in module insertion stage tipc stack packet handler is not registered at all, but in module exit phase dev_remove_pack() needs to remove it. Of course, dev_remove_pack() cannot find tipc protocol handler from the kernel protocol handler list so that the warning message is printed out. But if registering the two handlers is adjusted from enabling bearer phase into inserting module stage, the warning message will be eliminated. Due to this change, tipc_core_start_net() and tipc_core_stop_net() can be deleted as well. Reported-by: Wang Weidong Cc: Jon Maloy Cc: Erik Hugne Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: David S. Miller diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a38c899..574b861 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -610,8 +610,13 @@ static struct notifier_block notifier = { int tipc_bearer_setup(void) { + int err; + + err = register_netdevice_notifier(¬ifier); + if (err) + return err; dev_add_pack(&tipc_packet_type); - return register_netdevice_notifier(¬ifier); + return 0; } void tipc_bearer_cleanup(void) diff --git a/net/tipc/config.c b/net/tipc/config.c index c301a9a..e74eef2 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -181,7 +181,7 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - tipc_core_start_net(addr); + tipc_net_start(addr); return tipc_cfg_reply_none(); } diff --git a/net/tipc/core.c b/net/tipc/core.c index cfd9cc1..80c2064 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -77,37 +77,13 @@ struct sk_buff *tipc_buf_acquire(u32 size) } /** - * tipc_core_stop_net - shut down TIPC networking sub-systems - */ -static void tipc_core_stop_net(void) -{ - tipc_net_stop(); - tipc_bearer_cleanup(); -} - -/** - * start_net - start TIPC networking sub-systems - */ -int tipc_core_start_net(unsigned long addr) -{ - int res; - - tipc_net_start(addr); - res = tipc_bearer_setup(); - if (res < 0) - goto err; - return res; - -err: - tipc_core_stop_net(); - return res; -} - -/** * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode */ static void tipc_core_stop(void) { + tipc_handler_stop(); + tipc_net_stop(); + tipc_bearer_cleanup(); tipc_netlink_stop(); tipc_cfg_stop(); tipc_subscr_stop(); @@ -158,7 +134,13 @@ static int tipc_core_start(void) if (err) goto out_cfg; + err = tipc_bearer_setup(); + if (err) + goto out_bearer; + return 0; +out_bearer: + tipc_cfg_stop(); out_cfg: tipc_subscr_stop(); out_subscr: @@ -203,8 +185,6 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { - tipc_handler_stop(); - tipc_core_stop_net(); tipc_core_stop(); pr_info("Deactivated\n"); } diff --git a/net/tipc/core.h b/net/tipc/core.h index 5569d96..4dfe137 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -90,7 +90,6 @@ extern int tipc_random __read_mostly; /* * Routines available to privileged subsystems */ -int tipc_core_start_net(unsigned long); int tipc_handler_start(void); void tipc_handler_stop(void); int tipc_netlink_start(void); -- cgit v0.10.2 From f5ddcbbb40aa0ba7fbfe22355d287603dbeeaaac Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Feb 2014 10:09:18 -0800 Subject: net-tcp: fastopen: fix high order allocations This patch fixes two bugs in fastopen : 1) The tcp_sendmsg(..., @size) argument was ignored. Code was relying on user not fooling the kernel with iovec mismatches 2) When MTU is about 64KB, tcp_send_syn_data() attempts order-5 allocations, which are likely to fail when memory gets fragmented. Fixes: 783237e8daf13 ("net-tcp: Fast Open client - sending SYN-data") Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Acked-by: Yuchung Cheng Tested-by: Yuchung Cheng Signed-off-by: David S. Miller diff --git a/include/net/tcp.h b/include/net/tcp.h index 56fc366..8c4dd63 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1303,7 +1303,8 @@ struct tcp_fastopen_request { /* Fast Open cookie. Size 0 means a cookie request */ struct tcp_fastopen_cookie cookie; struct msghdr *data; /* data in MSG_FASTOPEN */ - u16 copied; /* queued in tcp_connect() */ + size_t size; + int copied; /* queued in tcp_connect() */ }; void tcp_free_fastopen_req(struct tcp_sock *tp); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9f3a2db..97c8f56 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1044,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } } -static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) +static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, + int *copied, size_t size) { struct tcp_sock *tp = tcp_sk(sk); int err, flags; @@ -1059,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size) if (unlikely(tp->fastopen_req == NULL)) return -ENOBUFS; tp->fastopen_req->data = msg; + tp->fastopen_req->size = size; flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags); - *size = tp->fastopen_req->copied; + *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); return err; } @@ -1083,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flags = msg->msg_flags; if (flags & MSG_FASTOPEN) { - err = tcp_sendmsg_fastopen(sk, msg, &copied_syn); + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) goto out; else if (err) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3be1672..0980581 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2908,7 +2908,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) - MAX_TCP_OPTION_SPACE; - syn_data = skb_copy_expand(syn, skb_headroom(syn), space, + space = min_t(size_t, space, fo->size); + + /* limit to order-0 allocations */ + space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); + + syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, sk->sk_allocation); if (syn_data == NULL) goto fallback; -- cgit v0.10.2 From b194c1f1dbd5f2671e49e0ac801b1b78dc7de93b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 21 Feb 2014 14:52:57 +0100 Subject: neigh: fix setting of default gc_* values This patch fixes bug introduced by: commit 1d4c8c29841b9991cdf3c7cc4ba7f96a94f104ca "neigh: restore old behaviour of default parms values" The thing is that in neigh_sysctl_register, extra1 and extra2 which were previously set for NEIGH_VAR_GC_* are overwritten. That leads to nonsense int limits for gc_* variables. So fix this by not touching extra* fields for gc_* variables. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b9e9e0d..e1aa0f3 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3046,7 +3046,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, if (!t) goto err; - for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) { + for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) { t->neigh_vars[i].data += (long) p; t->neigh_vars[i].extra1 = dev; t->neigh_vars[i].extra2 = p; -- cgit v0.10.2 From 4c47af4d5eb2c2f78f886079a3920a7078a6f0a0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 20 Feb 2014 20:51:06 +0100 Subject: net: sctp: rework multihoming retransmission path selection to rfc4960 Problem statement: 1) both paths (primary path1 and alternate path2) are up after the association has been established i.e., HB packets are normally exchanged, 2) path2 gets inactive after path_max_retrans * max_rto timed out (i.e. path2 is down completely), 3) now, if a transmission times out on the only surviving/active path1 (any ~1sec network service impact could cause this like a channel bonding failover), then the retransmitted packets are sent over the inactive path2; this happens with partial failover and without it. Besides not being optimal in the above scenario, a small failure or timeout in the only existing path has the potential to cause long delays in the retransmission (depending on RTO_MAX) until the still active path is reselected. Further, when the T3-timeout occurs, we have active_patch == retrans_path, and even though the timeout occurred on the initial transmission of data, not a retransmit, we end up updating retransmit path. RFC4960, section 6.4. "Multi-Homed SCTP Endpoints" states under 6.4.1. "Failover from an Inactive Destination Address" the following: Some of the transport addresses of a multi-homed SCTP endpoint may become inactive due to either the occurrence of certain error conditions (see Section 8.2) or adjustments from the SCTP user. When there is outbound data to send and the primary path becomes inactive (e.g., due to failures), or where the SCTP user explicitly requests to send data to an inactive destination transport address, before reporting an error to its ULP, the SCTP endpoint should try to send the data to an alternate __active__ destination transport address if one exists. When retransmitting data that timed out, if the endpoint is multihomed, it should consider each source-destination address pair in its retransmission selection policy. When retransmitting timed-out data, the endpoint should attempt to pick the most divergent source-destination pair from the original source-destination pair to which the packet was transmitted. Note: Rules for picking the most divergent source-destination pair are an implementation decision and are not specified within this document. So, we should first reconsider to take the current active retransmission transport if we cannot find an alternative active one. If all of that fails, we can still round robin through unkown, partial failover, and inactive ones in the hope to find something still suitable. Commit 4141ddc02a92 ("sctp: retran_path update bug fix") broke that behaviour by selecting the next inactive transport when no other active transport was found besides the current assoc's peer.retran_path. Before commit 4141ddc02a92, we would have traversed through the list until we reach our peer.retran_path again, and in case that is still in state SCTP_ACTIVE, we would take it and return. Only if that is not the case either, we take the next inactive transport. Besides all that, another issue is that transports in state SCTP_UNKNOWN could be preferred over transports in state SCTP_ACTIVE in case a SCTP_ACTIVE transport appears after SCTP_UNKNOWN in the transport list yielding a weaker transport state to be used in retransmission. This patch mostly reverts 4141ddc02a92, but also rewrites this function to introduce more clarity and strictness into the code. A strict priority of transport states is enforced in this patch, hence selection is active > unkown > partial failover > inactive. Fixes: 4141ddc02a92 ("sctp: retran_path update bug fix") Signed-off-by: Daniel Borkmann Cc: Gui Jianfeng Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/associola.c b/net/sctp/associola.c index f558433..ee13d28 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1239,78 +1239,107 @@ void sctp_assoc_update(struct sctp_association *asoc, } /* Update the retran path for sending a retransmitted packet. - * Round-robin through the active transports, else round-robin - * through the inactive transports as this is the next best thing - * we can try. + * See also RFC4960, 6.4. Multi-Homed SCTP Endpoints: + * + * When there is outbound data to send and the primary path + * becomes inactive (e.g., due to failures), or where the + * SCTP user explicitly requests to send data to an + * inactive destination transport address, before reporting + * an error to its ULP, the SCTP endpoint should try to send + * the data to an alternate active destination transport + * address if one exists. + * + * When retransmitting data that timed out, if the endpoint + * is multihomed, it should consider each source-destination + * address pair in its retransmission selection policy. + * When retransmitting timed-out data, the endpoint should + * attempt to pick the most divergent source-destination + * pair from the original source-destination pair to which + * the packet was transmitted. + * + * Note: Rules for picking the most divergent source-destination + * pair are an implementation decision and are not specified + * within this document. + * + * Our basic strategy is to round-robin transports in priorities + * according to sctp_state_prio_map[] e.g., if no such + * transport with state SCTP_ACTIVE exists, round-robin through + * SCTP_UNKNOWN, etc. You get the picture. */ -void sctp_assoc_update_retran_path(struct sctp_association *asoc) +static const u8 sctp_trans_state_to_prio_map[] = { + [SCTP_ACTIVE] = 3, /* best case */ + [SCTP_UNKNOWN] = 2, + [SCTP_PF] = 1, + [SCTP_INACTIVE] = 0, /* worst case */ +}; + +static u8 sctp_trans_score(const struct sctp_transport *trans) { - struct sctp_transport *t, *next; - struct list_head *head = &asoc->peer.transport_addr_list; - struct list_head *pos; + return sctp_trans_state_to_prio_map[trans->state]; +} - if (asoc->peer.transport_count == 1) - return; +static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr, + struct sctp_transport *best) +{ + if (best == NULL) + return curr; - /* Find the next transport in a round-robin fashion. */ - t = asoc->peer.retran_path; - pos = &t->transports; - next = NULL; + return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best; +} - while (1) { - /* Skip the head. */ - if (pos->next == head) - pos = head->next; - else - pos = pos->next; +void sctp_assoc_update_retran_path(struct sctp_association *asoc) +{ + struct sctp_transport *trans = asoc->peer.retran_path; + struct sctp_transport *trans_next = NULL; - t = list_entry(pos, struct sctp_transport, transports); + /* We're done as we only have the one and only path. */ + if (asoc->peer.transport_count == 1) + return; + /* If active_path and retran_path are the same and active, + * then this is the only active path. Use it. + */ + if (asoc->peer.active_path == asoc->peer.retran_path && + asoc->peer.active_path->state == SCTP_ACTIVE) + return; - /* We have exhausted the list, but didn't find any - * other active transports. If so, use the next - * transport. - */ - if (t == asoc->peer.retran_path) { - t = next; + /* Iterate from retran_path's successor back to retran_path. */ + for (trans = list_next_entry(trans, transports); 1; + trans = list_next_entry(trans, transports)) { + /* Manually skip the head element. */ + if (&trans->transports == &asoc->peer.transport_addr_list) + continue; + if (trans->state == SCTP_UNCONFIRMED) + continue; + trans_next = sctp_trans_elect_best(trans, trans_next); + /* Active is good enough for immediate return. */ + if (trans_next->state == SCTP_ACTIVE) break; - } - - /* Try to find an active transport. */ - - if ((t->state == SCTP_ACTIVE) || - (t->state == SCTP_UNKNOWN)) { + /* We've reached the end, time to update path. */ + if (trans == asoc->peer.retran_path) break; - } else { - /* Keep track of the next transport in case - * we don't find any active transport. - */ - if (t->state != SCTP_UNCONFIRMED && !next) - next = t; - } } - if (t) - asoc->peer.retran_path = t; - else - t = asoc->peer.retran_path; + if (trans_next != NULL) + asoc->peer.retran_path = trans_next; - pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc, - &t->ipaddr.sa); + pr_debug("%s: association:%p updated new path to addr:%pISpc\n", + __func__, asoc, &asoc->peer.retran_path->ipaddr.sa); } -/* Choose the transport for sending retransmit packet. */ -struct sctp_transport *sctp_assoc_choose_alter_transport( - struct sctp_association *asoc, struct sctp_transport *last_sent_to) +struct sctp_transport * +sctp_assoc_choose_alter_transport(struct sctp_association *asoc, + struct sctp_transport *last_sent_to) { /* If this is the first time packet is sent, use the active path, * else use the retran path. If the last packet was sent over the * retran path, update the retran path and use it. */ - if (!last_sent_to) + if (last_sent_to == NULL) { return asoc->peer.active_path; - else { + } else { if (last_sent_to == asoc->peer.retran_path) sctp_assoc_update_retran_path(asoc); + return asoc->peer.retran_path; } } -- cgit v0.10.2 From 916e4cf46d0204806c062c8c6c4d1f633852c5b6 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 21 Feb 2014 02:55:35 +0100 Subject: ipv6: reuse ip6_frag_id from ip6_ufo_append_data Currently we generate a new fragmentation id on UFO segmentation. It is pretty hairy to identify the correct net namespace and dst there. Especially tunnels use IFF_XMIT_DST_RELEASE and thus have no skb_dst available at all. This causes unreliable or very predictable ipv6 fragmentation id generation while segmentation. Luckily we already have pregenerated the ip6_frag_id in ip6_ufo_append_data and can use it here. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e7359f9..b261ee8 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() -- cgit v0.10.2 From 9b8cf779f93bc48c0f12ef71e5bc90fd92322656 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 10 Feb 2014 17:00:01 -0300 Subject: irqchip: armada-370-xp: Add helper for the MSI IRQ handling Introduce a helper function to handle the MSI interrupts. This makes the code more readable. In addition, this will allow to introduce a chained IRQ handler mechanism, which is needed in situations where the MPIC is used as a slave to another interrupt controller. Reviewed-by: Gregory CLEMENT Signed-off-by: Ezequiel Garcia Signed-off-by: Jason Cooper diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 5409564..2ba5761 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -352,6 +352,34 @@ static struct irq_domain_ops armada_370_xp_mpic_irq_ops = { .xlate = irq_domain_xlate_onecell, }; +#ifdef CONFIG_PCI_MSI +static void armada_370_xp_handle_msi_irq(struct pt_regs *regs) +{ + u32 msimask, msinr; + + msimask = readl_relaxed(per_cpu_int_base + + ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS) + & PCI_MSI_DOORBELL_MASK; + + writel(~msimask, per_cpu_int_base + + ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS); + + for (msinr = PCI_MSI_DOORBELL_START; + msinr < PCI_MSI_DOORBELL_END; msinr++) { + int irq; + + if (!(msimask & BIT(msinr))) + continue; + + irq = irq_find_mapping(armada_370_xp_msi_domain, + msinr - 16); + handle_IRQ(irq, regs); + } +} +#else +static void armada_370_xp_handle_msi_irq(struct pt_regs *r) {} +#endif + static asmlinkage void __exception_irq_entry armada_370_xp_handle_irq(struct pt_regs *regs) { @@ -372,31 +400,9 @@ armada_370_xp_handle_irq(struct pt_regs *regs) continue; } -#ifdef CONFIG_PCI_MSI /* MSI handling */ - if (irqnr == 1) { - u32 msimask, msinr; - - msimask = readl_relaxed(per_cpu_int_base + - ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS) - & PCI_MSI_DOORBELL_MASK; - - writel(~msimask, per_cpu_int_base + - ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS); - - for (msinr = PCI_MSI_DOORBELL_START; - msinr < PCI_MSI_DOORBELL_END; msinr++) { - int irq; - - if (!(msimask & BIT(msinr))) - continue; - - irq = irq_find_mapping(armada_370_xp_msi_domain, - msinr - 16); - handle_IRQ(irq, regs); - } - } -#endif + if (irqnr == 1) + armada_370_xp_handle_msi_irq(regs); #ifdef CONFIG_SMP /* IPI Handling */ -- cgit v0.10.2 From bc69b8adfe221def02ea10f7b9ab32e80195334c Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 10 Feb 2014 17:00:02 -0300 Subject: irqchip: armada-370-xp: Setup a chained handler for the MPIC The new Armada 375 and Armada 38x Marvell SoCs are based on Cortex-A9 CPU cores and use the ARM GIC as their main interrupt controller. However, for various purposes (wake-up from suspend, MSI interrupts), they have kept a separate MPIC interrupt controller, acting as a slave to the GIC. This MPIC was already used as the primary controller on previous Marvell SoCs, so this commit extends the existing driver to allow the MPIC to be used as a GIC slave. Reviewed-by: Gregory CLEMENT Signed-off-by: Ezequiel Garcia Signed-off-by: Jason Cooper diff --git a/Documentation/devicetree/bindings/arm/armada-370-xp-mpic.txt b/Documentation/devicetree/bindings/arm/armada-370-xp-mpic.txt index d74091a..5fc0313 100644 --- a/Documentation/devicetree/bindings/arm/armada-370-xp-mpic.txt +++ b/Documentation/devicetree/bindings/arm/armada-370-xp-mpic.txt @@ -1,4 +1,4 @@ -Marvell Armada 370 and Armada XP Interrupt Controller +Marvell Armada 370, 375, 38x, XP Interrupt Controller ----------------------------------------------------- Required properties: @@ -16,7 +16,13 @@ Required properties: automatically map to the interrupt controller registers of the current CPU) +Optional properties: +- interrupts: If defined, then it indicates that this MPIC is + connected as a slave to another interrupt controller. This is + typically the case on Armada 375 and Armada 38x, where the MPIC is + connected as a slave to the Cortex-A9 GIC. The provided interrupt + indicate to which GIC interrupt the MPIC output is connected. Example: diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index 2ba5761..cd79503 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #define ARMADA_370_XP_INT_SOURCE_CTL(irq) (0x100 + irq*4) #define ARMADA_370_XP_CPU_INTACK_OFFS (0x44) +#define ARMADA_375_PPI_CAUSE (0x10) #define ARMADA_370_XP_SW_TRIG_INT_OFFS (0x4) #define ARMADA_370_XP_IN_DRBEL_MSK_OFFS (0xc) @@ -353,7 +355,7 @@ static struct irq_domain_ops armada_370_xp_mpic_irq_ops = { }; #ifdef CONFIG_PCI_MSI -static void armada_370_xp_handle_msi_irq(struct pt_regs *regs) +static void armada_370_xp_handle_msi_irq(struct pt_regs *regs, bool is_chained) { u32 msimask, msinr; @@ -373,13 +375,41 @@ static void armada_370_xp_handle_msi_irq(struct pt_regs *regs) irq = irq_find_mapping(armada_370_xp_msi_domain, msinr - 16); - handle_IRQ(irq, regs); + + if (is_chained) + generic_handle_irq(irq); + else + handle_IRQ(irq, regs); } } #else -static void armada_370_xp_handle_msi_irq(struct pt_regs *r) {} +static void armada_370_xp_handle_msi_irq(struct pt_regs *r, bool b) {} #endif +static void armada_370_xp_mpic_handle_cascade_irq(unsigned int irq, + struct irq_desc *desc) +{ + struct irq_chip *chip = irq_get_chip(irq); + unsigned long irqmap, irqn; + unsigned int cascade_irq; + + chained_irq_enter(chip, desc); + + irqmap = readl_relaxed(per_cpu_int_base + ARMADA_375_PPI_CAUSE); + + if (irqmap & BIT(0)) { + armada_370_xp_handle_msi_irq(NULL, true); + irqmap &= ~BIT(0); + } + + for_each_set_bit(irqn, &irqmap, BITS_PER_LONG) { + cascade_irq = irq_find_mapping(armada_370_xp_mpic_domain, irqn); + generic_handle_irq(cascade_irq); + } + + chained_irq_exit(chip, desc); +} + static asmlinkage void __exception_irq_entry armada_370_xp_handle_irq(struct pt_regs *regs) { @@ -402,7 +432,7 @@ armada_370_xp_handle_irq(struct pt_regs *regs) /* MSI handling */ if (irqnr == 1) - armada_370_xp_handle_msi_irq(regs); + armada_370_xp_handle_msi_irq(regs, false); #ifdef CONFIG_SMP /* IPI Handling */ @@ -433,6 +463,7 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, struct device_node *parent) { struct resource main_int_res, per_cpu_int_res; + int parent_irq; u32 control; BUG_ON(of_address_to_resource(node, 0, &main_int_res)); @@ -461,8 +492,6 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, BUG_ON(!armada_370_xp_mpic_domain); - irq_set_default_host(armada_370_xp_mpic_domain); - #ifdef CONFIG_SMP armada_xp_mpic_smp_cpu_init(); @@ -478,7 +507,14 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, armada_370_xp_msi_init(node, main_int_res.start); - set_handle_irq(armada_370_xp_handle_irq); + parent_irq = irq_of_parse_and_map(node, 0); + if (parent_irq <= 0) { + irq_set_default_host(armada_370_xp_mpic_domain); + set_handle_irq(armada_370_xp_handle_irq); + } else { + irq_set_chained_handler(parent_irq, + armada_370_xp_mpic_handle_cascade_irq); + } return 0; } -- cgit v0.10.2 From 77177856e3bf39d435b3ae4bfd164ca3c8cd4577 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 20:58:37 +0100 Subject: sched: Init idle->on_rq in init_idle() We stumbled in RT over a SMP bringup issue on ARM where the idle->on_rq == 0 was causing try_to_wakeup() on the other cpu to run into nada land. After adding that idle->on_rq = 1; I was able to find the root cause of the lockup: the idle task on the newly woken up cpu was fiddling with a sleeping spinlock, which is a nono. I kept the init of idle->on_rq to keep the state consistent and to avoid another long lasting debug session. As a side note, the whole debug mess could have been avoided if might_sleep() would have yelled when called from the idle task. That's fixed with patch 2/6 - and that one actually has a changelog :) Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391803122-4425-2-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 49db434..06da865 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4443,6 +4443,7 @@ void init_idle(struct task_struct *idle, int cpu) rcu_read_unlock(); rq->curr = rq->idle = idle; + idle->on_rq = 1; #if defined(CONFIG_SMP) idle->on_cpu = 1; #endif -- cgit v0.10.2 From db273be2a7d42f92b3471e0f717982928214a650 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 20:58:38 +0100 Subject: sched: Check for idle task in might_sleep() Idle is not allowed to call sleeping functions ever! Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391803122-4425-3-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 06da865..a01fe6c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6927,7 +6927,8 @@ void __might_sleep(const char *file, int line, int preempt_offset) static unsigned long prev_jiffy; /* ratelimiting */ rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ - if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || + if ((preempt_count_equals(preempt_offset) && !irqs_disabled() && + !is_idle_task(current)) || system_state != SYSTEM_RUNNING || oops_in_progress) return; if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) -- cgit v0.10.2 From 8f47b1871b8aac98f1a9d93bc3467fb97b65199a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 20:58:39 +0100 Subject: sched: Add better debug output for might_sleep() might_sleep() can tell us where interrupts have been disabled, but we have no idea what disabled preemption. Add some debug infrastructure. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391803122-4425-4-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index c49a258..825ed83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1463,6 +1463,9 @@ struct task_struct { struct mutex perf_event_mutex; struct list_head perf_event_list; #endif +#ifdef CONFIG_DEBUG_PREEMPT + unsigned long preempt_disable_ip; +#endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* Protected by alloc_lock */ short il_next; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a01fe6c..c94e851 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2501,8 +2501,13 @@ void __kprobes preempt_count_add(int val) DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK - 10); #endif - if (preempt_count() == val) - trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); + if (preempt_count() == val) { + unsigned long ip = get_parent_ip(CALLER_ADDR1); +#ifdef CONFIG_DEBUG_PREEMPT + current->preempt_disable_ip = ip; +#endif + trace_preempt_off(CALLER_ADDR0, ip); + } } EXPORT_SYMBOL(preempt_count_add); @@ -2545,6 +2550,13 @@ static noinline void __schedule_bug(struct task_struct *prev) print_modules(); if (irqs_disabled()) print_irqtrace_events(prev); +#ifdef CONFIG_DEBUG_PREEMPT + if (in_atomic_preempt_off()) { + pr_err("Preemption disabled at:"); + print_ip_sym(current->preempt_disable_ip); + pr_cont("\n"); + } +#endif dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); } @@ -6946,6 +6958,13 @@ void __might_sleep(const char *file, int line, int preempt_offset) debug_show_held_locks(current); if (irqs_disabled()) print_irqtrace_events(current); +#ifdef CONFIG_DEBUG_PREEMPT + if (!preempt_count_equals(preempt_offset)) { + pr_err("Preemption disabled at:"); + print_ip_sym(current->preempt_disable_ip); + pr_cont("\n"); + } +#endif dump_stack(); } EXPORT_SYMBOL(__might_sleep); -- cgit v0.10.2 From d6b1e9119787fd2e31dcf0f0ce90b71197604206 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 20:58:40 +0100 Subject: sched: Adjust p->sched_reset_on_fork when nothing else changes If the policy and priority remain unchanged a possible modification of p->sched_reset_on_fork gets lost in the early exit path. Signed-off-by: Thomas Gleixner [ Rebase ontop of v3.14-rc1. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391803122-4425-5-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c94e851..771eb87 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3362,7 +3362,8 @@ recheck: } /* - * If not changing anything there's no need to proceed further: + * If not changing anything there's no need to proceed further, + * but store a possible modification of reset_on_fork. */ if (unlikely(policy == p->policy)) { if (fair_policy(policy) && attr->sched_nice != task_nice(p)) @@ -3372,6 +3373,7 @@ recheck: if (dl_policy(policy)) goto change; + p->sched_reset_on_fork = reset_on_fork; task_rq_unlock(rq, p, &flags); return 0; } -- cgit v0.10.2 From 81a44c5441d7f7d2c3dc9105f4d65ad0d5818617 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 20:58:41 +0100 Subject: sched: Queue RT tasks to head when prio drops The following scenario does not work correctly: Runqueue of CPUx contains two runnable and pinned tasks: T1: SCHED_FIFO, prio 80 T2: SCHED_FIFO, prio 80 T1 is on the cpu and executes the following syscalls (classic priority ceiling scenario): sys_sched_setscheduler(pid(T1), SCHED_FIFO, .prio = 90); ... sys_sched_setscheduler(pid(T1), SCHED_FIFO, .prio = 80); ... Now T1 gets preempted by T3 (SCHED_FIFO, prio 95). After T3 goes back to sleep the scheduler picks T2. Surprise! The same happens w/o actual preemption when T1 is forced into the scheduler due to a sporadic NEED_RESCHED event. The scheduler invokes pick_next_task() which returns T2. So T1 gets preempted and scheduled out. This happens because sched_setscheduler() dequeues T1 from the prio 90 list and then enqueues it on the tail of the prio 80 list behind T2. This violates the POSIX spec and surprises user space which relies on the guarantee that SCHED_FIFO tasks are not scheduled out unless they give the CPU up voluntarily or are preempted by a higher priority task. In the latter case the preempted task must get back on the CPU after the preempting task schedules out again. We fixed a similar issue already in commit 60db48c (sched: Queue a deboosted task to the head of the RT prio queue). The same treatment is necessary for sched_setscheduler(). So enqueue to head of the prio bucket list if the priority of the task is lowered. It might be possible that existing user space relies on the current behaviour, but it can be considered highly unlikely due to the corner case nature of the application scenario. Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391803122-4425-6-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 771eb87..9c2fcbf 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3442,8 +3442,13 @@ change: if (running) p->sched_class->set_curr_task(rq); - if (on_rq) - enqueue_task(rq, p, 0); + if (on_rq) { + /* + * We enqueue to tail when the priority of a task is + * increased (user space view). + */ + enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0); + } check_class_changed(rq, p, prev_class, oldprio); task_rq_unlock(rq, p, &flags); -- cgit v0.10.2 From c365c292d05908c6ea6f32708f331e21033fe71d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Feb 2014 20:58:42 +0100 Subject: sched: Consider pi boosting in setscheduler() If a PI boosted task policy/priority is modified by a setscheduler() call we unconditionally dequeue and requeue the task if it is on the runqueue even if the new priority is lower than the current effective boosted priority. This can result in undesired reordering of the priority bucket list. If the new priority is less or equal than the current effective we just store the new parameters in the task struct and leave the scheduler class and the runqueue untouched. This is handled when the task deboosts itself. Only if the new priority is higher than the effective boosted priority we apply the change immediately. Signed-off-by: Thomas Gleixner [ Rebase ontop of v3.14-rc1. ] Signed-off-by: Sebastian Andrzej Siewior Cc: Dario Faggioli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1391803122-4425-7-git-send-email-bigeasy@linutronix.de Signed-off-by: Ingo Molnar diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index f7453d4..6341f5b 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -18,6 +18,7 @@ static inline int rt_task(struct task_struct *p) #ifdef CONFIG_RT_MUTEXES extern int rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); +extern int rt_mutex_check_prio(struct task_struct *task, int newprio); extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) @@ -29,6 +30,12 @@ static inline int rt_mutex_getprio(struct task_struct *p) { return p->normal_prio; } + +static inline int rt_mutex_check_prio(struct task_struct *task, int newprio) +{ + return 0; +} + static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 2e960a2..aa4dff0 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -213,6 +213,18 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task) } /* + * Called by sched_setscheduler() to check whether the priority change + * is overruled by a possible priority boosting. + */ +int rt_mutex_check_prio(struct task_struct *task, int newprio) +{ + if (!task_has_pi_waiters(task)) + return 0; + + return task_top_pi_waiter(task)->task->prio <= newprio; +} + +/* * Adjust the priority of a task, after its pi_waiters got modified. * * This can be both boosting and unboosting. task->pi_lock must be held. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9c2fcbf..003263b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2902,7 +2902,8 @@ EXPORT_SYMBOL(sleep_on_timeout); * This function changes the 'effective' priority of a task. It does * not touch ->normal_prio like __setscheduler(). * - * Used by the rt_mutex code to implement priority inheritance logic. + * Used by the rt_mutex code to implement priority inheritance + * logic. Call site only calls if the priority of the task changed. */ void rt_mutex_setprio(struct task_struct *p, int prio) { @@ -3171,9 +3172,8 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr) dl_se->dl_new = 1; } -/* Actually do priority change: must hold pi & rq lock. */ -static void __setscheduler(struct rq *rq, struct task_struct *p, - const struct sched_attr *attr) +static void __setscheduler_params(struct task_struct *p, + const struct sched_attr *attr) { int policy = attr->sched_policy; @@ -3193,9 +3193,14 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, * getparam()/getattr() don't report silly values for !rt tasks. */ p->rt_priority = attr->sched_priority; + set_load_weight(p); +} - p->normal_prio = normal_prio(p); - p->prio = rt_mutex_getprio(p); +/* Actually do priority change: must hold pi & rq lock. */ +static void __setscheduler(struct rq *rq, struct task_struct *p, + const struct sched_attr *attr) +{ + __setscheduler_params(p, attr); if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; @@ -3203,8 +3208,6 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, p->sched_class = &rt_sched_class; else p->sched_class = &fair_sched_class; - - set_load_weight(p); } static void @@ -3257,6 +3260,7 @@ static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user) { + int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; int retval, oldprio, oldpolicy = -1, on_rq, running; int policy = attr->sched_policy; unsigned long flags; @@ -3427,6 +3431,24 @@ change: return -EBUSY; } + p->sched_reset_on_fork = reset_on_fork; + oldprio = p->prio; + + /* + * Special case for priority boosted tasks. + * + * If the new priority is lower or equal (user space view) + * than the current (boosted) priority, we just store the new + * normal parameters and do not touch the scheduler class and + * the runqueue. This will be done when the task deboost + * itself. + */ + if (rt_mutex_check_prio(p, newprio)) { + __setscheduler_params(p, attr); + task_rq_unlock(rq, p, &flags); + return 0; + } + on_rq = p->on_rq; running = task_current(rq, p); if (on_rq) @@ -3434,9 +3456,6 @@ change: if (running) p->sched_class->put_prev_task(rq, p); - p->sched_reset_on_fork = reset_on_fork; - - oldprio = p->prio; prev_class = p->sched_class; __setscheduler(rq, p, attr); -- cgit v0.10.2 From d82fd25356b902703152c1800845661835541878 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 8 Feb 2014 14:17:26 +0800 Subject: sched/rt: Remove 'leaf_rt_rq_list' from 'struct rq' This is a leftover from commit e23ee74777f389369431d77390c4b09332ce026a ("sched/rt: Simplify pull_rt_task() logic and remove .leaf_rt_rq_list"). Signed-off-by: Li Zefan Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/52F5CBF6.4060901@huawei.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 003263b..cc4965e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6876,7 +6876,6 @@ void __init sched_init(void) rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime; #ifdef CONFIG_RT_GROUP_SCHED - INIT_LIST_HEAD(&rq->leaf_rt_rq_list); init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL); #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index caf4abd..d608125 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -543,10 +543,6 @@ struct rq { struct list_head leaf_cfs_rq_list; #endif /* CONFIG_FAIR_GROUP_SCHED */ -#ifdef CONFIG_RT_GROUP_SCHED - struct list_head leaf_rt_rq_list; -#endif - /* * This is part of a global counter where only the total sum * over all CPUs matters. A task can increase this counter on -- cgit v0.10.2 From 11c785b79ef2a669e4bf7be5cf2c3904b8fed015 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sat, 8 Feb 2014 14:17:45 +0800 Subject: sched/rt: Make init_sched_rt_calss() __init It's a bootstrap function. Signed-off-by: Li Zefan Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/52F5CC09.1080502@huawei.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 3e488ca..4d4b386 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1849,7 +1849,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) resched_task(rq->curr); } -void init_sched_rt_class(void) +void __init init_sched_rt_class(void) { unsigned int i; -- cgit v0.10.2 From 7e298d60f717257dc8365c975f45ff9c37165362 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Tue, 11 Feb 2014 15:34:45 +0800 Subject: sched/prio: Use DEFAULT_PRIO to define NICE_TO_PRIO() and PRIO_TO_NICE() There is already a macro named DEFAULT_PRIO in prio.h, we can use it to define NICE_TO_PRIO and PRIO_TO_NICE rather than use hard coding of (MAX_RT_PRIO + 20). Signed-off-by: Dongsheng Yang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/4e28ec36fb49e8906027cbbdd900ab26a149905e.1392103744.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 410ccb7..1ceaaa1 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -25,8 +25,8 @@ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ], * and back. */ -#define NICE_TO_PRIO(nice) (MAX_RT_PRIO + (nice) + 20) -#define PRIO_TO_NICE(prio) ((prio) - MAX_RT_PRIO - 20) +#define NICE_TO_PRIO(nice) ((nice) + DEFAULT_PRIO) +#define PRIO_TO_NICE(prio) ((prio) - DEFAULT_PRIO) /* * 'User priority' is the nice value converted to something we -- cgit v0.10.2 From 3ee237dddcd885d4e525791299d62de33b2ca117 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Tue, 11 Feb 2014 15:34:46 +0800 Subject: sched/prio: Add 3 macros of MAX_NICE, MIN_NICE and NICE_WIDTH in prio.h Currently there is lots of hard coding to 19 and -20, to represent maximum and minimum of nice values. This patch add three macros in prio.h for maximum, minimum and width of nice value, and uses it to remove hardcoded values in prio.h. Signed-off-by: Dongsheng Yang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/3994e89327b2b15f992277cdf9f409c516f87d1b.1392103744.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner [ Collapsed two small patches. ] Signed-off-by: Ingo Molnar diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h index 1ceaaa1..ac32258 100644 --- a/include/linux/sched/prio.h +++ b/include/linux/sched/prio.h @@ -1,6 +1,10 @@ #ifndef _SCHED_PRIO_H #define _SCHED_PRIO_H +#define MAX_NICE 19 +#define MIN_NICE -20 +#define NICE_WIDTH (MAX_NICE - MIN_NICE + 1) + /* * Priority of a process goes from 0..MAX_PRIO-1, valid RT * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH @@ -17,8 +21,8 @@ #define MAX_USER_RT_PRIO 100 #define MAX_RT_PRIO MAX_USER_RT_PRIO -#define MAX_PRIO (MAX_RT_PRIO + 40) -#define DEFAULT_PRIO (MAX_RT_PRIO + 20) +#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH) +#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2) /* * Convert user-nice values [ -20 ... 0 ... 19 ] -- cgit v0.10.2 From d277d868dab6537a85f4757e39648b1d6afc60d5 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Tue, 11 Feb 2014 15:34:49 +0800 Subject: rcu: Use MAX_NICE to replace hardcoding of 19 Reviewed-by: Josh Triplett Signed-off-by: Dongsheng Yang Signed-off-by: Peter Zijlstra Cc: "Paul E. McKenney" Link: http://lkml.kernel.org/r/5b3bf232f41b33ab703a1595e94671b303e2d1fc.1392103744.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/rcu/torture.c b/kernel/rcu/torture.c index 732f8ae..219761d 100644 --- a/kernel/rcu/torture.c +++ b/kernel/rcu/torture.c @@ -805,7 +805,7 @@ rcu_torture_writer(void *arg) static DEFINE_RCU_RANDOM(rand); VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); - set_user_nice(current, 19); + set_user_nice(current, MAX_NICE); do { schedule_timeout_uninterruptible(1); @@ -871,7 +871,7 @@ rcu_torture_fakewriter(void *arg) DEFINE_RCU_RANDOM(rand); VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); - set_user_nice(current, 19); + set_user_nice(current, MAX_NICE); do { schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); @@ -987,7 +987,7 @@ rcu_torture_reader(void *arg) unsigned long long ts; VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); - set_user_nice(current, 19); + set_user_nice(current, MAX_NICE); if (irqreader && cur_ops->irq_capable) setup_timer_on_stack(&t, rcu_torture_timer, 0); @@ -1584,7 +1584,7 @@ static int rcu_torture_barrier_cbs(void *arg) init_rcu_head_on_stack(&rcu); VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); - set_user_nice(current, 19); + set_user_nice(current, MAX_NICE); do { wait_event(barrier_cbs_wq[myid], (newphase = -- cgit v0.10.2 From 75e45d512f257beedae0d8a67d053cde5537bd4c Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Tue, 11 Feb 2014 15:34:50 +0800 Subject: sched: Replace hardcoding of -20 and 19 with MIN_NICE and MAX_NICE Signed-off-by: Dongsheng Yang Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/bd80780f19b4f9b4a765acc353c8dbc130274dd6.1392103744.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c index 4a07353..e73efba 100644 --- a/kernel/sched/auto_group.c +++ b/kernel/sched/auto_group.c @@ -203,7 +203,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) struct autogroup *ag; int err; - if (nice < -20 || nice > 19) + if (nice < MIN_NICE || nice > MAX_NICE) return -EINVAL; err = security_task_setnice(current, nice); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index cc4965e..a8a73b8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2993,7 +2993,7 @@ void set_user_nice(struct task_struct *p, long nice) unsigned long flags; struct rq *rq; - if (task_nice(p) == nice || nice < -20 || nice > 19) + if (task_nice(p) == nice || nice < MIN_NICE || nice > MAX_NICE) return; /* * We have to be careful, if called from sys_setpriority(), @@ -3072,10 +3072,10 @@ SYSCALL_DEFINE1(nice, int, increment) increment = 40; nice = task_nice(current) + increment; - if (nice < -20) - nice = -20; - if (nice > 19) - nice = 19; + if (nice < MIN_NICE) + nice = MIN_NICE; + if (nice > MAX_NICE) + nice = MAX_NICE; if (increment < 0 && !can_nice(current, nice)) return -EPERM; @@ -3623,7 +3623,7 @@ static int sched_copy_attr(struct sched_attr __user *uattr, * XXX: do we want to be lenient like existing syscalls; or do we want * to be strict and return an error on out-of-bounds values? */ - attr->sched_nice = clamp(attr->sched_nice, -20, 19); + attr->sched_nice = clamp(attr->sched_nice, MIN_NICE, MAX_NICE); out: return ret; -- cgit v0.10.2 From c4a4d2f43177f6165132c6d36a4d46963018f726 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Tue, 11 Feb 2014 15:34:51 +0800 Subject: sys: Replace hardcoding of -20 and 19 with MIN_NICE and MAX_NICE Signed-off-by: Dongsheng Yang Signed-off-by: Peter Zijlstra Cc: Andrew Morton Cc: Oleg Nesterov Cc: Robin Holt Cc: Al Viro Cc: Kees Cook Cc: "Eric W. Biederman" Cc: Stephen Rothwell Link: http://lkml.kernel.org/r/0261f094b836f1acbcdf52e7166487c0c77323c8.1392103744.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/sys.c b/kernel/sys.c index c0a58be..adaeab6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -174,10 +174,10 @@ SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) /* normalize: avoid signed division (rounding problems) */ error = -ESRCH; - if (niceval < -20) - niceval = -20; - if (niceval > 19) - niceval = 19; + if (niceval < MIN_NICE) + niceval = MIN_NICE; + if (niceval > MAX_NICE) + niceval = MAX_NICE; rcu_read_lock(); read_lock(&tasklist_lock); -- cgit v0.10.2 From 144818422bfa272531250473b888394c21e6fe19 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Tue, 11 Feb 2014 15:34:52 +0800 Subject: workqueue: Replace hardcoding of -20 and 19 with MIN_NICE and MAX_NICE Signed-off-by: Dongsheng Yang Acked-by: Tejun Heo Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/6d85138180c00ce86975addab6e34b24b84f00a5.1392103744.git.yangds.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 193e977..3fa5b8f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3225,7 +3225,7 @@ static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr, return -ENOMEM; if (sscanf(buf, "%d", &attrs->nice) == 1 && - attrs->nice >= -20 && attrs->nice <= 19) + attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) ret = apply_workqueue_attrs(wq, attrs); else ret = -EINVAL; -- cgit v0.10.2 From de91b9cb97fe68cb6ef0cfe9bee09d015c152af8 Mon Sep 17 00:00:00 2001 From: Morten Rasmussen Date: Tue, 18 Feb 2014 14:14:24 +0000 Subject: sched: Fix select_task_rq_fair() description comments Brings select_task_rq_fair() description comments up-to-date. Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1392732864-10927-1-git-send-email-morten.rasmussen@arm.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e884e45..7982faf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4397,13 +4397,14 @@ done: } /* - * sched_balance_self: balance the current task (running on cpu) in domains - * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and - * SD_BALANCE_EXEC. + * select_task_rq_fair: Select target runqueue for the waking task in domains + * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, + * SD_BALANCE_FORK, or SD_BALANCE_EXEC. * - * Balance, ie. select the least loaded group. + * Balances load by selecting the idlest cpu in the idlest group, or under + * certain conditions an idle sibling cpu if the domain has SD_WAKE_AFFINE set. * - * Returns the target CPU number, or the same CPU if no balancing is needed. + * Returns the target cpu number. * * preempt must be disabled. */ -- cgit v0.10.2 From d987fc7f3228bf94cb6b21313ebab1d64ee637ad Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 5 Dec 2011 10:01:47 +0100 Subject: sched, nohz: Exclude isolated cores from load balancing The user explicitly disabled load balancing, else this core would not be disconnected. Don't add these to nohz.idle_cpus_mask. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Lei Wen Link: http://lkml.kernel.org/n/tip-vmme4f49psirp966pklm5l9j@git.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7982faf..a3a41c61 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6788,6 +6788,11 @@ out_unlock: return 0; } +static inline int on_null_domain(struct rq *rq) +{ + return unlikely(!rcu_dereference_sched(rq->sd)); +} + #ifdef CONFIG_NO_HZ_COMMON /* * idle load balancing details @@ -6842,8 +6847,13 @@ static void nohz_balancer_kick(void) static inline void nohz_balance_exit_idle(int cpu) { if (unlikely(test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))) { - cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); - atomic_dec(&nohz.nr_cpus); + /* + * Completely isolated CPUs don't ever set, so we must test. + */ + if (likely(cpumask_test_cpu(cpu, nohz.idle_cpus_mask))) { + cpumask_clear_cpu(cpu, nohz.idle_cpus_mask); + atomic_dec(&nohz.nr_cpus); + } clear_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); } } @@ -6897,6 +6907,12 @@ void nohz_balance_enter_idle(int cpu) if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) return; + /* + * If we're a completely isolated CPU, we don't play. + */ + if (on_null_domain(cpu_rq(cpu))) + return; + cpumask_set_cpu(cpu, nohz.idle_cpus_mask); atomic_inc(&nohz.nr_cpus); set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)); @@ -7159,11 +7175,6 @@ static void run_rebalance_domains(struct softirq_action *h) nohz_idle_balance(this_rq, idle); } -static inline int on_null_domain(struct rq *rq) -{ - return !rcu_dereference_sched(rq->sd); -} - /* * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. */ -- cgit v0.10.2 From 591ac0cb01ec8d0ff131a318f9b9abf6ecab6225 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 17 Feb 2014 10:59:29 -0500 Subject: cpuidle/powernv: Remove redundant cpuidle_idle_call() The core idle loop now takes care of it. We need to add the runlatch function calls to the idle routines which was earlier taken care of by the arch specific idle routine. Signed-off-by: Nicolas Pitre Signed-off-by: Preeti U Murthy Reviewed-by: Deepthi Dharwar Signed-off-by: Peter Zijlstra Cc: Paul Burton Cc: "Rafael J. Wysocki" Cc: Daniel Lezcano Cc: linux-pm@vger.kernel.org Cc: linaro-kernel@lists.linaro.org Link: http://lkml.kernel.org/n/tip-nr4mtbkkzf2oomaj85m24o7c@git.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 110f4fb..81a7a0a 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -225,16 +224,6 @@ static int __init pnv_probe(void) return 1; } -void powernv_idle(void) -{ - /* Hook to cpuidle framework if available, else - * call on default platform idle code - */ - if (cpuidle_idle_call()) { - power7_idle(); - } -} - define_machine(powernv) { .name = "PowerNV", .probe = pnv_probe, @@ -244,7 +233,7 @@ define_machine(powernv) { .show_cpuinfo = pnv_show_cpuinfo, .progress = pnv_progress, .machine_shutdown = pnv_shutdown, - .power_save = powernv_idle, + .power_save = power7_idle, .calibrate_decr = generic_calibrate_decr, .dma_set_mask = pnv_dma_set_mask, #ifdef CONFIG_KEXEC diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 78fd174..f48607c 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -14,6 +14,7 @@ #include #include +#include struct cpuidle_driver powernv_idle_driver = { .name = "powernv_idle", @@ -30,12 +31,14 @@ static int snooze_loop(struct cpuidle_device *dev, local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); + ppc64_runlatch_off(); while (!need_resched()) { HMT_low(); HMT_very_low(); } HMT_medium(); + ppc64_runlatch_on(); clear_thread_flag(TIF_POLLING_NRFLAG); smp_mb(); return index; @@ -45,7 +48,9 @@ static int nap_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { + ppc64_runlatch_off(); power7_idle(); + ppc64_runlatch_on(); return index; } -- cgit v0.10.2 From 6990566b535908905b4eccda7cc9e09c2db52187 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 17 Feb 2014 10:59:30 -0500 Subject: cpuidle/arm64: Remove redundant cpuidle_idle_call() The core idle loop now takes care of it. Signed-off-by: Nicolas Pitre Acked-by: Catalin Marinas Signed-off-by: Peter Zijlstra Cc: Paul Burton Cc: Deepthi Dharwar Cc: Preeti U Murthy Cc: "Rafael J. Wysocki" Cc: Daniel Lezcano Cc: linux-pm@vger.kernel.org Cc: linaro-kernel@lists.linaro.org Link: http://lkml.kernel.org/n/tip-wk9vpc8dsn46s12pl602ljpo@git.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 1c0a9be..9cce009 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -94,10 +93,8 @@ void arch_cpu_idle(void) * This should do all the clock switching and wait for interrupt * tricks */ - if (cpuidle_idle_call()) { - cpu_do_idle(); - local_irq_enable(); - } + cpu_do_idle(); + local_irq_enable(); } #ifdef CONFIG_HOTPLUG_CPU -- cgit v0.10.2 From 039ece38da45f5e6a94be3aa7611cf3634bc2461 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:30 +0100 Subject: libahci: Allow drivers to override start_engine Allwinner A10 and A20 ARM SoCs have an AHCI sata controller which needs a special register to be poked before starting the DMA engine. This register gets reset on an ahci_stop_engine call, so there is no other place then ahci_start_engine where this poking can be done. This commit allows drivers to override ahci_start_engine behavior for use by the Allwinner AHCI driver (and potentially other drivers in the future). Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index b6a49ce..1d4efe5 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -563,6 +563,7 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { struct ata_port *ap = link->ap; + struct ahci_host_priv *hpriv = ap->host->private_data; bool online; int rc; @@ -573,7 +574,7 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context), deadline, &online, NULL); - ahci_start_engine(ap); + hpriv->start_engine(ap); DPRINTK("EXIT, rc=%d, class=%u\n", rc, *class); @@ -588,6 +589,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class, { struct ata_port *ap = link->ap; struct ahci_port_priv *pp = ap->private_data; + struct ahci_host_priv *hpriv = ap->host->private_data; u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG; struct ata_taskfile tf; bool online; @@ -603,7 +605,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class, rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context), deadline, &online, NULL); - ahci_start_engine(ap); + hpriv->start_engine(ap); /* The pseudo configuration device on SIMG4726 attached to * ASUS P5W-DH Deluxe doesn't send signature FIS after diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 2289efd..64d1a99 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -323,6 +323,12 @@ struct ahci_host_priv { u32 em_msg_type; /* EM message type */ struct clk *clk; /* Only for platforms supporting clk */ void *plat_data; /* Other platform data */ + /* + * Optional ahci_start_engine override, if not set this gets set to the + * default ahci_start_engine during ahci_save_initial_config, this can + * be overridden anytime before the host is activated. + */ + void (*start_engine)(struct ata_port *ap); }; extern int ahci_ignore_sss; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 956851f..7985ae7 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -393,6 +393,9 @@ static ssize_t ahci_show_em_supported(struct device *dev, * * If inconsistent, config values are fixed up by this function. * + * If it is not set already this function sets hpriv->start_engine to + * ahci_start_engine. + * * LOCKING: * None. */ @@ -499,6 +502,9 @@ void ahci_save_initial_config(struct device *dev, hpriv->cap = cap; hpriv->cap2 = cap2; hpriv->port_map = port_map; + + if (!hpriv->start_engine) + hpriv->start_engine = ahci_start_engine; } EXPORT_SYMBOL_GPL(ahci_save_initial_config); @@ -765,7 +771,7 @@ static void ahci_start_port(struct ata_port *ap) /* enable DMA */ if (!(hpriv->flags & AHCI_HFLAG_DELAY_ENGINE)) - ahci_start_engine(ap); + hpriv->start_engine(ap); /* turn on LEDs */ if (ap->flags & ATA_FLAG_EM) { @@ -1234,7 +1240,7 @@ int ahci_kick_engine(struct ata_port *ap) /* restart engine */ out_restart: - ahci_start_engine(ap); + hpriv->start_engine(ap); return rc; } EXPORT_SYMBOL_GPL(ahci_kick_engine); @@ -1426,6 +1432,7 @@ static int ahci_hardreset(struct ata_link *link, unsigned int *class, const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); struct ata_port *ap = link->ap; struct ahci_port_priv *pp = ap->private_data; + struct ahci_host_priv *hpriv = ap->host->private_data; u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG; struct ata_taskfile tf; bool online; @@ -1443,7 +1450,7 @@ static int ahci_hardreset(struct ata_link *link, unsigned int *class, rc = sata_link_hardreset(link, timing, deadline, &online, ahci_check_ready); - ahci_start_engine(ap); + hpriv->start_engine(ap); if (online) *class = ahci_dev_classify(ap); @@ -2007,10 +2014,12 @@ static void ahci_thaw(struct ata_port *ap) void ahci_error_handler(struct ata_port *ap) { + struct ahci_host_priv *hpriv = ap->host->private_data; + if (!(ap->pflags & ATA_PFLAG_FROZEN)) { /* restart engine */ ahci_stop_engine(ap); - ahci_start_engine(ap); + hpriv->start_engine(ap); } sata_pmp_error_handler(ap); @@ -2031,6 +2040,7 @@ static void ahci_post_internal_cmd(struct ata_queued_cmd *qc) static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) { + struct ahci_host_priv *hpriv = ap->host->private_data; void __iomem *port_mmio = ahci_port_base(ap); struct ata_device *dev = ap->link.device; u32 devslp, dm, dito, mdat, deto; @@ -2094,7 +2104,7 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) PORT_DEVSLP_ADSE); writel(devslp, port_mmio + PORT_DEVSLP); - ahci_start_engine(ap); + hpriv->start_engine(ap); /* enable device sleep feature for the drive */ err_mask = ata_dev_set_feature(dev, @@ -2106,6 +2116,7 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) static void ahci_enable_fbs(struct ata_port *ap) { + struct ahci_host_priv *hpriv = ap->host->private_data; struct ahci_port_priv *pp = ap->private_data; void __iomem *port_mmio = ahci_port_base(ap); u32 fbs; @@ -2134,11 +2145,12 @@ static void ahci_enable_fbs(struct ata_port *ap) } else dev_err(ap->host->dev, "Failed to enable FBS\n"); - ahci_start_engine(ap); + hpriv->start_engine(ap); } static void ahci_disable_fbs(struct ata_port *ap) { + struct ahci_host_priv *hpriv = ap->host->private_data; struct ahci_port_priv *pp = ap->private_data; void __iomem *port_mmio = ahci_port_base(ap); u32 fbs; @@ -2166,7 +2178,7 @@ static void ahci_disable_fbs(struct ata_port *ap) pp->fbs_enabled = false; } - ahci_start_engine(ap); + hpriv->start_engine(ap); } static void ahci_pmp_attach(struct ata_port *ap) diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index 5a68b7b..d4df0bf 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -402,6 +402,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, static const unsigned long timing[] = { 5, 100, 500}; struct ata_port *ap = link->ap; struct ahci_port_priv *pp = ap->private_data; + struct ahci_host_priv *hpriv = ap->host->private_data; u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG; struct ata_taskfile tf; bool online; @@ -430,7 +431,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, break; } while (!online && retry--); - ahci_start_engine(ap); + hpriv->start_engine(ap); if (online) *class = ahci_dev_classify(ap); -- cgit v0.10.2 From 156c5887948cd191417f18026aab9ce26e5a95da Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:31 +0100 Subject: ahci-platform: Add support for devices with more then 1 clock The allwinner-sun4i AHCI controller needs 2 clocks to be enabled and the imx AHCI controller needs 3 clocks to be enabled. tj: Minor comment formatting updates. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index 89de156..3ced07d 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -10,6 +10,7 @@ Required properties: Optional properties: - dma-coherent : Present if dma operations are coherent +- clocks : a list of phandle + clock specifier pairs Example: sata@ffe08000 { diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 64d1a99..c12862b 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -51,6 +51,7 @@ enum { AHCI_MAX_PORTS = 32, + AHCI_MAX_CLKS = 3, AHCI_MAX_SG = 168, /* hardware max is 64K */ AHCI_DMA_BOUNDARY = 0xffffffff, AHCI_MAX_CMDS = 32, @@ -321,7 +322,7 @@ struct ahci_host_priv { u32 em_loc; /* enclosure management location */ u32 em_buf_sz; /* EM buffer size in byte */ u32 em_msg_type; /* EM message type */ - struct clk *clk; /* Only for platforms supporting clk */ + struct clk *clks[AHCI_MAX_CLKS]; /* Optional */ void *plat_data; /* Other platform data */ /* * Optional ahci_start_engine override, if not set this gets set to the diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 9302d81..8d5a9a7 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -86,6 +86,60 @@ static struct scsi_host_template ahci_platform_sht = { AHCI_SHT("ahci_platform"), }; +/** + * ahci_platform_enable_clks - Enable platform clocks + * @hpriv: host private area to store config values + * + * This function enables all the clks found in hpriv->clks, starting at + * index 0. If any clk fails to enable it disables all the clks already + * enabled in reverse order, and then returns an error. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_enable_clks(struct ahci_host_priv *hpriv) +{ + int c, rc; + + for (c = 0; c < AHCI_MAX_CLKS && hpriv->clks[c]; c++) { + rc = clk_prepare_enable(hpriv->clks[c]); + if (rc) + goto disable_unprepare_clk; + } + return 0; + +disable_unprepare_clk: + while (--c >= 0) + clk_disable_unprepare(hpriv->clks[c]); + return rc; +} +EXPORT_SYMBOL_GPL(ahci_platform_enable_clks); + +/** + * ahci_platform_disable_clks - Disable platform clocks + * @hpriv: host private area to store config values + * + * This function disables all the clks found in hpriv->clks, in reverse + * order of ahci_platform_enable_clks (starting at the end of the array). + */ +void ahci_platform_disable_clks(struct ahci_host_priv *hpriv) +{ + int c; + + for (c = AHCI_MAX_CLKS - 1; c >= 0; c--) + if (hpriv->clks[c]) + clk_disable_unprepare(hpriv->clks[c]); +} +EXPORT_SYMBOL_GPL(ahci_platform_disable_clks); + +static void ahci_put_clks(struct ahci_host_priv *hpriv) +{ + int c; + + for (c = 0; c < AHCI_MAX_CLKS && hpriv->clks[c]; c++) + clk_put(hpriv->clks[c]); +} + static int ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -96,6 +150,7 @@ static int ahci_probe(struct platform_device *pdev) struct ahci_host_priv *hpriv; struct ata_host *host; struct resource *mem; + struct clk *clk; int irq; int n_ports; int i; @@ -130,17 +185,31 @@ static int ahci_probe(struct platform_device *pdev) return -ENOMEM; } - hpriv->clk = clk_get(dev, NULL); - if (IS_ERR(hpriv->clk)) { - dev_err(dev, "can't get clock\n"); - } else { - rc = clk_prepare_enable(hpriv->clk); - if (rc) { - dev_err(dev, "clock prepare enable failed"); - goto free_clk; + for (i = 0; i < AHCI_MAX_CLKS; i++) { + /* + * For now we must use clk_get(dev, NULL) for the first clock, + * because some platforms (da850, spear13xx) are not yet + * converted to use devicetree for clocks. For new platforms + * this is equivalent to of_clk_get(dev->of_node, 0). + */ + if (i == 0) + clk = clk_get(dev, NULL); + else + clk = of_clk_get(dev->of_node, i); + + if (IS_ERR(clk)) { + rc = PTR_ERR(clk); + if (rc == -EPROBE_DEFER) + goto free_clk; + break; } + hpriv->clks[i] = clk; } + rc = ahci_enable_clks(dev, hpriv); + if (rc) + goto free_clk; + /* * Some platforms might need to prepare for mmio region access, * which could be done in the following init call. So, the mmio @@ -221,11 +290,9 @@ pdata_exit: if (pdata && pdata->exit) pdata->exit(dev); disable_unprepare_clk: - if (!IS_ERR(hpriv->clk)) - clk_disable_unprepare(hpriv->clk); + ahci_disable_clks(hpriv); free_clk: - if (!IS_ERR(hpriv->clk)) - clk_put(hpriv->clk); + ahci_put_clks(hpriv); return rc; } @@ -238,10 +305,8 @@ static void ahci_host_stop(struct ata_host *host) if (pdata && pdata->exit) pdata->exit(dev); - if (!IS_ERR(hpriv->clk)) { - clk_disable_unprepare(hpriv->clk); - clk_put(hpriv->clk); - } + ahci_disable_clks(hpriv); + ahci_put_clks(hpriv); } #ifdef CONFIG_PM_SLEEP @@ -276,8 +341,7 @@ static int ahci_suspend(struct device *dev) if (pdata && pdata->suspend) return pdata->suspend(dev); - if (!IS_ERR(hpriv->clk)) - clk_disable_unprepare(hpriv->clk); + ahci_disable_clks(hpriv); return 0; } @@ -289,13 +353,9 @@ static int ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - if (!IS_ERR(hpriv->clk)) { - rc = clk_prepare_enable(hpriv->clk); - if (rc) { - dev_err(dev, "clock prepare enable failed"); - return rc; - } - } + rc = ahci_enable_clks(dev, hpriv); + if (rc) + return rc; if (pdata && pdata->resume) { rc = pdata->resume(dev); @@ -316,8 +376,7 @@ static int ahci_resume(struct device *dev) return 0; disable_unprepare_clk: - if (!IS_ERR(hpriv->clk)) - clk_disable_unprepare(hpriv->clk); + ahci_disable_clks(hpriv); return rc; } diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 73a2500..769d065 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -19,6 +19,7 @@ struct device; struct ata_port_info; +struct ahci_host_priv; struct ahci_platform_data { int (*init)(struct device *dev, void __iomem *addr); @@ -30,4 +31,7 @@ struct ahci_platform_data { unsigned int mask_port_map; }; +int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); +void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); + #endif /* _AHCI_PLATFORM_H */ -- cgit v0.10.2 From 4b3e603a298db26c6c37e8b08adcce24d014df13 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:32 +0100 Subject: ahci-platform: Add support for an optional regulator for sata-target power Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index 3ced07d..1ac807f 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -11,6 +11,7 @@ Required properties: Optional properties: - dma-coherent : Present if dma operations are coherent - clocks : a list of phandle + clock specifier pairs +- target-supply : regulator for SATA target power Example: sata@ffe08000 { diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index c12862b..bf8100c 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -37,6 +37,7 @@ #include #include +#include /* Enclosure Management Control */ #define EM_CTRL_MSG_TYPE 0x000f0000 @@ -323,6 +324,7 @@ struct ahci_host_priv { u32 em_buf_sz; /* EM buffer size in byte */ u32 em_msg_type; /* EM message type */ struct clk *clks[AHCI_MAX_CLKS]; /* Optional */ + struct regulator *target_pwr; /* Optional */ void *plat_data; /* Other platform data */ /* * Optional ahci_start_engine override, if not set this gets set to the diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 8d5a9a7..4befd78 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -185,6 +185,14 @@ static int ahci_probe(struct platform_device *pdev) return -ENOMEM; } + hpriv->target_pwr = devm_regulator_get_optional(dev, "target"); + if (IS_ERR(hpriv->target_pwr)) { + rc = PTR_ERR(hpriv->target_pwr); + if (rc == -EPROBE_DEFER) + return -EPROBE_DEFER; + hpriv->target_pwr = NULL; + } + for (i = 0; i < AHCI_MAX_CLKS; i++) { /* * For now we must use clk_get(dev, NULL) for the first clock, @@ -206,9 +214,15 @@ static int ahci_probe(struct platform_device *pdev) hpriv->clks[i] = clk; } + if (hpriv->target_pwr) { + rc = regulator_enable(hpriv->target_pwr); + if (rc) + goto free_clk; + } + rc = ahci_enable_clks(dev, hpriv); if (rc) - goto free_clk; + goto disable_regulator; /* * Some platforms might need to prepare for mmio region access, @@ -291,6 +305,9 @@ pdata_exit: pdata->exit(dev); disable_unprepare_clk: ahci_disable_clks(hpriv); +disable_regulator: + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); free_clk: ahci_put_clks(hpriv); return rc; @@ -307,6 +324,9 @@ static void ahci_host_stop(struct ata_host *host) ahci_disable_clks(hpriv); ahci_put_clks(hpriv); + + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); } #ifdef CONFIG_PM_SLEEP @@ -343,6 +363,9 @@ static int ahci_suspend(struct device *dev) ahci_disable_clks(hpriv); + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); + return 0; } @@ -353,9 +376,15 @@ static int ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; + if (hpriv->target_pwr) { + rc = regulator_enable(hpriv->target_pwr); + if (rc) + return rc; + } + rc = ahci_enable_clks(dev, hpriv); if (rc) - return rc; + goto disable_regulator; if (pdata && pdata->resume) { rc = pdata->resume(dev); @@ -377,6 +406,9 @@ static int ahci_resume(struct device *dev) disable_unprepare_clk: ahci_disable_clks(hpriv); +disable_regulator: + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); return rc; } -- cgit v0.10.2 From 96a01ba52c60fdd74dd6e8cf06645d06515b1396 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:33 +0100 Subject: ahci-platform: Add enable_ / disable_resources helper functions tj: Minor comment formatting updates. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 4befd78..a32df31 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -132,6 +132,62 @@ void ahci_platform_disable_clks(struct ahci_host_priv *hpriv) } EXPORT_SYMBOL_GPL(ahci_platform_disable_clks); +/** + * ahci_platform_enable_resources - Enable platform resources + * @hpriv: host private area to store config values + * + * This function enables all ahci_platform managed resources in the + * following order: + * 1) Regulator + * 2) Clocks (through ahci_platform_enable_clks) + * + * If resource enabling fails at any point the previous enabled resources + * are disabled in reverse order. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc; + + if (hpriv->target_pwr) { + rc = regulator_enable(hpriv->target_pwr); + if (rc) + return rc; + } + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + return 0; + +disable_regulator: + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); + return rc; +} +EXPORT_SYMBOL_GPL(ahci_platform_enable_resources); + +/** + * ahci_platform_disable_resources - Disable platform resources + * @hpriv: host private area to store config values + * + * This function disables all ahci_platform managed resources in the + * following order: + * 1) Clocks (through ahci_platform_disable_clks) + * 2) Regulator + */ +void ahci_platform_disable_resources(struct ahci_host_priv *hpriv) +{ + ahci_platform_disable_clks(hpriv); + + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); +} +EXPORT_SYMBOL_GPL(ahci_platform_disable_resources); + static void ahci_put_clks(struct ahci_host_priv *hpriv) { int c; @@ -214,15 +270,9 @@ static int ahci_probe(struct platform_device *pdev) hpriv->clks[i] = clk; } - if (hpriv->target_pwr) { - rc = regulator_enable(hpriv->target_pwr); - if (rc) - goto free_clk; - } - - rc = ahci_enable_clks(dev, hpriv); + rc = ahci_platform_enable_resources(hpriv); if (rc) - goto disable_regulator; + goto free_clk; /* * Some platforms might need to prepare for mmio region access, @@ -233,7 +283,7 @@ static int ahci_probe(struct platform_device *pdev) if (pdata && pdata->init) { rc = pdata->init(dev, hpriv->mmio); if (rc) - goto disable_unprepare_clk; + goto disable_resources; } ahci_save_initial_config(dev, hpriv, @@ -303,11 +353,8 @@ static int ahci_probe(struct platform_device *pdev) pdata_exit: if (pdata && pdata->exit) pdata->exit(dev); -disable_unprepare_clk: - ahci_disable_clks(hpriv); -disable_regulator: - if (hpriv->target_pwr) - regulator_disable(hpriv->target_pwr); +disable_resources: + ahci_platform_disable_resources(hpriv); free_clk: ahci_put_clks(hpriv); return rc; @@ -322,11 +369,8 @@ static void ahci_host_stop(struct ata_host *host) if (pdata && pdata->exit) pdata->exit(dev); - ahci_disable_clks(hpriv); + ahci_platform_disable_resources(hpriv); ahci_put_clks(hpriv); - - if (hpriv->target_pwr) - regulator_disable(hpriv->target_pwr); } #ifdef CONFIG_PM_SLEEP @@ -361,10 +405,7 @@ static int ahci_suspend(struct device *dev) if (pdata && pdata->suspend) return pdata->suspend(dev); - ahci_disable_clks(hpriv); - - if (hpriv->target_pwr) - regulator_disable(hpriv->target_pwr); + ahci_platform_disable_resources(hpriv); return 0; } @@ -376,26 +417,20 @@ static int ahci_resume(struct device *dev) struct ahci_host_priv *hpriv = host->private_data; int rc; - if (hpriv->target_pwr) { - rc = regulator_enable(hpriv->target_pwr); - if (rc) - return rc; - } - - rc = ahci_enable_clks(dev, hpriv); + rc = ahci_platform_enable_resources(hpriv); if (rc) - goto disable_regulator; + return rc; if (pdata && pdata->resume) { rc = pdata->resume(dev); if (rc) - goto disable_unprepare_clk; + goto disable_resources; } if (dev->power.power_state.event == PM_EVENT_SUSPEND) { rc = ahci_reset_controller(host); if (rc) - goto disable_unprepare_clk; + goto disable_resources; ahci_init_controller(host); } @@ -404,11 +439,8 @@ static int ahci_resume(struct device *dev) return 0; -disable_unprepare_clk: - ahci_disable_clks(hpriv); -disable_regulator: - if (hpriv->target_pwr) - regulator_disable(hpriv->target_pwr); +disable_resources: + ahci_platform_disable_resources(hpriv); return rc; } diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 769d065..b674b01 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -33,5 +33,7 @@ struct ahci_platform_data { int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); +int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); +void ahci_platform_disable_resources(struct ahci_host_priv *hpriv); #endif /* _AHCI_PLATFORM_H */ -- cgit v0.10.2 From 23b07d4cb3c0c850055cf968af44019b8da185fb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:34 +0100 Subject: ahci-platform: "Library-ise" ahci_probe functionality ahci_probe consists of 3 steps: 1) Get resources (get mmio, clks, regulator) 2) Enable resources, handled by ahci_platform_enable_resouces 3) The more or less standard ahci-host controller init sequence This commit refactors step 1 and 3 into separate functions, so the platform drivers for AHCI implementations which need a specific order in step 2, and / or need to do some custom register poking at some time, can re-use ahci-platform.c code without needing to copy and paste it. Note that ahci_platform_init_host's prototype takes the 3 non function members of ahci_platform_data as arguments, the idea is that drivers using the new exported utility functions will not use ahci_platform_data at all, and hopefully in the future ahci_platform_data can go away entirely. tj: Minor comment formatting updates. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index a32df31..19e9eaa 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -188,64 +188,60 @@ void ahci_platform_disable_resources(struct ahci_host_priv *hpriv) } EXPORT_SYMBOL_GPL(ahci_platform_disable_resources); -static void ahci_put_clks(struct ahci_host_priv *hpriv) +static void ahci_platform_put_resources(struct device *dev, void *res) { + struct ahci_host_priv *hpriv = res; int c; for (c = 0; c < AHCI_MAX_CLKS && hpriv->clks[c]; c++) clk_put(hpriv->clks[c]); } -static int ahci_probe(struct platform_device *pdev) +/** + * ahci_platform_get_resources - Get platform resources + * @pdev: platform device to get resources for + * + * This function allocates an ahci_host_priv struct, and gets the following + * resources, storing a reference to them inside the returned struct: + * + * 1) mmio registers (IORESOURCE_MEM 0, mandatory) + * 2) regulator for controlling the targets power (optional) + * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node, + * or for non devicetree enabled platforms a single clock + * + * RETURNS: + * The allocated ahci_host_priv on success, otherwise an ERR_PTR value + */ +struct ahci_host_priv *ahci_platform_get_resources( + struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct ahci_platform_data *pdata = dev_get_platdata(dev); - const struct platform_device_id *id = platform_get_device_id(pdev); - struct ata_port_info pi = ahci_port_info[id ? id->driver_data : 0]; - const struct ata_port_info *ppi[] = { &pi, NULL }; struct ahci_host_priv *hpriv; - struct ata_host *host; - struct resource *mem; struct clk *clk; - int irq; - int n_ports; - int i; - int rc; + int i, rc = -ENOMEM; - mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!mem) { - dev_err(dev, "no mmio space\n"); - return -EINVAL; - } - - irq = platform_get_irq(pdev, 0); - if (irq <= 0) { - dev_err(dev, "no irq\n"); - return -EINVAL; - } + if (!devres_open_group(dev, NULL, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); - if (pdata && pdata->ata_port_info) - pi = *pdata->ata_port_info; - - hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL); - if (!hpriv) { - dev_err(dev, "can't alloc ahci_host_priv\n"); - return -ENOMEM; - } + hpriv = devres_alloc(ahci_platform_put_resources, sizeof(*hpriv), + GFP_KERNEL); + if (!hpriv) + goto err_out; - hpriv->flags |= (unsigned long)pi.private_data; + devres_add(dev, hpriv); - hpriv->mmio = devm_ioremap(dev, mem->start, resource_size(mem)); + hpriv->mmio = devm_ioremap_resource(dev, + platform_get_resource(pdev, IORESOURCE_MEM, 0)); if (!hpriv->mmio) { - dev_err(dev, "can't map %pR\n", mem); - return -ENOMEM; + dev_err(dev, "no mmio space\n"); + goto err_out; } hpriv->target_pwr = devm_regulator_get_optional(dev, "target"); if (IS_ERR(hpriv->target_pwr)) { rc = PTR_ERR(hpriv->target_pwr); if (rc == -EPROBE_DEFER) - return -EPROBE_DEFER; + goto err_out; hpriv->target_pwr = NULL; } @@ -264,33 +260,59 @@ static int ahci_probe(struct platform_device *pdev) if (IS_ERR(clk)) { rc = PTR_ERR(clk); if (rc == -EPROBE_DEFER) - goto free_clk; + goto err_out; break; } hpriv->clks[i] = clk; } - rc = ahci_platform_enable_resources(hpriv); - if (rc) - goto free_clk; + devres_remove_group(dev, NULL); + return hpriv; - /* - * Some platforms might need to prepare for mmio region access, - * which could be done in the following init call. So, the mmio - * region shouldn't be accessed before init (if provided) has - * returned successfully. - */ - if (pdata && pdata->init) { - rc = pdata->init(dev, hpriv->mmio); - if (rc) - goto disable_resources; - } +err_out: + devres_release_group(dev, NULL); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(ahci_platform_get_resources); + +/** + * ahci_platform_init_host - Bring up an ahci-platform host + * @pdev: platform device pointer for the host + * @hpriv: ahci-host private data for the host + * @pi_template: template for the ata_port_info to use + * @force_port_map: param passed to ahci_save_initial_config + * @mask_port_map: param passed to ahci_save_initial_config + * + * This function does all the usual steps needed to bring up an + * ahci-platform host, note any necessary resources (ie clks, phy, etc.) + * must be initialized / enabled before calling this. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_init_host(struct platform_device *pdev, + struct ahci_host_priv *hpriv, + const struct ata_port_info *pi_template, + unsigned int force_port_map, + unsigned int mask_port_map) +{ + struct device *dev = &pdev->dev; + struct ata_port_info pi = *pi_template; + const struct ata_port_info *ppi[] = { &pi, NULL }; + struct ata_host *host; + int i, irq, n_ports, rc; - ahci_save_initial_config(dev, hpriv, - pdata ? pdata->force_port_map : 0, - pdata ? pdata->mask_port_map : 0); + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(dev, "no irq\n"); + return -EINVAL; + } /* prepare host */ + hpriv->flags |= (unsigned long)pi.private_data; + + ahci_save_initial_config(dev, hpriv, force_port_map, mask_port_map); + if (hpriv->cap & HOST_CAP_NCQ) pi.flags |= ATA_FLAG_NCQ; @@ -307,10 +329,8 @@ static int ahci_probe(struct platform_device *pdev) n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map)); host = ata_host_alloc_pinfo(dev, ppi, n_ports); - if (!host) { - rc = -ENOMEM; - goto pdata_exit; - } + if (!host) + return -ENOMEM; host->private_data = hpriv; @@ -325,7 +345,8 @@ static int ahci_probe(struct platform_device *pdev) for (i = 0; i < host->n_ports; i++) { struct ata_port *ap = host->ports[i]; - ata_port_desc(ap, "mmio %pR", mem); + ata_port_desc(ap, "mmio %pR", + platform_get_resource(pdev, IORESOURCE_MEM, 0)); ata_port_desc(ap, "port 0x%x", 0x100 + ap->port_no * 0x80); /* set enclosure management message type */ @@ -339,13 +360,53 @@ static int ahci_probe(struct platform_device *pdev) rc = ahci_reset_controller(host); if (rc) - goto pdata_exit; + return rc; ahci_init_controller(host); ahci_print_info(host, "platform"); - rc = ata_host_activate(host, irq, ahci_interrupt, IRQF_SHARED, - &ahci_platform_sht); + return ata_host_activate(host, irq, ahci_interrupt, IRQF_SHARED, + &ahci_platform_sht); +} +EXPORT_SYMBOL_GPL(ahci_platform_init_host); + +static int ahci_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ahci_platform_data *pdata = dev_get_platdata(dev); + const struct platform_device_id *id = platform_get_device_id(pdev); + const struct ata_port_info *pi_template; + struct ahci_host_priv *hpriv; + int rc; + + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + return PTR_ERR(hpriv); + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + return rc; + + /* + * Some platforms might need to prepare for mmio region access, + * which could be done in the following init call. So, the mmio + * region shouldn't be accessed before init (if provided) has + * returned successfully. + */ + if (pdata && pdata->init) { + rc = pdata->init(dev, hpriv->mmio); + if (rc) + goto disable_resources; + } + + if (pdata && pdata->ata_port_info) + pi_template = pdata->ata_port_info; + else + pi_template = &ahci_port_info[id ? id->driver_data : 0]; + + rc = ahci_platform_init_host(pdev, hpriv, pi_template, + pdata ? pdata->force_port_map : 0, + pdata ? pdata->mask_port_map : 0); if (rc) goto pdata_exit; @@ -355,8 +416,6 @@ pdata_exit: pdata->exit(dev); disable_resources: ahci_platform_disable_resources(hpriv); -free_clk: - ahci_put_clks(hpriv); return rc; } @@ -370,7 +429,6 @@ static void ahci_host_stop(struct ata_host *host) pdata->exit(dev); ahci_platform_disable_resources(hpriv); - ahci_put_clks(hpriv); } #ifdef CONFIG_PM_SLEEP diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index b674b01..b80c51c 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -20,7 +20,14 @@ struct device; struct ata_port_info; struct ahci_host_priv; +struct platform_device; +/* + * Note ahci_platform_data is deprecated, it is only kept around for use + * by the old da850 and spear13xx ahci code. + * New drivers should instead declare their own platform_driver struct, and + * use ahci_platform* functions in their own probe, suspend and resume methods. + */ struct ahci_platform_data { int (*init)(struct device *dev, void __iomem *addr); void (*exit)(struct device *dev); @@ -35,5 +42,12 @@ int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); void ahci_platform_disable_resources(struct ahci_host_priv *hpriv); +struct ahci_host_priv *ahci_platform_get_resources( + struct platform_device *pdev); +int ahci_platform_init_host(struct platform_device *pdev, + struct ahci_host_priv *hpriv, + const struct ata_port_info *pi_template, + unsigned int force_port_map, + unsigned int mask_port_map); #endif /* _AHCI_PLATFORM_H */ -- cgit v0.10.2 From 648cb6fd83b97f0f772db783a280af300fa9f2bc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:35 +0100 Subject: ahci-platform: "Library-ise" suspend / resume functionality Split suspend / resume code into host suspend / resume functionality and resource enable / disabling phases, and export the new suspend_ / resume_host functions. tj: Minor comment formatting updates. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 19e9eaa..01f7bbe 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -432,14 +432,23 @@ static void ahci_host_stop(struct ata_host *host) } #ifdef CONFIG_PM_SLEEP -static int ahci_suspend(struct device *dev) +/** + * ahci_platform_suspend_host - Suspend an ahci-platform host + * @dev: device pointer for the host + * + * This function does all the usual steps needed to suspend an + * ahci-platform host, note any necessary resources (ie clks, phy, etc.) + * must be disabled after calling this. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_suspend_host(struct device *dev) { - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; void __iomem *mmio = hpriv->mmio; u32 ctl; - int rc; if (hpriv->flags & AHCI_HFLAG_NO_SUSPEND) { dev_err(dev, "firmware update required for suspend/resume\n"); @@ -456,7 +465,58 @@ static int ahci_suspend(struct device *dev) writel(ctl, mmio + HOST_CTL); readl(mmio + HOST_CTL); /* flush */ - rc = ata_host_suspend(host, PMSG_SUSPEND); + return ata_host_suspend(host, PMSG_SUSPEND); +} +EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); + +/** + * ahci_platform_resume_host - Resume an ahci-platform host + * @dev: device pointer for the host + * + * This function does all the usual steps needed to resume an ahci-platform + * host, note any necessary resources (ie clks, phy, etc.) must be + * initialized / enabled before calling this. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_resume_host(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + int rc; + + if (dev->power.power_state.event == PM_EVENT_SUSPEND) { + rc = ahci_reset_controller(host); + if (rc) + return rc; + + ahci_init_controller(host); + } + + ata_host_resume(host); + + return 0; +} +EXPORT_SYMBOL_GPL(ahci_platform_resume_host); + +/** + * ahci_platform_suspend - Suspend an ahci-platform device + * @dev: the platform device to suspend + * + * This function suspends the host associated with the device, followed by + * disabling all the resources of the device. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_suspend(struct device *dev) +{ + struct ahci_platform_data *pdata = dev_get_platdata(dev); + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + int rc; + + rc = ahci_platform_suspend_host(dev); if (rc) return rc; @@ -467,8 +527,19 @@ static int ahci_suspend(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(ahci_platform_suspend); -static int ahci_resume(struct device *dev) +/** + * ahci_platform_resume - Resume an ahci-platform device + * @dev: the platform device to resume + * + * This function enables all the resources of the device followed by + * resuming the host associated with the device. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_resume(struct device *dev) { struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ata_host *host = dev_get_drvdata(dev); @@ -485,15 +556,9 @@ static int ahci_resume(struct device *dev) goto disable_resources; } - if (dev->power.power_state.event == PM_EVENT_SUSPEND) { - rc = ahci_reset_controller(host); - if (rc) - goto disable_resources; - - ahci_init_controller(host); - } - - ata_host_resume(host); + rc = ahci_platform_resume_host(dev); + if (rc) + goto disable_resources; return 0; @@ -502,9 +567,11 @@ disable_resources: return rc; } +EXPORT_SYMBOL_GPL(ahci_platform_resume); #endif -static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_suspend, ahci_resume); +static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_platform_suspend, + ahci_platform_resume); static const struct of_device_id ahci_of_match[] = { { .compatible = "snps,spear-ahci", }, diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index b80c51c..542f268 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -50,4 +50,9 @@ int ahci_platform_init_host(struct platform_device *pdev, unsigned int force_port_map, unsigned int mask_port_map); +int ahci_platform_suspend_host(struct device *dev); +int ahci_platform_resume_host(struct device *dev); +int ahci_platform_suspend(struct device *dev); +int ahci_platform_resume(struct device *dev); + #endif /* _AHCI_PLATFORM_H */ -- cgit v0.10.2 From c5754b5220f01e8722799d35c04a76e82c62d7d8 Mon Sep 17 00:00:00 2001 From: Olliver Schinagl Date: Sat, 22 Feb 2014 16:53:36 +0100 Subject: ARM: sunxi: Add support for Allwinner SUNXi SoCs sata to ahci_platform This patch adds support for the ahci sata controler found on Allwinner A10 and A20 SoCs to the ahci_platform driver. Orignally written by Olliver Schinagl using the approach of having a platform device which probe method creates a new child platform device which gets driven by ahci_platform.c, as done by ahci_imx.c . Refactored by Hans de Goede to add most of the non sunxi specific functionality to ahci_platform.c and use a platform_data pointer from of_device_id for the sunxi specific bits. Signed-off-by: Olliver Schinagl Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index 1ac807f..499bfed 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -4,7 +4,9 @@ SATA nodes are defined to describe on-chip Serial ATA controllers. Each SATA controller should have its own node. Required properties: -- compatible : compatible list, contains "snps,spear-ahci" +- compatible : compatible list, one of "snps,spear-ahci", + "snps,exynos5440-ahci", "ibm,476gtr-ahci", or + "allwinner,sun4i-a10-ahci" - interrupts : - reg : @@ -13,10 +15,17 @@ Optional properties: - clocks : a list of phandle + clock specifier pairs - target-supply : regulator for SATA target power -Example: +Examples: sata@ffe08000 { compatible = "snps,spear-ahci"; reg = <0xffe08000 0x1000>; interrupts = <115>; - }; + + ahci: sata@01c18000 { + compatible = "allwinner,sun4i-a10-ahci"; + reg = <0x01c18000 0x1000>; + interrupts = <56>; + clocks = <&pll6 0>, <&ahb_gates 25>; + target-supply = <®_ahci_5v>; + }; diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 77416c1..ab93c33 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -106,6 +106,15 @@ config AHCI_IMX If unsure, say N. +config AHCI_SUNXI + tristate "Allwinner sunxi AHCI SATA support" + depends on ARCH_SUNXI && SATA_AHCI_PLATFORM + help + This option enables support for the Allwinner sunxi SoC's + onboard AHCI SATA. + + If unsure, say N. + config SATA_FSL tristate "Freescale 3.0Gbps SATA support" depends on FSL_SOC diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 46518c6..246050b 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_SATA_SIL24) += sata_sil24.o obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o obj-$(CONFIG_AHCI_IMX) += ahci_imx.o +obj-$(CONFIG_AHCI_SUNXI) += ahci_sunxi.o # SFF w/ custom DMA obj-$(CONFIG_PDC_ADMA) += pdc_adma.o diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c new file mode 100644 index 0000000..001f7dfc --- /dev/null +++ b/drivers/ata/ahci_sunxi.c @@ -0,0 +1,249 @@ +/* + * Allwinner sunxi AHCI SATA platform driver + * Copyright 2013 Olliver Schinagl + * Copyright 2014 Hans de Goede + * + * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov + * Based on code from Allwinner Technology Co., Ltd. , + * Daniel Wang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "ahci.h" + +#define AHCI_BISTAFR 0x00a0 +#define AHCI_BISTCR 0x00a4 +#define AHCI_BISTFCTR 0x00a8 +#define AHCI_BISTSR 0x00ac +#define AHCI_BISTDECR 0x00b0 +#define AHCI_DIAGNR0 0x00b4 +#define AHCI_DIAGNR1 0x00b8 +#define AHCI_OOBR 0x00bc +#define AHCI_PHYCS0R 0x00c0 +#define AHCI_PHYCS1R 0x00c4 +#define AHCI_PHYCS2R 0x00c8 +#define AHCI_TIMER1MS 0x00e0 +#define AHCI_GPARAM1R 0x00e8 +#define AHCI_GPARAM2R 0x00ec +#define AHCI_PPARAMR 0x00f0 +#define AHCI_TESTR 0x00f4 +#define AHCI_VERSIONR 0x00f8 +#define AHCI_IDR 0x00fc +#define AHCI_RWCR 0x00fc +#define AHCI_P0DMACR 0x0170 +#define AHCI_P0PHYCR 0x0178 +#define AHCI_P0PHYSR 0x017c + +static void sunxi_clrbits(void __iomem *reg, u32 clr_val) +{ + u32 reg_val; + + reg_val = readl(reg); + reg_val &= ~(clr_val); + writel(reg_val, reg); +} + +static void sunxi_setbits(void __iomem *reg, u32 set_val) +{ + u32 reg_val; + + reg_val = readl(reg); + reg_val |= set_val; + writel(reg_val, reg); +} + +static void sunxi_clrsetbits(void __iomem *reg, u32 clr_val, u32 set_val) +{ + u32 reg_val; + + reg_val = readl(reg); + reg_val &= ~(clr_val); + reg_val |= set_val; + writel(reg_val, reg); +} + +static u32 sunxi_getbits(void __iomem *reg, u8 mask, u8 shift) +{ + return (readl(reg) >> shift) & mask; +} + +static int ahci_sunxi_phy_init(struct device *dev, void __iomem *reg_base) +{ + u32 reg_val; + int timeout; + + /* This magic is from the original code */ + writel(0, reg_base + AHCI_RWCR); + mdelay(5); + + sunxi_setbits(reg_base + AHCI_PHYCS1R, BIT(19)); + sunxi_clrsetbits(reg_base + AHCI_PHYCS0R, + (0x7 << 24), + (0x5 << 24) | BIT(23) | BIT(18)); + sunxi_clrsetbits(reg_base + AHCI_PHYCS1R, + (0x3 << 16) | (0x1f << 8) | (0x3 << 6), + (0x2 << 16) | (0x6 << 8) | (0x2 << 6)); + sunxi_setbits(reg_base + AHCI_PHYCS1R, BIT(28) | BIT(15)); + sunxi_clrbits(reg_base + AHCI_PHYCS1R, BIT(19)); + sunxi_clrsetbits(reg_base + AHCI_PHYCS0R, + (0x7 << 20), (0x3 << 20)); + sunxi_clrsetbits(reg_base + AHCI_PHYCS2R, + (0x1f << 5), (0x19 << 5)); + mdelay(5); + + sunxi_setbits(reg_base + AHCI_PHYCS0R, (0x1 << 19)); + + timeout = 250; /* Power up takes aprox 50 us */ + do { + reg_val = sunxi_getbits(reg_base + AHCI_PHYCS0R, 0x7, 28); + if (reg_val == 0x02) + break; + + if (--timeout == 0) { + dev_err(dev, "PHY power up failed.\n"); + return -EIO; + } + udelay(1); + } while (1); + + sunxi_setbits(reg_base + AHCI_PHYCS2R, (0x1 << 24)); + + timeout = 100; /* Calibration takes aprox 10 us */ + do { + reg_val = sunxi_getbits(reg_base + AHCI_PHYCS2R, 0x1, 24); + if (reg_val == 0x00) + break; + + if (--timeout == 0) { + dev_err(dev, "PHY calibration failed.\n"); + return -EIO; + } + udelay(1); + } while (1); + + mdelay(15); + + writel(0x7, reg_base + AHCI_RWCR); + + return 0; +} + +static void ahci_sunxi_start_engine(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + struct ahci_host_priv *hpriv = ap->host->private_data; + + /* Setup DMA before DMA start */ + sunxi_clrsetbits(hpriv->mmio + AHCI_P0DMACR, 0x0000ff00, 0x00004400); + + /* Start DMA */ + sunxi_setbits(port_mmio + PORT_CMD, PORT_CMD_START); +} + +static const struct ata_port_info ahci_sunxi_port_info = { + AHCI_HFLAGS(AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI | + AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ), + .flags = AHCI_FLAG_COMMON | ATA_FLAG_NCQ, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_platform_ops, +}; + +static int ahci_sunxi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ahci_host_priv *hpriv; + int rc; + + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + return PTR_ERR(hpriv); + + hpriv->start_engine = ahci_sunxi_start_engine; + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + return rc; + + rc = ahci_sunxi_phy_init(dev, hpriv->mmio); + if (rc) + goto disable_resources; + + rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info, 0, 0); + if (rc) + goto disable_resources; + + return 0; + +disable_resources: + ahci_platform_disable_resources(hpriv); + return rc; +} + +#ifdef CONFIG_PM_SLEEP +int ahci_sunxi_resume(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + int rc; + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + return rc; + + rc = ahci_sunxi_phy_init(dev, hpriv->mmio); + if (rc) + goto disable_resources; + + rc = ahci_platform_resume_host(dev); + if (rc) + goto disable_resources; + + return 0; + +disable_resources: + ahci_platform_disable_resources(hpriv); + return rc; +} +#endif + +static SIMPLE_DEV_PM_OPS(ahci_sunxi_pm_ops, ahci_platform_suspend, + ahci_sunxi_resume); + +static const struct of_device_id ahci_sunxi_of_match[] = { + { .compatible = "allwinner,sun4i-a10-ahci", }, + { }, +}; +MODULE_DEVICE_TABLE(of, ahci_sunxi_of_match); + +static struct platform_driver ahci_sunxi_driver = { + .probe = ahci_sunxi_probe, + .remove = ata_platform_remove_one, + .driver = { + .name = "ahci-sunxi", + .owner = THIS_MODULE, + .of_match_table = ahci_sunxi_of_match, + .pm = &ahci_sunxi_pm_ops, + }, +}; +module_platform_driver(ahci_sunxi_driver); + +MODULE_DESCRIPTION("Allwinner sunxi AHCI SATA driver"); +MODULE_AUTHOR("Olliver Schinagl "); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 90870d79d4f28711610dd2e72d8fa616c922d110 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 16:53:37 +0100 Subject: ahci-imx: Port to library-ised ahci_platform This avoids the ugliness of creating a nested platform device from probe. While moving it around anyways, move the mk6q phy init code from probe to imx_sata_enable, as the phy needs to be re-initialized on resume too, otherwise the drive won't be recognized after resume. Tested on a wandboard i.mx6 quad. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index 499bfed..d86e854 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -5,8 +5,9 @@ Each SATA controller should have its own node. Required properties: - compatible : compatible list, one of "snps,spear-ahci", - "snps,exynos5440-ahci", "ibm,476gtr-ahci", or - "allwinner,sun4i-a10-ahci" + "snps,exynos5440-ahci", "ibm,476gtr-ahci", + "allwinner,sun4i-a10-ahci", "fsl,imx53-ahci" or + "fsl,imx6q-ahci" - interrupts : - reg : @@ -15,6 +16,10 @@ Optional properties: - clocks : a list of phandle + clock specifier pairs - target-supply : regulator for SATA target power +"fsl,imx53-ahci", "fsl,imx6q-ahci" required properties: +- clocks : must contain the sata, sata_ref and ahb clocks +- clock-names : must contain "ahb" for the ahb clock + Examples: sata@ffe08000 { compatible = "snps,spear-ahci"; diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index dd4d6f7..3cb5d69 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -42,13 +42,7 @@ enum ahci_imx_type { struct imx_ahci_priv { struct platform_device *ahci_pdev; enum ahci_imx_type type; - - /* i.MX53 clock */ - struct clk *sata_gate_clk; - /* Common clock */ - struct clk *sata_ref_clk; struct clk *ahb_clk; - struct regmap *gpr; bool no_device; bool first_time; @@ -58,28 +52,52 @@ static int ahci_imx_hotplug; module_param_named(hotplug, ahci_imx_hotplug, int, 0644); MODULE_PARM_DESC(hotplug, "AHCI IMX hot-plug support (0=Don't support, 1=support)"); -static int imx_sata_clock_enable(struct device *dev) +static void ahci_imx_host_stop(struct ata_host *host); + +static int imx_sata_enable(struct ahci_host_priv *hpriv) { - struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); + struct imx_ahci_priv *imxpriv = hpriv->plat_data; int ret; - if (imxpriv->type == AHCI_IMX53) { - ret = clk_prepare_enable(imxpriv->sata_gate_clk); - if (ret < 0) { - dev_err(dev, "prepare-enable sata_gate clock err:%d\n", - ret); + if (imxpriv->no_device) + return 0; + + if (hpriv->target_pwr) { + ret = regulator_enable(hpriv->target_pwr); + if (ret) return ret; - } } - ret = clk_prepare_enable(imxpriv->sata_ref_clk); - if (ret < 0) { - dev_err(dev, "prepare-enable sata_ref clock err:%d\n", - ret); - goto clk_err; - } + ret = ahci_platform_enable_clks(hpriv); + if (ret < 0) + goto disable_regulator; if (imxpriv->type == AHCI_IMX6Q) { + /* + * set PHY Paremeters, two steps to configure the GPR13, + * one write for rest of parameters, mask of first write + * is 0x07ffffff, and the other one write for setting + * the mpll_clk_en. + */ + regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, + IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK | + IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK | + IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK | + IMX6Q_GPR13_SATA_SPD_MODE_MASK | + IMX6Q_GPR13_SATA_MPLL_SS_EN | + IMX6Q_GPR13_SATA_TX_ATTEN_MASK | + IMX6Q_GPR13_SATA_TX_BOOST_MASK | + IMX6Q_GPR13_SATA_TX_LVL_MASK | + IMX6Q_GPR13_SATA_MPLL_CLK_EN | + IMX6Q_GPR13_SATA_TX_EDGE_RATE, + IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB | + IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M | + IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F | + IMX6Q_GPR13_SATA_SPD_MODE_3P0G | + IMX6Q_GPR13_SATA_MPLL_SS_EN | + IMX6Q_GPR13_SATA_TX_ATTEN_9_16 | + IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB | + IMX6Q_GPR13_SATA_TX_LVL_1_025_V); regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, IMX6Q_GPR13_SATA_MPLL_CLK_EN, IMX6Q_GPR13_SATA_MPLL_CLK_EN); @@ -89,15 +107,19 @@ static int imx_sata_clock_enable(struct device *dev) return 0; -clk_err: - if (imxpriv->type == AHCI_IMX53) - clk_disable_unprepare(imxpriv->sata_gate_clk); +disable_regulator: + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); + return ret; } -static void imx_sata_clock_disable(struct device *dev) +static void imx_sata_disable(struct ahci_host_priv *hpriv) { - struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); + struct imx_ahci_priv *imxpriv = hpriv->plat_data; + + if (imxpriv->no_device) + return; if (imxpriv->type == AHCI_IMX6Q) { regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, @@ -105,10 +127,10 @@ static void imx_sata_clock_disable(struct device *dev) !IMX6Q_GPR13_SATA_MPLL_CLK_EN); } - clk_disable_unprepare(imxpriv->sata_ref_clk); + ahci_platform_disable_clks(hpriv); - if (imxpriv->type == AHCI_IMX53) - clk_disable_unprepare(imxpriv->sata_gate_clk); + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); } static void ahci_imx_error_handler(struct ata_port *ap) @@ -118,7 +140,7 @@ static void ahci_imx_error_handler(struct ata_port *ap) struct ata_host *host = dev_get_drvdata(ap->dev); struct ahci_host_priv *hpriv = host->private_data; void __iomem *mmio = hpriv->mmio; - struct imx_ahci_priv *imxpriv = dev_get_drvdata(ap->dev->parent); + struct imx_ahci_priv *imxpriv = hpriv->plat_data; ahci_error_handler(ap); @@ -136,7 +158,7 @@ static void ahci_imx_error_handler(struct ata_port *ap) */ reg_val = readl(mmio + PORT_PHY_CTL); writel(reg_val | PORT_PHY_CTL_PDDQ_LOC, mmio + PORT_PHY_CTL); - imx_sata_clock_disable(ap->dev); + imx_sata_disable(hpriv); imxpriv->no_device = true; } @@ -144,7 +166,9 @@ static int ahci_imx_softreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { struct ata_port *ap = link->ap; - struct imx_ahci_priv *imxpriv = dev_get_drvdata(ap->dev->parent); + struct ata_host *host = dev_get_drvdata(ap->dev); + struct ahci_host_priv *hpriv = host->private_data; + struct imx_ahci_priv *imxpriv = hpriv->plat_data; int ret = -EIO; if (imxpriv->type == AHCI_IMX53) @@ -156,7 +180,8 @@ static int ahci_imx_softreset(struct ata_link *link, unsigned int *class, } static struct ata_port_operations ahci_imx_ops = { - .inherits = &ahci_platform_ops, + .inherits = &ahci_ops, + .host_stop = ahci_imx_host_stop, .error_handler = ahci_imx_error_handler, .softreset = ahci_imx_softreset, }; @@ -168,79 +193,6 @@ static const struct ata_port_info ahci_imx_port_info = { .port_ops = &ahci_imx_ops, }; -static int imx_sata_init(struct device *dev, void __iomem *mmio) -{ - int ret = 0; - unsigned int reg_val; - struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); - - ret = imx_sata_clock_enable(dev); - if (ret < 0) - return ret; - - /* - * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL, - * and IP vendor specific register HOST_TIMER1MS. - * Configure CAP_SSS (support stagered spin up). - * Implement the port0. - * Get the ahb clock rate, and configure the TIMER1MS register. - */ - reg_val = readl(mmio + HOST_CAP); - if (!(reg_val & HOST_CAP_SSS)) { - reg_val |= HOST_CAP_SSS; - writel(reg_val, mmio + HOST_CAP); - } - reg_val = readl(mmio + HOST_PORTS_IMPL); - if (!(reg_val & 0x1)) { - reg_val |= 0x1; - writel(reg_val, mmio + HOST_PORTS_IMPL); - } - - reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000; - writel(reg_val, mmio + HOST_TIMER1MS); - - return 0; -} - -static void imx_sata_exit(struct device *dev) -{ - imx_sata_clock_disable(dev); -} - -static int imx_ahci_suspend(struct device *dev) -{ - struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); - - /* - * If no_device is set, The CLKs had been gated off in the - * initialization so don't do it again here. - */ - if (!imxpriv->no_device) - imx_sata_clock_disable(dev); - - return 0; -} - -static int imx_ahci_resume(struct device *dev) -{ - struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent); - int ret = 0; - - if (!imxpriv->no_device) - ret = imx_sata_clock_enable(dev); - - return ret; -} - -static struct ahci_platform_data imx_sata_pdata = { - .init = imx_sata_init, - .exit = imx_sata_exit, - .ata_port_info = &ahci_imx_port_info, - .suspend = imx_ahci_suspend, - .resume = imx_ahci_resume, - -}; - static const struct of_device_id imx_ahci_of_match[] = { { .compatible = "fsl,imx53-ahci", .data = (void *)AHCI_IMX53 }, { .compatible = "fsl,imx6q-ahci", .data = (void *)AHCI_IMX6Q }, @@ -251,151 +203,122 @@ MODULE_DEVICE_TABLE(of, imx_ahci_of_match); static int imx_ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct resource *mem, *irq, res[2]; const struct of_device_id *of_id; - enum ahci_imx_type type; - const struct ahci_platform_data *pdata = NULL; + struct ahci_host_priv *hpriv; struct imx_ahci_priv *imxpriv; - struct device *ahci_dev; - struct platform_device *ahci_pdev; + unsigned int reg_val; int ret; of_id = of_match_device(imx_ahci_of_match, dev); if (!of_id) return -EINVAL; - type = (enum ahci_imx_type)of_id->data; - pdata = &imx_sata_pdata; - imxpriv = devm_kzalloc(dev, sizeof(*imxpriv), GFP_KERNEL); - if (!imxpriv) { - dev_err(dev, "can't alloc ahci_host_priv\n"); + if (!imxpriv) return -ENOMEM; - } - - ahci_pdev = platform_device_alloc("ahci", -1); - if (!ahci_pdev) - return -ENODEV; - - ahci_dev = &ahci_pdev->dev; - ahci_dev->parent = dev; imxpriv->no_device = false; imxpriv->first_time = true; - imxpriv->type = type; - + imxpriv->type = (enum ahci_imx_type)of_id->data; imxpriv->ahb_clk = devm_clk_get(dev, "ahb"); if (IS_ERR(imxpriv->ahb_clk)) { dev_err(dev, "can't get ahb clock.\n"); - ret = PTR_ERR(imxpriv->ahb_clk); - goto err_out; + return PTR_ERR(imxpriv->ahb_clk); } - if (type == AHCI_IMX53) { - imxpriv->sata_gate_clk = devm_clk_get(dev, "sata_gate"); - if (IS_ERR(imxpriv->sata_gate_clk)) { - dev_err(dev, "can't get sata_gate clock.\n"); - ret = PTR_ERR(imxpriv->sata_gate_clk); - goto err_out; + if (imxpriv->type == AHCI_IMX6Q) { + imxpriv->gpr = syscon_regmap_lookup_by_compatible( + "fsl,imx6q-iomuxc-gpr"); + if (IS_ERR(imxpriv->gpr)) { + dev_err(dev, + "failed to find fsl,imx6q-iomux-gpr regmap\n"); + return PTR_ERR(imxpriv->gpr); } } - imxpriv->sata_ref_clk = devm_clk_get(dev, "sata_ref"); - if (IS_ERR(imxpriv->sata_ref_clk)) { - dev_err(dev, "can't get sata_ref clock.\n"); - ret = PTR_ERR(imxpriv->sata_ref_clk); - goto err_out; - } + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + return PTR_ERR(hpriv); + + hpriv->plat_data = imxpriv; - imxpriv->ahci_pdev = ahci_pdev; - platform_set_drvdata(pdev, imxpriv); + ret = imx_sata_enable(hpriv); + if (ret) + return ret; - mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!mem || !irq) { - dev_err(dev, "no mmio/irq resource\n"); - ret = -ENOMEM; - goto err_out; + /* + * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL, + * and IP vendor specific register HOST_TIMER1MS. + * Configure CAP_SSS (support stagered spin up). + * Implement the port0. + * Get the ahb clock rate, and configure the TIMER1MS register. + */ + reg_val = readl(hpriv->mmio + HOST_CAP); + if (!(reg_val & HOST_CAP_SSS)) { + reg_val |= HOST_CAP_SSS; + writel(reg_val, hpriv->mmio + HOST_CAP); + } + reg_val = readl(hpriv->mmio + HOST_PORTS_IMPL); + if (!(reg_val & 0x1)) { + reg_val |= 0x1; + writel(reg_val, hpriv->mmio + HOST_PORTS_IMPL); } - res[0] = *mem; - res[1] = *irq; + reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000; + writel(reg_val, hpriv->mmio + HOST_TIMER1MS); - ahci_dev->coherent_dma_mask = DMA_BIT_MASK(32); - ahci_dev->dma_mask = &ahci_dev->coherent_dma_mask; - ahci_dev->of_node = dev->of_node; + ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info, 0, 0); + if (ret) + imx_sata_disable(hpriv); - if (type == AHCI_IMX6Q) { - imxpriv->gpr = syscon_regmap_lookup_by_compatible( - "fsl,imx6q-iomuxc-gpr"); - if (IS_ERR(imxpriv->gpr)) { - dev_err(dev, - "failed to find fsl,imx6q-iomux-gpr regmap\n"); - ret = PTR_ERR(imxpriv->gpr); - goto err_out; - } + return ret; +} - /* - * Set PHY Paremeters, two steps to configure the GPR13, - * one write for rest of parameters, mask of first write - * is 0x07fffffe, and the other one write for setting - * the mpll_clk_en happens in imx_sata_clock_enable(). - */ - regmap_update_bits(imxpriv->gpr, IOMUXC_GPR13, - IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK | - IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK | - IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK | - IMX6Q_GPR13_SATA_SPD_MODE_MASK | - IMX6Q_GPR13_SATA_MPLL_SS_EN | - IMX6Q_GPR13_SATA_TX_ATTEN_MASK | - IMX6Q_GPR13_SATA_TX_BOOST_MASK | - IMX6Q_GPR13_SATA_TX_LVL_MASK | - IMX6Q_GPR13_SATA_MPLL_CLK_EN | - IMX6Q_GPR13_SATA_TX_EDGE_RATE, - IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB | - IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M | - IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F | - IMX6Q_GPR13_SATA_SPD_MODE_3P0G | - IMX6Q_GPR13_SATA_MPLL_SS_EN | - IMX6Q_GPR13_SATA_TX_ATTEN_9_16 | - IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB | - IMX6Q_GPR13_SATA_TX_LVL_1_025_V); - } +static void ahci_imx_host_stop(struct ata_host *host) +{ + struct ahci_host_priv *hpriv = host->private_data; - ret = platform_device_add_resources(ahci_pdev, res, 2); - if (ret) - goto err_out; + imx_sata_disable(hpriv); +} - ret = platform_device_add_data(ahci_pdev, pdata, sizeof(*pdata)); - if (ret) - goto err_out; +static int imx_ahci_suspend(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + int ret; - ret = platform_device_add(ahci_pdev); - if (ret) { -err_out: - platform_device_put(ahci_pdev); + ret = ahci_platform_suspend_host(dev); + if (ret) return ret; - } + + imx_sata_disable(hpriv); return 0; } -static int imx_ahci_remove(struct platform_device *pdev) +static int imx_ahci_resume(struct device *dev) { - struct imx_ahci_priv *imxpriv = platform_get_drvdata(pdev); - struct platform_device *ahci_pdev = imxpriv->ahci_pdev; + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + int ret; - platform_device_unregister(ahci_pdev); - return 0; + ret = imx_sata_enable(hpriv); + if (ret) + return ret; + + return ahci_platform_resume_host(dev); } +static SIMPLE_DEV_PM_OPS(ahci_imx_pm_ops, imx_ahci_suspend, imx_ahci_resume); + static struct platform_driver imx_ahci_driver = { .probe = imx_ahci_probe, - .remove = imx_ahci_remove, + .remove = ata_platform_remove_one, .driver = { .name = "ahci-imx", .owner = THIS_MODULE, .of_match_table = imx_ahci_of_match, + .pm = &ahci_imx_pm_ops, }, }; module_platform_driver(imx_ahci_driver); -- cgit v0.10.2 From c431147184c05849f1d7e14f8fd6254e1026319d Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sat, 22 Feb 2014 16:53:38 +0100 Subject: ata: ahci_platform: Add DT compatible for Synopsis DWC AHCI controller Add compatible string "snps,dwc-ahci", which should be used for Synopsis Designware SATA cores. e.g. on TI OMAP5 and DRA7 platforms. Signed-off-by: Roger Quadros Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 01f7bbe..968e7d9 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -577,6 +577,7 @@ static const struct of_device_id ahci_of_match[] = { { .compatible = "snps,spear-ahci", }, { .compatible = "snps,exynos5440-ahci", }, { .compatible = "ibm,476gtr-ahci", }, + { .compatible = "snps,dwc-ahci", }, {}, }; MODULE_DEVICE_TABLE(of, ahci_of_match); -- cgit v0.10.2 From 42a7f53ba0f2baa2ea23cd830511cea7f8612dd2 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sat, 22 Feb 2014 16:53:39 +0100 Subject: ata: ahci_platform: Update DT compatible list The ahci_platform driver supports "snps,dwc-ahci". Add this to the DT binding information. Signed-off-by: Roger Quadros Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index d86e854..48b285f 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -6,8 +6,8 @@ Each SATA controller should have its own node. Required properties: - compatible : compatible list, one of "snps,spear-ahci", "snps,exynos5440-ahci", "ibm,476gtr-ahci", - "allwinner,sun4i-a10-ahci", "fsl,imx53-ahci" or - "fsl,imx6q-ahci" + "allwinner,sun4i-a10-ahci", "fsl,imx53-ahci" + "fsl,imx6q-ahci" or "snps,dwc-ahci" - interrupts : - reg : -- cgit v0.10.2 From 21b5faeec229d4f70a7f60a7b0b065c98198f491 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sat, 22 Feb 2014 16:53:40 +0100 Subject: ata: ahci_platform: Manage SATA PHY Some platforms have a PHY hooked up to the SATA controller. The PHY needs to be initialized and powered up for SATA to work. We do that using the PHY framework. tj: Minor comment formatting updates. CC: Balaji T K Signed-off-by: Roger Quadros Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index bf8100c..3ab7ac9 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -37,6 +37,7 @@ #include #include +#include #include /* Enclosure Management Control */ @@ -325,6 +326,7 @@ struct ahci_host_priv { u32 em_msg_type; /* EM message type */ struct clk *clks[AHCI_MAX_CLKS]; /* Optional */ struct regulator *target_pwr; /* Optional */ + struct phy *phy; /* If platform uses phy */ void *plat_data; /* Other platform data */ /* * Optional ahci_start_engine override, if not set this gets set to the diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 968e7d9..243dde3 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "ahci.h" static void ahci_host_stop(struct ata_host *host); @@ -140,6 +141,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_disable_clks); * following order: * 1) Regulator * 2) Clocks (through ahci_platform_enable_clks) + * 3) Phy * * If resource enabling fails at any point the previous enabled resources * are disabled in reverse order. @@ -161,8 +163,23 @@ int ahci_platform_enable_resources(struct ahci_host_priv *hpriv) if (rc) goto disable_regulator; + if (hpriv->phy) { + rc = phy_init(hpriv->phy); + if (rc) + goto disable_clks; + + rc = phy_power_on(hpriv->phy); + if (rc) { + phy_exit(hpriv->phy); + goto disable_clks; + } + } + return 0; +disable_clks: + ahci_platform_disable_clks(hpriv); + disable_regulator: if (hpriv->target_pwr) regulator_disable(hpriv->target_pwr); @@ -176,11 +193,17 @@ EXPORT_SYMBOL_GPL(ahci_platform_enable_resources); * * This function disables all ahci_platform managed resources in the * following order: - * 1) Clocks (through ahci_platform_disable_clks) - * 2) Regulator + * 1) Phy + * 2) Clocks (through ahci_platform_disable_clks) + * 3) Regulator */ void ahci_platform_disable_resources(struct ahci_host_priv *hpriv) { + if (hpriv->phy) { + phy_power_off(hpriv->phy); + phy_exit(hpriv->phy); + } + ahci_platform_disable_clks(hpriv); if (hpriv->target_pwr) @@ -208,6 +231,7 @@ static void ahci_platform_put_resources(struct device *dev, void *res) * 2) regulator for controlling the targets power (optional) * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node, * or for non devicetree enabled platforms a single clock + * 4) phy (optional) * * RETURNS: * The allocated ahci_host_priv on success, otherwise an ERR_PTR value @@ -266,6 +290,25 @@ struct ahci_host_priv *ahci_platform_get_resources( hpriv->clks[i] = clk; } + hpriv->phy = devm_phy_get(dev, "sata-phy"); + if (IS_ERR(hpriv->phy)) { + rc = PTR_ERR(hpriv->phy); + switch (rc) { + case -ENODEV: + case -ENOSYS: + /* continue normally */ + hpriv->phy = NULL; + break; + + case -EPROBE_DEFER: + goto err_out; + + default: + dev_err(dev, "couldn't get sata-phy\n"); + goto err_out; + } + } + devres_remove_group(dev, NULL); return hpriv; -- cgit v0.10.2 From e708e46edac8ab2f31e7ee991aa3c5b87638e658 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Sat, 22 Feb 2014 16:53:41 +0100 Subject: ata: ahci_platform: runtime resume the device before use On OMAP platforms the device needs to be runtime resumed before it can be accessed. The OMAP HWMOD framework takes care of enabling the module and its resources based on the device's runtime PM state. In this patch we runtime resume during .probe() and runtime suspend after .remove(). We also update the runtime PM state during .resume(). CC: Balaji T K Signed-off-by: Roger Quadros Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 3ab7ac9..51af275 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -324,6 +324,7 @@ struct ahci_host_priv { u32 em_loc; /* enclosure management location */ u32 em_buf_sz; /* EM buffer size in byte */ u32 em_msg_type; /* EM message type */ + bool got_runtime_pm; /* Did we do pm_runtime_get? */ struct clk *clks[AHCI_MAX_CLKS]; /* Optional */ struct regulator *target_pwr; /* Optional */ struct phy *phy; /* If platform uses phy */ diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 243dde3..fc32863 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "ahci.h" static void ahci_host_stop(struct ata_host *host); @@ -216,6 +217,11 @@ static void ahci_platform_put_resources(struct device *dev, void *res) struct ahci_host_priv *hpriv = res; int c; + if (hpriv->got_runtime_pm) { + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + } + for (c = 0; c < AHCI_MAX_CLKS && hpriv->clks[c]; c++) clk_put(hpriv->clks[c]); } @@ -309,6 +315,10 @@ struct ahci_host_priv *ahci_platform_get_resources( } } + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + hpriv->got_runtime_pm = true; + devres_remove_group(dev, NULL); return hpriv; @@ -603,6 +613,11 @@ int ahci_platform_resume(struct device *dev) if (rc) goto disable_resources; + /* We resumed so update PM runtime state */ + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + return 0; disable_resources: -- cgit v0.10.2 From f1df8641e27b7edb978bdc7aaf50c235bc9e8be9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 17:22:53 +0100 Subject: ahci_platform: Drop support for ahci-strict platform device type I've done a grep over the entire kernel tree and nothing is using this (anymore?). Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index fc32863..d3d2bad 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -31,7 +31,6 @@ static void ahci_host_stop(struct ata_host *host); enum ahci_type { AHCI, /* standard platform ahci */ IMX53_AHCI, /* ahci on i.mx53 */ - STRICT_AHCI, /* delayed DMA engine start */ }; static struct platform_device_id ahci_devtype[] = { @@ -42,9 +41,6 @@ static struct platform_device_id ahci_devtype[] = { .name = "imx53-ahci", .driver_data = IMX53_AHCI, }, { - .name = "strict-ahci", - .driver_data = STRICT_AHCI, - }, { /* sentinel */ } }; @@ -75,13 +71,6 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_platform_retry_srst_ops, }, - [STRICT_AHCI] = { - AHCI_HFLAGS (AHCI_HFLAG_DELAY_ENGINE), - .flags = AHCI_FLAG_COMMON, - .pio_mask = ATA_PIO4, - .udma_mask = ATA_UDMA6, - .port_ops = &ahci_platform_ops, - }, }; static struct scsi_host_template ahci_platform_sht = { -- cgit v0.10.2 From c093e1d36e317c5ac2dc788f407119259fc260fe Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 17:22:54 +0100 Subject: ahci_platform: Drop support for imx53-ahci platform device type Since the 3.13 release the ahci_imx driver has proper devicetree enabled support for ahci on imx53 and that is used instead of the old board file created imx53-ahci platform device. Note this patch also complete drops the id-table, an id-table is not needed for a single id platform driver, the name field in the driver struct suffices. And the code already has an explicit "MODULE_ALIAS("platform:ahci");" so the id-table is not needed for that either. Cc: Marek Vasut Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index d3d2bad..8fab4bf 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -28,49 +28,17 @@ static void ahci_host_stop(struct ata_host *host); -enum ahci_type { - AHCI, /* standard platform ahci */ - IMX53_AHCI, /* ahci on i.mx53 */ -}; - -static struct platform_device_id ahci_devtype[] = { - { - .name = "ahci", - .driver_data = AHCI, - }, { - .name = "imx53-ahci", - .driver_data = IMX53_AHCI, - }, { - /* sentinel */ - } -}; -MODULE_DEVICE_TABLE(platform, ahci_devtype); - struct ata_port_operations ahci_platform_ops = { .inherits = &ahci_ops, .host_stop = ahci_host_stop, }; EXPORT_SYMBOL_GPL(ahci_platform_ops); -static struct ata_port_operations ahci_platform_retry_srst_ops = { - .inherits = &ahci_pmp_retry_srst_ops, - .host_stop = ahci_host_stop, -}; - -static const struct ata_port_info ahci_port_info[] = { - /* by features */ - [AHCI] = { - .flags = AHCI_FLAG_COMMON, - .pio_mask = ATA_PIO4, - .udma_mask = ATA_UDMA6, - .port_ops = &ahci_platform_ops, - }, - [IMX53_AHCI] = { - .flags = AHCI_FLAG_COMMON, - .pio_mask = ATA_PIO4, - .udma_mask = ATA_UDMA6, - .port_ops = &ahci_platform_retry_srst_ops, - }, +static const struct ata_port_info ahci_port_info = { + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_platform_ops, }; static struct scsi_host_template ahci_platform_sht = { @@ -416,7 +384,6 @@ static int ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ahci_platform_data *pdata = dev_get_platdata(dev); - const struct platform_device_id *id = platform_get_device_id(pdev); const struct ata_port_info *pi_template; struct ahci_host_priv *hpriv; int rc; @@ -444,7 +411,7 @@ static int ahci_probe(struct platform_device *pdev) if (pdata && pdata->ata_port_info) pi_template = pdata->ata_port_info; else - pi_template = &ahci_port_info[id ? id->driver_data : 0]; + pi_template = &ahci_port_info; rc = ahci_platform_init_host(pdev, hpriv, pi_template, pdata ? pdata->force_port_map : 0, @@ -638,7 +605,6 @@ static struct platform_driver ahci_driver = { .of_match_table = ahci_of_match, .pm = &ahci_pm_ops, }, - .id_table = ahci_devtype, }; module_platform_driver(ahci_driver); -- cgit v0.10.2 From 6ef95e87763fd69889655f4f14e499377a54d066 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 22 Feb 2014 17:22:55 +0100 Subject: ahci_platform: Drop unused ahci_platform_data members These members are not used anywhere, and in the future we want ahci_platform_data to go away entirely so there is no reason to keep these around. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 8fab4bf..db24d2a 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -384,7 +384,6 @@ static int ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ahci_platform_data *pdata = dev_get_platdata(dev); - const struct ata_port_info *pi_template; struct ahci_host_priv *hpriv; int rc; @@ -408,14 +407,7 @@ static int ahci_probe(struct platform_device *pdev) goto disable_resources; } - if (pdata && pdata->ata_port_info) - pi_template = pdata->ata_port_info; - else - pi_template = &ahci_port_info; - - rc = ahci_platform_init_host(pdev, hpriv, pi_template, - pdata ? pdata->force_port_map : 0, - pdata ? pdata->mask_port_map : 0); + rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, 0, 0); if (rc) goto pdata_exit; diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 542f268..1f16d50 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -33,9 +33,6 @@ struct ahci_platform_data { void (*exit)(struct device *dev); int (*suspend)(struct device *dev); int (*resume)(struct device *dev); - const struct ata_port_info *ata_port_info; - unsigned int force_port_map; - unsigned int mask_port_map; }; int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); -- cgit v0.10.2 From 70ff00f82a6af0ff68f8f7b411738634ce2f20d0 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 22 Feb 2014 18:27:17 +0100 Subject: ASoC: sta32x: Fix cache sync codec->control_data contains a pointer to the regmap struct of the device, not to the device private data. Use snd_soc_codec_get_drvdata() instead. The issue was introduced in commit 29fdf4fbbe ("ASoC: sta32x: Convert to regmap"). Fixes: 29fdf4fbbe (ASoC: sta32x: Convert to regmap) Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c index 42c5b45..ea78c17 100644 --- a/sound/soc/codecs/sta32x.c +++ b/sound/soc/codecs/sta32x.c @@ -331,7 +331,7 @@ static int sta32x_sync_coef_shadow(struct snd_soc_codec *codec) static int sta32x_cache_sync(struct snd_soc_codec *codec) { - struct sta32x_priv *sta32x = codec->control_data; + struct sta32x_priv *sta32x = snd_soc_codec_get_drvdata(codec); unsigned int mute; int rc; -- cgit v0.10.2 From d2ec147a76d0e051db19d378cac3ee7721877717 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 23 Feb 2014 12:52:41 +0100 Subject: ahci_sunxi: Use msleep instead of mdelay ahci_sunxi_phy_init is called from the probe and resume code paths, and sleeping is safe in both, so use msleep instead of mdelay. Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c index 001f7dfc..d1bf3f7 100644 --- a/drivers/ata/ahci_sunxi.c +++ b/drivers/ata/ahci_sunxi.c @@ -90,7 +90,7 @@ static int ahci_sunxi_phy_init(struct device *dev, void __iomem *reg_base) /* This magic is from the original code */ writel(0, reg_base + AHCI_RWCR); - mdelay(5); + msleep(5); sunxi_setbits(reg_base + AHCI_PHYCS1R, BIT(19)); sunxi_clrsetbits(reg_base + AHCI_PHYCS0R, @@ -105,7 +105,7 @@ static int ahci_sunxi_phy_init(struct device *dev, void __iomem *reg_base) (0x7 << 20), (0x3 << 20)); sunxi_clrsetbits(reg_base + AHCI_PHYCS2R, (0x1f << 5), (0x19 << 5)); - mdelay(5); + msleep(5); sunxi_setbits(reg_base + AHCI_PHYCS0R, (0x1 << 19)); @@ -137,7 +137,7 @@ static int ahci_sunxi_phy_init(struct device *dev, void __iomem *reg_base) udelay(1); } while (1); - mdelay(15); + msleep(15); writel(0x7, reg_base + AHCI_RWCR); -- cgit v0.10.2 From 806274c018e9858320a27b785df761f45c33a56c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Feb 2014 13:13:05 -0800 Subject: rcutorture: Fix checkpatch complaint This commit does a code-style cleanup so that the first curly brace of an initializer does not appear at the beginning of a line. Signed-off-by: Paul E. McKenney diff --git a/kernel/rcu/torture.c b/kernel/rcu/torture.c index 732f8ae..dad6723 100644 --- a/kernel/rcu/torture.c +++ b/kernel/rcu/torture.c @@ -170,10 +170,10 @@ static struct rcu_torture __rcu *rcu_torture_current; static unsigned long rcu_torture_current_version; static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; static DEFINE_SPINLOCK(rcu_torture_lock); -static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) = - { 0 }; -static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) = - { 0 }; +static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], + rcu_torture_count) = { 0 }; +static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], + rcu_torture_batch) = { 0 }; static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; static atomic_t n_rcu_torture_alloc; static atomic_t n_rcu_torture_alloc_fail; -- cgit v0.10.2 From 51b1130eb5823ddb90a9ad07d243031d8cb7ecf2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Jan 2014 11:49:39 -0800 Subject: rcutorture: Abstract rcu_torture_random() Because rcu_torture_random() will be used by the locking equivalent to rcutorture, pull it out into its own module. This new module cannot be separately configured, instead, use the Kconfig "select" statement from the Kconfig options of tests depending on it. Suggested-by: Rusty Russell Signed-off-by: Paul E. McKenney diff --git a/include/linux/torture.h b/include/linux/torture.h new file mode 100644 index 0000000..979e3e6 --- /dev/null +++ b/include/linux/torture.h @@ -0,0 +1,47 @@ +/* + * Common functions for in-kernel torture tests. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2014 + * + * Author: Paul E. McKenney + */ + +#ifndef __LINUX_TORTURE_H +#define __LINUX_TORTURE_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct torture_random_state { + unsigned long trs_state; + long trs_count; +}; + +#define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } + +unsigned long torture_random(struct torture_random_state *trsp); + +#endif /* __LINUX_TORTURE_H */ diff --git a/kernel/Makefile b/kernel/Makefile index bc010ee..5c0e766 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -93,6 +93,7 @@ obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o +obj-$(CONFIG_TORTURE_TEST) += torture.o $(obj)/configs.o: $(obj)/config_data.h diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 01e9ec3..807ccfb 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile @@ -1,5 +1,5 @@ obj-y += update.o srcu.o -obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o +obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += tree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c new file mode 100644 index 0000000..94b1cd8b --- /dev/null +++ b/kernel/rcu/rcutorture.c @@ -0,0 +1,2125 @@ +/* + * Read-Copy Update module-based torture test facility + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2005, 2006 + * + * Authors: Paul E. McKenney + * Josh Triplett + * + * See also: Documentation/RCU/torture.txt + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Paul E. McKenney and Josh Triplett "); + +MODULE_ALIAS("rcutorture"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcutorture." + +static int fqs_duration; +module_param(fqs_duration, int, 0444); +MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable"); +static int fqs_holdoff; +module_param(fqs_holdoff, int, 0444); +MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); +static int fqs_stutter = 3; +module_param(fqs_stutter, int, 0444); +MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); +static bool gp_exp; +module_param(gp_exp, bool, 0444); +MODULE_PARM_DESC(gp_exp, "Use expedited GP wait primitives"); +static bool gp_normal; +module_param(gp_normal, bool, 0444); +MODULE_PARM_DESC(gp_normal, "Use normal (non-expedited) GP wait primitives"); +static int irqreader = 1; +module_param(irqreader, int, 0444); +MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers"); +static int n_barrier_cbs; +module_param(n_barrier_cbs, int, 0444); +MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing"); +static int nfakewriters = 4; +module_param(nfakewriters, int, 0444); +MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); +static int nreaders = -1; +module_param(nreaders, int, 0444); +MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); +static int object_debug; +module_param(object_debug, int, 0444); +MODULE_PARM_DESC(object_debug, "Enable debug-object double call_rcu() testing"); +static int onoff_holdoff; +module_param(onoff_holdoff, int, 0444); +MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)"); +static int onoff_interval; +module_param(onoff_interval, int, 0444); +MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); +static int shuffle_interval = 3; +module_param(shuffle_interval, int, 0444); +MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); +static int shutdown_secs; +module_param(shutdown_secs, int, 0444); +MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), <= zero to disable."); +static int stall_cpu; +module_param(stall_cpu, int, 0444); +MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable."); +static int stall_cpu_holdoff = 10; +module_param(stall_cpu_holdoff, int, 0444); +MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s)."); +static int stat_interval = 60; +module_param(stat_interval, int, 0644); +MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); +static int stutter = 5; +module_param(stutter, int, 0444); +MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test"); +static int test_boost = 1; +module_param(test_boost, int, 0444); +MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); +static int test_boost_duration = 4; +module_param(test_boost_duration, int, 0444); +MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); +static int test_boost_interval = 7; +module_param(test_boost_interval, int, 0444); +MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); +static bool test_no_idle_hz = true; +module_param(test_no_idle_hz, bool, 0444); +MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); +static char *torture_type = "rcu"; +module_param(torture_type, charp, 0444); +MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); +static bool verbose; +module_param(verbose, bool, 0444); +MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); + +#define TORTURE_FLAG "-torture:" +#define PRINTK_STRING(s) \ + do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) +#define VERBOSE_PRINTK_STRING(s) \ + do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) +#define VERBOSE_PRINTK_ERRSTRING(s) \ + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) + +static int nrealreaders; +static struct task_struct *writer_task; +static struct task_struct **fakewriter_tasks; +static struct task_struct **reader_tasks; +static struct task_struct *stats_task; +static struct task_struct *shuffler_task; +static struct task_struct *stutter_task; +static struct task_struct *fqs_task; +static struct task_struct *boost_tasks[NR_CPUS]; +static struct task_struct *shutdown_task; +#ifdef CONFIG_HOTPLUG_CPU +static struct task_struct *onoff_task; +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ +static struct task_struct *stall_task; +static struct task_struct **barrier_cbs_tasks; +static struct task_struct *barrier_task; + +#define RCU_TORTURE_PIPE_LEN 10 + +struct rcu_torture { + struct rcu_head rtort_rcu; + int rtort_pipe_count; + struct list_head rtort_free; + int rtort_mbtest; +}; + +static LIST_HEAD(rcu_torture_freelist); +static struct rcu_torture __rcu *rcu_torture_current; +static unsigned long rcu_torture_current_version; +static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; +static DEFINE_SPINLOCK(rcu_torture_lock); +static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], + rcu_torture_count) = { 0 }; +static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], + rcu_torture_batch) = { 0 }; +static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; +static atomic_t n_rcu_torture_alloc; +static atomic_t n_rcu_torture_alloc_fail; +static atomic_t n_rcu_torture_free; +static atomic_t n_rcu_torture_mberror; +static atomic_t n_rcu_torture_error; +static long n_rcu_torture_barrier_error; +static long n_rcu_torture_boost_ktrerror; +static long n_rcu_torture_boost_rterror; +static long n_rcu_torture_boost_failure; +static long n_rcu_torture_boosts; +static long n_rcu_torture_timers; +static long n_offline_attempts; +static long n_offline_successes; +static unsigned long sum_offline; +static int min_offline = -1; +static int max_offline; +static long n_online_attempts; +static long n_online_successes; +static unsigned long sum_online; +static int min_online = -1; +static int max_online; +static long n_barrier_attempts; +static long n_barrier_successes; +static struct list_head rcu_torture_removed; +static cpumask_var_t shuffle_tmp_mask; + +static int stutter_pause_test; + +#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) +#define RCUTORTURE_RUNNABLE_INIT 1 +#else +#define RCUTORTURE_RUNNABLE_INIT 0 +#endif +int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; +module_param(rcutorture_runnable, int, 0444); +MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot"); + +#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) +#define rcu_can_boost() 1 +#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ +#define rcu_can_boost() 0 +#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ + +#ifdef CONFIG_RCU_TRACE +static u64 notrace rcu_trace_clock_local(void) +{ + u64 ts = trace_clock_local(); + unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC); + return ts; +} +#else /* #ifdef CONFIG_RCU_TRACE */ +static u64 notrace rcu_trace_clock_local(void) +{ + return 0ULL; +} +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + +static unsigned long shutdown_time; /* jiffies to system shutdown. */ +static unsigned long boost_starttime; /* jiffies of next boost test start. */ +DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ + /* and boost task create/destroy. */ +static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ +static bool barrier_phase; /* Test phase. */ +static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ +static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ +static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); + +/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ + +#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ +#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ +#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ +static int fullstop = FULLSTOP_RMMOD; +/* + * Protect fullstop transitions and spawning of kthreads. + */ +static DEFINE_MUTEX(fullstop_mutex); + +/* Forward reference. */ +static void rcu_torture_cleanup(void); + +/* + * Detect and respond to a system shutdown. + */ +static int +rcutorture_shutdown_notify(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + mutex_lock(&fullstop_mutex); + if (fullstop == FULLSTOP_DONTSTOP) + fullstop = FULLSTOP_SHUTDOWN; + else + pr_warn(/* but going down anyway, so... */ + "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); + mutex_unlock(&fullstop_mutex); + return NOTIFY_DONE; +} + +/* + * Absorb kthreads into a kernel function that won't return, so that + * they won't ever access module text or data again. + */ +static void rcutorture_shutdown_absorb(const char *title) +{ + if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { + pr_notice( + "rcutorture thread %s parking due to system shutdown\n", + title); + schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); + } +} + +/* + * Allocate an element from the rcu_tortures pool. + */ +static struct rcu_torture * +rcu_torture_alloc(void) +{ + struct list_head *p; + + spin_lock_bh(&rcu_torture_lock); + if (list_empty(&rcu_torture_freelist)) { + atomic_inc(&n_rcu_torture_alloc_fail); + spin_unlock_bh(&rcu_torture_lock); + return NULL; + } + atomic_inc(&n_rcu_torture_alloc); + p = rcu_torture_freelist.next; + list_del_init(p); + spin_unlock_bh(&rcu_torture_lock); + return container_of(p, struct rcu_torture, rtort_free); +} + +/* + * Free an element to the rcu_tortures pool. + */ +static void +rcu_torture_free(struct rcu_torture *p) +{ + atomic_inc(&n_rcu_torture_free); + spin_lock_bh(&rcu_torture_lock); + list_add_tail(&p->rtort_free, &rcu_torture_freelist); + spin_unlock_bh(&rcu_torture_lock); +} + +static void +rcu_stutter_wait(const char *title) +{ + while (stutter_pause_test || !rcutorture_runnable) { + if (rcutorture_runnable) + schedule_timeout_interruptible(1); + else + schedule_timeout_interruptible(round_jiffies_relative(HZ)); + rcutorture_shutdown_absorb(title); + } +} + +/* + * Operations vector for selecting different types of tests. + */ + +struct rcu_torture_ops { + void (*init)(void); + int (*readlock)(void); + void (*read_delay)(struct torture_random_state *rrsp); + void (*readunlock)(int idx); + int (*completed)(void); + void (*deferred_free)(struct rcu_torture *p); + void (*sync)(void); + void (*exp_sync)(void); + void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); + void (*cb_barrier)(void); + void (*fqs)(void); + void (*stats)(char *page); + int irq_capable; + int can_boost; + const char *name; +}; + +static struct rcu_torture_ops *cur_ops; + +/* + * Definitions for rcu torture testing. + */ + +static int rcu_torture_read_lock(void) __acquires(RCU) +{ + rcu_read_lock(); + return 0; +} + +static void rcu_read_delay(struct torture_random_state *rrsp) +{ + const unsigned long shortdelay_us = 200; + const unsigned long longdelay_ms = 50; + + /* We want a short delay sometimes to make a reader delay the grace + * period, and we want a long delay occasionally to trigger + * force_quiescent_state. */ + + if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) + mdelay(longdelay_ms); + if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) + udelay(shortdelay_us); +#ifdef CONFIG_PREEMPT + if (!preempt_count() && + !(torture_random(rrsp) % (nrealreaders * 20000))) + preempt_schedule(); /* No QS if preempt_disable() in effect */ +#endif +} + +static void rcu_torture_read_unlock(int idx) __releases(RCU) +{ + rcu_read_unlock(); +} + +static int rcu_torture_completed(void) +{ + return rcu_batches_completed(); +} + +static void +rcu_torture_cb(struct rcu_head *p) +{ + int i; + struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); + + if (fullstop != FULLSTOP_DONTSTOP) { + /* Test is ending, just drop callbacks on the floor. */ + /* The next initialization will pick up the pieces. */ + return; + } + i = rp->rtort_pipe_count; + if (i > RCU_TORTURE_PIPE_LEN) + i = RCU_TORTURE_PIPE_LEN; + atomic_inc(&rcu_torture_wcount[i]); + if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { + rp->rtort_mbtest = 0; + rcu_torture_free(rp); + } else { + cur_ops->deferred_free(rp); + } +} + +static int rcu_no_completed(void) +{ + return 0; +} + +static void rcu_torture_deferred_free(struct rcu_torture *p) +{ + call_rcu(&p->rtort_rcu, rcu_torture_cb); +} + +static void rcu_sync_torture_init(void) +{ + INIT_LIST_HEAD(&rcu_torture_removed); +} + +static struct rcu_torture_ops rcu_ops = { + .init = rcu_sync_torture_init, + .readlock = rcu_torture_read_lock, + .read_delay = rcu_read_delay, + .readunlock = rcu_torture_read_unlock, + .completed = rcu_torture_completed, + .deferred_free = rcu_torture_deferred_free, + .sync = synchronize_rcu, + .exp_sync = synchronize_rcu_expedited, + .call = call_rcu, + .cb_barrier = rcu_barrier, + .fqs = rcu_force_quiescent_state, + .stats = NULL, + .irq_capable = 1, + .can_boost = rcu_can_boost(), + .name = "rcu" +}; + +/* + * Definitions for rcu_bh torture testing. + */ + +static int rcu_bh_torture_read_lock(void) __acquires(RCU_BH) +{ + rcu_read_lock_bh(); + return 0; +} + +static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH) +{ + rcu_read_unlock_bh(); +} + +static int rcu_bh_torture_completed(void) +{ + return rcu_batches_completed_bh(); +} + +static void rcu_bh_torture_deferred_free(struct rcu_torture *p) +{ + call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); +} + +static struct rcu_torture_ops rcu_bh_ops = { + .init = rcu_sync_torture_init, + .readlock = rcu_bh_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = rcu_bh_torture_read_unlock, + .completed = rcu_bh_torture_completed, + .deferred_free = rcu_bh_torture_deferred_free, + .sync = synchronize_rcu_bh, + .exp_sync = synchronize_rcu_bh_expedited, + .call = call_rcu_bh, + .cb_barrier = rcu_barrier_bh, + .fqs = rcu_bh_force_quiescent_state, + .stats = NULL, + .irq_capable = 1, + .name = "rcu_bh" +}; + +/* + * Definitions for srcu torture testing. + */ + +DEFINE_STATIC_SRCU(srcu_ctl); + +static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) +{ + return srcu_read_lock(&srcu_ctl); +} + +static void srcu_read_delay(struct torture_random_state *rrsp) +{ + long delay; + const long uspertick = 1000000 / HZ; + const long longdelay = 10; + + /* We want there to be long-running readers, but not all the time. */ + + delay = torture_random(rrsp) % + (nrealreaders * 2 * longdelay * uspertick); + if (!delay) + schedule_timeout_interruptible(longdelay); + else + rcu_read_delay(rrsp); +} + +static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) +{ + srcu_read_unlock(&srcu_ctl, idx); +} + +static int srcu_torture_completed(void) +{ + return srcu_batches_completed(&srcu_ctl); +} + +static void srcu_torture_deferred_free(struct rcu_torture *rp) +{ + call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); +} + +static void srcu_torture_synchronize(void) +{ + synchronize_srcu(&srcu_ctl); +} + +static void srcu_torture_call(struct rcu_head *head, + void (*func)(struct rcu_head *head)) +{ + call_srcu(&srcu_ctl, head, func); +} + +static void srcu_torture_barrier(void) +{ + srcu_barrier(&srcu_ctl); +} + +static void srcu_torture_stats(char *page) +{ + int cpu; + int idx = srcu_ctl.completed & 0x1; + + page += sprintf(page, "%s%s per-CPU(idx=%d):", + torture_type, TORTURE_FLAG, idx); + for_each_possible_cpu(cpu) { + page += sprintf(page, " %d(%lu,%lu)", cpu, + per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], + per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); + } + sprintf(page, "\n"); +} + +static void srcu_torture_synchronize_expedited(void) +{ + synchronize_srcu_expedited(&srcu_ctl); +} + +static struct rcu_torture_ops srcu_ops = { + .init = rcu_sync_torture_init, + .readlock = srcu_torture_read_lock, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .completed = srcu_torture_completed, + .deferred_free = srcu_torture_deferred_free, + .sync = srcu_torture_synchronize, + .exp_sync = srcu_torture_synchronize_expedited, + .call = srcu_torture_call, + .cb_barrier = srcu_torture_barrier, + .stats = srcu_torture_stats, + .name = "srcu" +}; + +/* + * Definitions for sched torture testing. + */ + +static int sched_torture_read_lock(void) +{ + preempt_disable(); + return 0; +} + +static void sched_torture_read_unlock(int idx) +{ + preempt_enable(); +} + +static void rcu_sched_torture_deferred_free(struct rcu_torture *p) +{ + call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); +} + +static struct rcu_torture_ops sched_ops = { + .init = rcu_sync_torture_init, + .readlock = sched_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = sched_torture_read_unlock, + .completed = rcu_no_completed, + .deferred_free = rcu_sched_torture_deferred_free, + .sync = synchronize_sched, + .exp_sync = synchronize_sched_expedited, + .call = call_rcu_sched, + .cb_barrier = rcu_barrier_sched, + .fqs = rcu_sched_force_quiescent_state, + .stats = NULL, + .irq_capable = 1, + .name = "sched" +}; + +/* + * RCU torture priority-boost testing. Runs one real-time thread per + * CPU for moderate bursts, repeatedly registering RCU callbacks and + * spinning waiting for them to be invoked. If a given callback takes + * too long to be invoked, we assume that priority inversion has occurred. + */ + +struct rcu_boost_inflight { + struct rcu_head rcu; + int inflight; +}; + +static void rcu_torture_boost_cb(struct rcu_head *head) +{ + struct rcu_boost_inflight *rbip = + container_of(head, struct rcu_boost_inflight, rcu); + + smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ + rbip->inflight = 0; +} + +static int rcu_torture_boost(void *arg) +{ + unsigned long call_rcu_time; + unsigned long endtime; + unsigned long oldstarttime; + struct rcu_boost_inflight rbi = { .inflight = 0 }; + struct sched_param sp; + + VERBOSE_PRINTK_STRING("rcu_torture_boost started"); + + /* Set real-time priority. */ + sp.sched_priority = 1; + if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { + VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); + n_rcu_torture_boost_rterror++; + } + + init_rcu_head_on_stack(&rbi.rcu); + /* Each pass through the following loop does one boost-test cycle. */ + do { + /* Wait for the next test interval. */ + oldstarttime = boost_starttime; + while (ULONG_CMP_LT(jiffies, oldstarttime)) { + schedule_timeout_interruptible(oldstarttime - jiffies); + rcu_stutter_wait("rcu_torture_boost"); + if (kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP) + goto checkwait; + } + + /* Do one boost-test interval. */ + endtime = oldstarttime + test_boost_duration * HZ; + call_rcu_time = jiffies; + while (ULONG_CMP_LT(jiffies, endtime)) { + /* If we don't have a callback in flight, post one. */ + if (!rbi.inflight) { + smp_mb(); /* RCU core before ->inflight = 1. */ + rbi.inflight = 1; + call_rcu(&rbi.rcu, rcu_torture_boost_cb); + if (jiffies - call_rcu_time > + test_boost_duration * HZ - HZ / 2) { + VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); + n_rcu_torture_boost_failure++; + } + call_rcu_time = jiffies; + } + cond_resched(); + rcu_stutter_wait("rcu_torture_boost"); + if (kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP) + goto checkwait; + } + + /* + * Set the start time of the next test interval. + * Yes, this is vulnerable to long delays, but such + * delays simply cause a false negative for the next + * interval. Besides, we are running at RT priority, + * so delays should be relatively rare. + */ + while (oldstarttime == boost_starttime && + !kthread_should_stop()) { + if (mutex_trylock(&boost_mutex)) { + boost_starttime = jiffies + + test_boost_interval * HZ; + n_rcu_torture_boosts++; + mutex_unlock(&boost_mutex); + break; + } + schedule_timeout_uninterruptible(1); + } + + /* Go do the stutter. */ +checkwait: rcu_stutter_wait("rcu_torture_boost"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + + /* Clean up and exit. */ + VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); + rcutorture_shutdown_absorb("rcu_torture_boost"); + while (!kthread_should_stop() || rbi.inflight) + schedule_timeout_uninterruptible(1); + smp_mb(); /* order accesses to ->inflight before stack-frame death. */ + destroy_rcu_head_on_stack(&rbi.rcu); + return 0; +} + +/* + * RCU torture force-quiescent-state kthread. Repeatedly induces + * bursts of calls to force_quiescent_state(), increasing the probability + * of occurrence of some important types of race conditions. + */ +static int +rcu_torture_fqs(void *arg) +{ + unsigned long fqs_resume_time; + int fqs_burst_remaining; + + VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); + do { + fqs_resume_time = jiffies + fqs_stutter * HZ; + while (ULONG_CMP_LT(jiffies, fqs_resume_time) && + !kthread_should_stop()) { + schedule_timeout_interruptible(1); + } + fqs_burst_remaining = fqs_duration; + while (fqs_burst_remaining > 0 && + !kthread_should_stop()) { + cur_ops->fqs(); + udelay(fqs_holdoff); + fqs_burst_remaining -= fqs_holdoff; + } + rcu_stutter_wait("rcu_torture_fqs"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping"); + rcutorture_shutdown_absorb("rcu_torture_fqs"); + while (!kthread_should_stop()) + schedule_timeout_uninterruptible(1); + return 0; +} + +/* + * RCU torture writer kthread. Repeatedly substitutes a new structure + * for that pointed to by rcu_torture_current, freeing the old structure + * after a series of grace periods (the "pipeline"). + */ +static int +rcu_torture_writer(void *arg) +{ + bool exp; + int i; + struct rcu_torture *rp; + struct rcu_torture *rp1; + struct rcu_torture *old_rp; + static DEFINE_TORTURE_RANDOM(rand); + + VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); + set_user_nice(current, 19); + + do { + schedule_timeout_uninterruptible(1); + rp = rcu_torture_alloc(); + if (rp == NULL) + continue; + rp->rtort_pipe_count = 0; + udelay(torture_random(&rand) & 0x3ff); + old_rp = rcu_dereference_check(rcu_torture_current, + current == writer_task); + rp->rtort_mbtest = 1; + rcu_assign_pointer(rcu_torture_current, rp); + smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */ + if (old_rp) { + i = old_rp->rtort_pipe_count; + if (i > RCU_TORTURE_PIPE_LEN) + i = RCU_TORTURE_PIPE_LEN; + atomic_inc(&rcu_torture_wcount[i]); + old_rp->rtort_pipe_count++; + if (gp_normal == gp_exp) + exp = !!(torture_random(&rand) & 0x80); + else + exp = gp_exp; + if (!exp) { + cur_ops->deferred_free(old_rp); + } else { + cur_ops->exp_sync(); + list_add(&old_rp->rtort_free, + &rcu_torture_removed); + list_for_each_entry_safe(rp, rp1, + &rcu_torture_removed, + rtort_free) { + i = rp->rtort_pipe_count; + if (i > RCU_TORTURE_PIPE_LEN) + i = RCU_TORTURE_PIPE_LEN; + atomic_inc(&rcu_torture_wcount[i]); + if (++rp->rtort_pipe_count >= + RCU_TORTURE_PIPE_LEN) { + rp->rtort_mbtest = 0; + list_del(&rp->rtort_free); + rcu_torture_free(rp); + } + } + } + } + rcutorture_record_progress(++rcu_torture_current_version); + rcu_stutter_wait("rcu_torture_writer"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); + rcutorture_shutdown_absorb("rcu_torture_writer"); + while (!kthread_should_stop()) + schedule_timeout_uninterruptible(1); + return 0; +} + +/* + * RCU torture fake writer kthread. Repeatedly calls sync, with a random + * delay between calls. + */ +static int +rcu_torture_fakewriter(void *arg) +{ + DEFINE_TORTURE_RANDOM(rand); + + VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); + set_user_nice(current, 19); + + do { + schedule_timeout_uninterruptible(1 + torture_random(&rand)%10); + udelay(torture_random(&rand) & 0x3ff); + if (cur_ops->cb_barrier != NULL && + torture_random(&rand) % (nfakewriters * 8) == 0) { + cur_ops->cb_barrier(); + } else if (gp_normal == gp_exp) { + if (torture_random(&rand) & 0x80) + cur_ops->sync(); + else + cur_ops->exp_sync(); + } else if (gp_normal) { + cur_ops->sync(); + } else { + cur_ops->exp_sync(); + } + rcu_stutter_wait("rcu_torture_fakewriter"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + + VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); + rcutorture_shutdown_absorb("rcu_torture_fakewriter"); + while (!kthread_should_stop()) + schedule_timeout_uninterruptible(1); + return 0; +} + +void rcutorture_trace_dump(void) +{ + static atomic_t beenhere = ATOMIC_INIT(0); + + if (atomic_read(&beenhere)) + return; + if (atomic_xchg(&beenhere, 1) != 0) + return; + ftrace_dump(DUMP_ALL); +} + +/* + * RCU torture reader from timer handler. Dereferences rcu_torture_current, + * incrementing the corresponding element of the pipeline array. The + * counter in the element should never be greater than 1, otherwise, the + * RCU implementation is broken. + */ +static void rcu_torture_timer(unsigned long unused) +{ + int idx; + int completed; + int completed_end; + static DEFINE_TORTURE_RANDOM(rand); + static DEFINE_SPINLOCK(rand_lock); + struct rcu_torture *p; + int pipe_count; + unsigned long long ts; + + idx = cur_ops->readlock(); + completed = cur_ops->completed(); + ts = rcu_trace_clock_local(); + p = rcu_dereference_check(rcu_torture_current, + rcu_read_lock_bh_held() || + rcu_read_lock_sched_held() || + srcu_read_lock_held(&srcu_ctl)); + if (p == NULL) { + /* Leave because rcu_torture_writer is not yet underway */ + cur_ops->readunlock(idx); + return; + } + if (p->rtort_mbtest == 0) + atomic_inc(&n_rcu_torture_mberror); + spin_lock(&rand_lock); + cur_ops->read_delay(&rand); + n_rcu_torture_timers++; + spin_unlock(&rand_lock); + preempt_disable(); + pipe_count = p->rtort_pipe_count; + if (pipe_count > RCU_TORTURE_PIPE_LEN) { + /* Should not happen, but... */ + pipe_count = RCU_TORTURE_PIPE_LEN; + } + completed_end = cur_ops->completed(); + if (pipe_count > 1) { + do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, + completed, completed_end); + rcutorture_trace_dump(); + } + __this_cpu_inc(rcu_torture_count[pipe_count]); + completed = completed_end - completed; + if (completed > RCU_TORTURE_PIPE_LEN) { + /* Should not happen, but... */ + completed = RCU_TORTURE_PIPE_LEN; + } + __this_cpu_inc(rcu_torture_batch[completed]); + preempt_enable(); + cur_ops->readunlock(idx); +} + +/* + * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current, + * incrementing the corresponding element of the pipeline array. The + * counter in the element should never be greater than 1, otherwise, the + * RCU implementation is broken. + */ +static int +rcu_torture_reader(void *arg) +{ + int completed; + int completed_end; + int idx; + DEFINE_TORTURE_RANDOM(rand); + struct rcu_torture *p; + int pipe_count; + struct timer_list t; + unsigned long long ts; + + VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); + set_user_nice(current, 19); + if (irqreader && cur_ops->irq_capable) + setup_timer_on_stack(&t, rcu_torture_timer, 0); + + do { + if (irqreader && cur_ops->irq_capable) { + if (!timer_pending(&t)) + mod_timer(&t, jiffies + 1); + } + idx = cur_ops->readlock(); + completed = cur_ops->completed(); + ts = rcu_trace_clock_local(); + p = rcu_dereference_check(rcu_torture_current, + rcu_read_lock_bh_held() || + rcu_read_lock_sched_held() || + srcu_read_lock_held(&srcu_ctl)); + if (p == NULL) { + /* Wait for rcu_torture_writer to get underway */ + cur_ops->readunlock(idx); + schedule_timeout_interruptible(HZ); + continue; + } + if (p->rtort_mbtest == 0) + atomic_inc(&n_rcu_torture_mberror); + cur_ops->read_delay(&rand); + preempt_disable(); + pipe_count = p->rtort_pipe_count; + if (pipe_count > RCU_TORTURE_PIPE_LEN) { + /* Should not happen, but... */ + pipe_count = RCU_TORTURE_PIPE_LEN; + } + completed_end = cur_ops->completed(); + if (pipe_count > 1) { + do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, + ts, completed, completed_end); + rcutorture_trace_dump(); + } + __this_cpu_inc(rcu_torture_count[pipe_count]); + completed = completed_end - completed; + if (completed > RCU_TORTURE_PIPE_LEN) { + /* Should not happen, but... */ + completed = RCU_TORTURE_PIPE_LEN; + } + __this_cpu_inc(rcu_torture_batch[completed]); + preempt_enable(); + cur_ops->readunlock(idx); + schedule(); + rcu_stutter_wait("rcu_torture_reader"); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); + rcutorture_shutdown_absorb("rcu_torture_reader"); + if (irqreader && cur_ops->irq_capable) + del_timer_sync(&t); + while (!kthread_should_stop()) + schedule_timeout_uninterruptible(1); + return 0; +} + +/* + * Create an RCU-torture statistics message in the specified buffer. + */ +static void +rcu_torture_printk(char *page) +{ + int cpu; + int i; + long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; + long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; + + for_each_possible_cpu(cpu) { + for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { + pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i]; + batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i]; + } + } + for (i = RCU_TORTURE_PIPE_LEN - 1; i >= 0; i--) { + if (pipesummary[i] != 0) + break; + } + page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); + page += sprintf(page, + "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", + rcu_torture_current, + rcu_torture_current_version, + list_empty(&rcu_torture_freelist), + atomic_read(&n_rcu_torture_alloc), + atomic_read(&n_rcu_torture_alloc_fail), + atomic_read(&n_rcu_torture_free)); + page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", + atomic_read(&n_rcu_torture_mberror), + n_rcu_torture_boost_ktrerror, + n_rcu_torture_boost_rterror); + page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", + n_rcu_torture_boost_failure, + n_rcu_torture_boosts, + n_rcu_torture_timers); + page += sprintf(page, + "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", + n_online_successes, n_online_attempts, + n_offline_successes, n_offline_attempts, + min_online, max_online, + min_offline, max_offline, + sum_online, sum_offline, HZ); + page += sprintf(page, "barrier: %ld/%ld:%ld", + n_barrier_successes, + n_barrier_attempts, + n_rcu_torture_barrier_error); + page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); + if (atomic_read(&n_rcu_torture_mberror) != 0 || + n_rcu_torture_barrier_error != 0 || + n_rcu_torture_boost_ktrerror != 0 || + n_rcu_torture_boost_rterror != 0 || + n_rcu_torture_boost_failure != 0 || + i > 1) { + page += sprintf(page, "!!! "); + atomic_inc(&n_rcu_torture_error); + WARN_ON_ONCE(1); + } + page += sprintf(page, "Reader Pipe: "); + for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) + page += sprintf(page, " %ld", pipesummary[i]); + page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); + page += sprintf(page, "Reader Batch: "); + for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) + page += sprintf(page, " %ld", batchsummary[i]); + page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); + page += sprintf(page, "Free-Block Circulation: "); + for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { + page += sprintf(page, " %d", + atomic_read(&rcu_torture_wcount[i])); + } + page += sprintf(page, "\n"); + if (cur_ops->stats) + cur_ops->stats(page); +} + +/* + * Print torture statistics. Caller must ensure that there is only + * one call to this function at a given time!!! This is normally + * accomplished by relying on the module system to only have one copy + * of the module loaded, and then by giving the rcu_torture_stats + * kthread full control (or the init/cleanup functions when rcu_torture_stats + * thread is not running). + */ +static void +rcu_torture_stats_print(void) +{ + int size = nr_cpu_ids * 200 + 8192; + char *buf; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) { + pr_err("rcu-torture: Out of memory, need: %d", size); + return; + } + rcu_torture_printk(buf); + pr_alert("%s", buf); + kfree(buf); +} + +/* + * Periodically prints torture statistics, if periodic statistics printing + * was specified via the stat_interval module parameter. + * + * No need to worry about fullstop here, since this one doesn't reference + * volatile state or register callbacks. + */ +static int +rcu_torture_stats(void *arg) +{ + VERBOSE_PRINTK_STRING("rcu_torture_stats task started"); + do { + schedule_timeout_interruptible(stat_interval * HZ); + rcu_torture_stats_print(); + rcutorture_shutdown_absorb("rcu_torture_stats"); + } while (!kthread_should_stop()); + VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); + return 0; +} + +static int rcu_idle_cpu; /* Force all torture tasks off this CPU */ + +/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case + * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs. + */ +static void rcu_torture_shuffle_tasks(void) +{ + int i; + + cpumask_setall(shuffle_tmp_mask); + get_online_cpus(); + + /* No point in shuffling if there is only one online CPU (ex: UP) */ + if (num_online_cpus() == 1) { + put_online_cpus(); + return; + } + + if (rcu_idle_cpu != -1) + cpumask_clear_cpu(rcu_idle_cpu, shuffle_tmp_mask); + + set_cpus_allowed_ptr(current, shuffle_tmp_mask); + + if (reader_tasks) { + for (i = 0; i < nrealreaders; i++) + if (reader_tasks[i]) + set_cpus_allowed_ptr(reader_tasks[i], + shuffle_tmp_mask); + } + if (fakewriter_tasks) { + for (i = 0; i < nfakewriters; i++) + if (fakewriter_tasks[i]) + set_cpus_allowed_ptr(fakewriter_tasks[i], + shuffle_tmp_mask); + } + if (writer_task) + set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask); + if (stats_task) + set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask); + if (stutter_task) + set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask); + if (fqs_task) + set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask); + if (shutdown_task) + set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask); +#ifdef CONFIG_HOTPLUG_CPU + if (onoff_task) + set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask); +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + if (stall_task) + set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask); + if (barrier_cbs_tasks) + for (i = 0; i < n_barrier_cbs; i++) + if (barrier_cbs_tasks[i]) + set_cpus_allowed_ptr(barrier_cbs_tasks[i], + shuffle_tmp_mask); + if (barrier_task) + set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask); + + if (rcu_idle_cpu == -1) + rcu_idle_cpu = num_online_cpus() - 1; + else + rcu_idle_cpu--; + + put_online_cpus(); +} + +/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the + * system to become idle at a time and cut off its timer ticks. This is meant + * to test the support for such tickless idle CPU in RCU. + */ +static int +rcu_torture_shuffle(void *arg) +{ + VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started"); + do { + schedule_timeout_interruptible(shuffle_interval * HZ); + rcu_torture_shuffle_tasks(); + rcutorture_shutdown_absorb("rcu_torture_shuffle"); + } while (!kthread_should_stop()); + VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); + return 0; +} + +/* Cause the rcutorture test to "stutter", starting and stopping all + * threads periodically. + */ +static int +rcu_torture_stutter(void *arg) +{ + VERBOSE_PRINTK_STRING("rcu_torture_stutter task started"); + do { + schedule_timeout_interruptible(stutter * HZ); + stutter_pause_test = 1; + if (!kthread_should_stop()) + schedule_timeout_interruptible(stutter * HZ); + stutter_pause_test = 0; + rcutorture_shutdown_absorb("rcu_torture_stutter"); + } while (!kthread_should_stop()); + VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping"); + return 0; +} + +static inline void +rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) +{ + pr_alert("%s" TORTURE_FLAG + "--- %s: nreaders=%d nfakewriters=%d " + "stat_interval=%d verbose=%d test_no_idle_hz=%d " + "shuffle_interval=%d stutter=%d irqreader=%d " + "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " + "test_boost=%d/%d test_boost_interval=%d " + "test_boost_duration=%d shutdown_secs=%d " + "stall_cpu=%d stall_cpu_holdoff=%d " + "n_barrier_cbs=%d " + "onoff_interval=%d onoff_holdoff=%d\n", + torture_type, tag, nrealreaders, nfakewriters, + stat_interval, verbose, test_no_idle_hz, shuffle_interval, + stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, + test_boost, cur_ops->can_boost, + test_boost_interval, test_boost_duration, shutdown_secs, + stall_cpu, stall_cpu_holdoff, + n_barrier_cbs, + onoff_interval, onoff_holdoff); +} + +static struct notifier_block rcutorture_shutdown_nb = { + .notifier_call = rcutorture_shutdown_notify, +}; + +static void rcutorture_booster_cleanup(int cpu) +{ + struct task_struct *t; + + if (boost_tasks[cpu] == NULL) + return; + mutex_lock(&boost_mutex); + VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task"); + t = boost_tasks[cpu]; + boost_tasks[cpu] = NULL; + mutex_unlock(&boost_mutex); + + /* This must be outside of the mutex, otherwise deadlock! */ + kthread_stop(t); + boost_tasks[cpu] = NULL; +} + +static int rcutorture_booster_init(int cpu) +{ + int retval; + + if (boost_tasks[cpu] != NULL) + return 0; /* Already created, nothing more to do. */ + + /* Don't allow time recalculation while creating a new task. */ + mutex_lock(&boost_mutex); + VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); + boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, + cpu_to_node(cpu), + "rcu_torture_boost"); + if (IS_ERR(boost_tasks[cpu])) { + retval = PTR_ERR(boost_tasks[cpu]); + VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); + n_rcu_torture_boost_ktrerror++; + boost_tasks[cpu] = NULL; + mutex_unlock(&boost_mutex); + return retval; + } + kthread_bind(boost_tasks[cpu], cpu); + wake_up_process(boost_tasks[cpu]); + mutex_unlock(&boost_mutex); + return 0; +} + +/* + * Cause the rcutorture test to shutdown the system after the test has + * run for the time specified by the shutdown_secs module parameter. + */ +static int +rcu_torture_shutdown(void *arg) +{ + long delta; + unsigned long jiffies_snap; + + VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started"); + jiffies_snap = ACCESS_ONCE(jiffies); + while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && + !kthread_should_stop()) { + delta = shutdown_time - jiffies_snap; + if (verbose) + pr_alert("%s" TORTURE_FLAG + "rcu_torture_shutdown task: %lu jiffies remaining\n", + torture_type, delta); + schedule_timeout_interruptible(delta); + jiffies_snap = ACCESS_ONCE(jiffies); + } + if (kthread_should_stop()) { + VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping"); + return 0; + } + + /* OK, shut down the system. */ + + VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system"); + shutdown_task = NULL; /* Avoid self-kill deadlock. */ + rcu_torture_cleanup(); /* Get the success/failure message. */ + kernel_power_off(); /* Shut down the system. */ + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Execute random CPU-hotplug operations at the interval specified + * by the onoff_interval. + */ +static int +rcu_torture_onoff(void *arg) +{ + int cpu; + unsigned long delta; + int maxcpu = -1; + DEFINE_TORTURE_RANDOM(rand); + int ret; + unsigned long starttime; + + VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); + for_each_online_cpu(cpu) + maxcpu = cpu; + WARN_ON(maxcpu < 0); + if (onoff_holdoff > 0) { + VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff"); + schedule_timeout_interruptible(onoff_holdoff * HZ); + VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff"); + } + while (!kthread_should_stop()) { + cpu = (torture_random(&rand) >> 4) % (maxcpu + 1); + if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offlining %d\n", + torture_type, cpu); + starttime = jiffies; + n_offline_attempts++; + ret = cpu_down(cpu); + if (ret) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offline %d failed: errno %d\n", + torture_type, cpu, ret); + } else { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: offlined %d\n", + torture_type, cpu); + n_offline_successes++; + delta = jiffies - starttime; + sum_offline += delta; + if (min_offline < 0) { + min_offline = delta; + max_offline = delta; + } + if (min_offline > delta) + min_offline = delta; + if (max_offline < delta) + max_offline = delta; + } + } else if (cpu_is_hotpluggable(cpu)) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: onlining %d\n", + torture_type, cpu); + starttime = jiffies; + n_online_attempts++; + ret = cpu_up(cpu); + if (ret) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: online %d failed: errno %d\n", + torture_type, cpu, ret); + } else { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "rcu_torture_onoff task: onlined %d\n", + torture_type, cpu); + n_online_successes++; + delta = jiffies - starttime; + sum_online += delta; + if (min_online < 0) { + min_online = delta; + max_online = delta; + } + if (min_online > delta) + min_online = delta; + if (max_online < delta) + max_online = delta; + } + } + schedule_timeout_interruptible(onoff_interval * HZ); + } + VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping"); + return 0; +} + +static int +rcu_torture_onoff_init(void) +{ + int ret; + + if (onoff_interval <= 0) + return 0; + onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff"); + if (IS_ERR(onoff_task)) { + ret = PTR_ERR(onoff_task); + onoff_task = NULL; + return ret; + } + return 0; +} + +static void rcu_torture_onoff_cleanup(void) +{ + if (onoff_task == NULL) + return; + VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); + kthread_stop(onoff_task); + onoff_task = NULL; +} + +#else /* #ifdef CONFIG_HOTPLUG_CPU */ + +static int +rcu_torture_onoff_init(void) +{ + return 0; +} + +static void rcu_torture_onoff_cleanup(void) +{ +} + +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ + +/* + * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then + * induces a CPU stall for the time specified by stall_cpu. + */ +static int rcu_torture_stall(void *args) +{ + unsigned long stop_at; + + VERBOSE_PRINTK_STRING("rcu_torture_stall task started"); + if (stall_cpu_holdoff > 0) { + VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff"); + schedule_timeout_interruptible(stall_cpu_holdoff * HZ); + VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff"); + } + if (!kthread_should_stop()) { + stop_at = get_seconds() + stall_cpu; + /* RCU CPU stall is expected behavior in following code. */ + pr_alert("rcu_torture_stall start.\n"); + rcu_read_lock(); + preempt_disable(); + while (ULONG_CMP_LT(get_seconds(), stop_at)) + continue; /* Induce RCU CPU stall warning. */ + preempt_enable(); + rcu_read_unlock(); + pr_alert("rcu_torture_stall end.\n"); + } + rcutorture_shutdown_absorb("rcu_torture_stall"); + while (!kthread_should_stop()) + schedule_timeout_interruptible(10 * HZ); + return 0; +} + +/* Spawn CPU-stall kthread, if stall_cpu specified. */ +static int __init rcu_torture_stall_init(void) +{ + int ret; + + if (stall_cpu <= 0) + return 0; + stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall"); + if (IS_ERR(stall_task)) { + ret = PTR_ERR(stall_task); + stall_task = NULL; + return ret; + } + return 0; +} + +/* Clean up after the CPU-stall kthread, if one was spawned. */ +static void rcu_torture_stall_cleanup(void) +{ + if (stall_task == NULL) + return; + VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); + kthread_stop(stall_task); + stall_task = NULL; +} + +/* Callback function for RCU barrier testing. */ +void rcu_torture_barrier_cbf(struct rcu_head *rcu) +{ + atomic_inc(&barrier_cbs_invoked); +} + +/* kthread function to register callbacks used to test RCU barriers. */ +static int rcu_torture_barrier_cbs(void *arg) +{ + long myid = (long)arg; + bool lastphase = 0; + bool newphase; + struct rcu_head rcu; + + init_rcu_head_on_stack(&rcu); + VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); + set_user_nice(current, 19); + do { + wait_event(barrier_cbs_wq[myid], + (newphase = + ACCESS_ONCE(barrier_phase)) != lastphase || + kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP); + lastphase = newphase; + smp_mb(); /* ensure barrier_phase load before ->call(). */ + if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) + break; + cur_ops->call(&rcu, rcu_torture_barrier_cbf); + if (atomic_dec_and_test(&barrier_cbs_count)) + wake_up(&barrier_wq); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping"); + rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); + while (!kthread_should_stop()) + schedule_timeout_interruptible(1); + cur_ops->cb_barrier(); + destroy_rcu_head_on_stack(&rcu); + return 0; +} + +/* kthread function to drive and coordinate RCU barrier testing. */ +static int rcu_torture_barrier(void *arg) +{ + int i; + + VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting"); + do { + atomic_set(&barrier_cbs_invoked, 0); + atomic_set(&barrier_cbs_count, n_barrier_cbs); + smp_mb(); /* Ensure barrier_phase after prior assignments. */ + barrier_phase = !barrier_phase; + for (i = 0; i < n_barrier_cbs; i++) + wake_up(&barrier_cbs_wq[i]); + wait_event(barrier_wq, + atomic_read(&barrier_cbs_count) == 0 || + kthread_should_stop() || + fullstop != FULLSTOP_DONTSTOP); + if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) + break; + n_barrier_attempts++; + cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ + if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { + n_rcu_torture_barrier_error++; + WARN_ON_ONCE(1); + } + n_barrier_successes++; + schedule_timeout_interruptible(HZ / 10); + } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); + rcutorture_shutdown_absorb("rcu_torture_barrier"); + while (!kthread_should_stop()) + schedule_timeout_interruptible(1); + return 0; +} + +/* Initialize RCU barrier testing. */ +static int rcu_torture_barrier_init(void) +{ + int i; + int ret; + + if (n_barrier_cbs == 0) + return 0; + if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { + pr_alert("%s" TORTURE_FLAG + " Call or barrier ops missing for %s,\n", + torture_type, cur_ops->name); + pr_alert("%s" TORTURE_FLAG + " RCU barrier testing omitted from run.\n", + torture_type); + return 0; + } + atomic_set(&barrier_cbs_count, 0); + atomic_set(&barrier_cbs_invoked, 0); + barrier_cbs_tasks = + kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]), + GFP_KERNEL); + barrier_cbs_wq = + kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), + GFP_KERNEL); + if (barrier_cbs_tasks == NULL || !barrier_cbs_wq) + return -ENOMEM; + for (i = 0; i < n_barrier_cbs; i++) { + init_waitqueue_head(&barrier_cbs_wq[i]); + barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, + (void *)(long)i, + "rcu_torture_barrier_cbs"); + if (IS_ERR(barrier_cbs_tasks[i])) { + ret = PTR_ERR(barrier_cbs_tasks[i]); + VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); + barrier_cbs_tasks[i] = NULL; + return ret; + } + } + barrier_task = kthread_run(rcu_torture_barrier, NULL, + "rcu_torture_barrier"); + if (IS_ERR(barrier_task)) { + ret = PTR_ERR(barrier_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier"); + barrier_task = NULL; + } + return 0; +} + +/* Clean up after RCU barrier testing. */ +static void rcu_torture_barrier_cleanup(void) +{ + int i; + + if (barrier_task != NULL) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task"); + kthread_stop(barrier_task); + barrier_task = NULL; + } + if (barrier_cbs_tasks != NULL) { + for (i = 0; i < n_barrier_cbs; i++) { + if (barrier_cbs_tasks[i] != NULL) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task"); + kthread_stop(barrier_cbs_tasks[i]); + barrier_cbs_tasks[i] = NULL; + } + } + kfree(barrier_cbs_tasks); + barrier_cbs_tasks = NULL; + } + if (barrier_cbs_wq != NULL) { + kfree(barrier_cbs_wq); + barrier_cbs_wq = NULL; + } +} + +static int rcutorture_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + (void)rcutorture_booster_init(cpu); + break; + case CPU_DOWN_PREPARE: + rcutorture_booster_cleanup(cpu); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block rcutorture_cpu_nb = { + .notifier_call = rcutorture_cpu_notify, +}; + +static void +rcu_torture_cleanup(void) +{ + int i; + + mutex_lock(&fullstop_mutex); + rcutorture_record_test_transition(); + if (fullstop == FULLSTOP_SHUTDOWN) { + pr_warn(/* but going down anyway, so... */ + "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); + mutex_unlock(&fullstop_mutex); + schedule_timeout_uninterruptible(10); + if (cur_ops->cb_barrier != NULL) + cur_ops->cb_barrier(); + return; + } + fullstop = FULLSTOP_RMMOD; + mutex_unlock(&fullstop_mutex); + unregister_reboot_notifier(&rcutorture_shutdown_nb); + rcu_torture_barrier_cleanup(); + rcu_torture_stall_cleanup(); + if (stutter_task) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); + kthread_stop(stutter_task); + } + stutter_task = NULL; + if (shuffler_task) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); + kthread_stop(shuffler_task); + free_cpumask_var(shuffle_tmp_mask); + } + shuffler_task = NULL; + + if (writer_task) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); + kthread_stop(writer_task); + } + writer_task = NULL; + + if (reader_tasks) { + for (i = 0; i < nrealreaders; i++) { + if (reader_tasks[i]) { + VERBOSE_PRINTK_STRING( + "Stopping rcu_torture_reader task"); + kthread_stop(reader_tasks[i]); + } + reader_tasks[i] = NULL; + } + kfree(reader_tasks); + reader_tasks = NULL; + } + rcu_torture_current = NULL; + + if (fakewriter_tasks) { + for (i = 0; i < nfakewriters; i++) { + if (fakewriter_tasks[i]) { + VERBOSE_PRINTK_STRING( + "Stopping rcu_torture_fakewriter task"); + kthread_stop(fakewriter_tasks[i]); + } + fakewriter_tasks[i] = NULL; + } + kfree(fakewriter_tasks); + fakewriter_tasks = NULL; + } + + if (stats_task) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); + kthread_stop(stats_task); + } + stats_task = NULL; + + if (fqs_task) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task"); + kthread_stop(fqs_task); + } + fqs_task = NULL; + if ((test_boost == 1 && cur_ops->can_boost) || + test_boost == 2) { + unregister_cpu_notifier(&rcutorture_cpu_nb); + for_each_possible_cpu(i) + rcutorture_booster_cleanup(i); + } + if (shutdown_task != NULL) { + VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); + kthread_stop(shutdown_task); + } + shutdown_task = NULL; + rcu_torture_onoff_cleanup(); + + /* Wait for all RCU callbacks to fire. */ + + if (cur_ops->cb_barrier != NULL) + cur_ops->cb_barrier(); + + rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ + + if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) + rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); + else if (n_online_successes != n_online_attempts || + n_offline_successes != n_offline_attempts) + rcu_torture_print_module_parms(cur_ops, + "End of test: RCU_HOTPLUG"); + else + rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); +} + +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD +static void rcu_torture_leak_cb(struct rcu_head *rhp) +{ +} + +static void rcu_torture_err_cb(struct rcu_head *rhp) +{ + /* + * This -might- happen due to race conditions, but is unlikely. + * The scenario that leads to this happening is that the + * first of the pair of duplicate callbacks is queued, + * someone else starts a grace period that includes that + * callback, then the second of the pair must wait for the + * next grace period. Unlikely, but can happen. If it + * does happen, the debug-objects subsystem won't have splatted. + */ + pr_alert("rcutorture: duplicated callback was invoked.\n"); +} +#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ + +/* + * Verify that double-free causes debug-objects to complain, but only + * if CONFIG_DEBUG_OBJECTS_RCU_HEAD=y. Otherwise, say that the test + * cannot be carried out. + */ +static void rcu_test_debug_objects(void) +{ +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD + struct rcu_head rh1; + struct rcu_head rh2; + + init_rcu_head_on_stack(&rh1); + init_rcu_head_on_stack(&rh2); + pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n"); + + /* Try to queue the rh2 pair of callbacks for the same grace period. */ + preempt_disable(); /* Prevent preemption from interrupting test. */ + rcu_read_lock(); /* Make it impossible to finish a grace period. */ + call_rcu(&rh1, rcu_torture_leak_cb); /* Start grace period. */ + local_irq_disable(); /* Make it harder to start a new grace period. */ + call_rcu(&rh2, rcu_torture_leak_cb); + call_rcu(&rh2, rcu_torture_err_cb); /* Duplicate callback. */ + local_irq_enable(); + rcu_read_unlock(); + preempt_enable(); + + /* Wait for them all to get done so we can safely return. */ + rcu_barrier(); + pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n"); + destroy_rcu_head_on_stack(&rh1); + destroy_rcu_head_on_stack(&rh2); +#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ + pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n"); +#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +} + +static int __init +rcu_torture_init(void) +{ + int i; + int cpu; + int firsterr = 0; + int retval; + static struct rcu_torture_ops *torture_ops[] = { + &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, + }; + + mutex_lock(&fullstop_mutex); + + /* Process args and tell the world that the torturer is on the job. */ + for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { + cur_ops = torture_ops[i]; + if (strcmp(torture_type, cur_ops->name) == 0) + break; + } + if (i == ARRAY_SIZE(torture_ops)) { + pr_alert("rcu-torture: invalid torture type: \"%s\"\n", + torture_type); + pr_alert("rcu-torture types:"); + for (i = 0; i < ARRAY_SIZE(torture_ops); i++) + pr_alert(" %s", torture_ops[i]->name); + pr_alert("\n"); + mutex_unlock(&fullstop_mutex); + return -EINVAL; + } + if (cur_ops->fqs == NULL && fqs_duration != 0) { + pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); + fqs_duration = 0; + } + if (cur_ops->init) + cur_ops->init(); /* no "goto unwind" prior to this point!!! */ + + if (nreaders >= 0) + nrealreaders = nreaders; + else + nrealreaders = 2 * num_online_cpus(); + rcu_torture_print_module_parms(cur_ops, "Start of test"); + fullstop = FULLSTOP_DONTSTOP; + + /* Set up the freelist. */ + + INIT_LIST_HEAD(&rcu_torture_freelist); + for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) { + rcu_tortures[i].rtort_mbtest = 0; + list_add_tail(&rcu_tortures[i].rtort_free, + &rcu_torture_freelist); + } + + /* Initialize the statistics so that each run gets its own numbers. */ + + rcu_torture_current = NULL; + rcu_torture_current_version = 0; + atomic_set(&n_rcu_torture_alloc, 0); + atomic_set(&n_rcu_torture_alloc_fail, 0); + atomic_set(&n_rcu_torture_free, 0); + atomic_set(&n_rcu_torture_mberror, 0); + atomic_set(&n_rcu_torture_error, 0); + n_rcu_torture_barrier_error = 0; + n_rcu_torture_boost_ktrerror = 0; + n_rcu_torture_boost_rterror = 0; + n_rcu_torture_boost_failure = 0; + n_rcu_torture_boosts = 0; + for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) + atomic_set(&rcu_torture_wcount[i], 0); + for_each_possible_cpu(cpu) { + for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { + per_cpu(rcu_torture_count, cpu)[i] = 0; + per_cpu(rcu_torture_batch, cpu)[i] = 0; + } + } + + /* Start up the kthreads. */ + + VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); + writer_task = kthread_create(rcu_torture_writer, NULL, + "rcu_torture_writer"); + if (IS_ERR(writer_task)) { + firsterr = PTR_ERR(writer_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create writer"); + writer_task = NULL; + goto unwind; + } + wake_up_process(writer_task); + fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), + GFP_KERNEL); + if (fakewriter_tasks == NULL) { + VERBOSE_PRINTK_ERRSTRING("out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + for (i = 0; i < nfakewriters; i++) { + VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); + fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, + "rcu_torture_fakewriter"); + if (IS_ERR(fakewriter_tasks[i])) { + firsterr = PTR_ERR(fakewriter_tasks[i]); + VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter"); + fakewriter_tasks[i] = NULL; + goto unwind; + } + } + reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), + GFP_KERNEL); + if (reader_tasks == NULL) { + VERBOSE_PRINTK_ERRSTRING("out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + for (i = 0; i < nrealreaders; i++) { + VERBOSE_PRINTK_STRING("Creating rcu_torture_reader task"); + reader_tasks[i] = kthread_run(rcu_torture_reader, NULL, + "rcu_torture_reader"); + if (IS_ERR(reader_tasks[i])) { + firsterr = PTR_ERR(reader_tasks[i]); + VERBOSE_PRINTK_ERRSTRING("Failed to create reader"); + reader_tasks[i] = NULL; + goto unwind; + } + } + if (stat_interval > 0) { + VERBOSE_PRINTK_STRING("Creating rcu_torture_stats task"); + stats_task = kthread_run(rcu_torture_stats, NULL, + "rcu_torture_stats"); + if (IS_ERR(stats_task)) { + firsterr = PTR_ERR(stats_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create stats"); + stats_task = NULL; + goto unwind; + } + } + if (test_no_idle_hz) { + rcu_idle_cpu = num_online_cpus() - 1; + + if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) { + firsterr = -ENOMEM; + VERBOSE_PRINTK_ERRSTRING("Failed to alloc mask"); + goto unwind; + } + + /* Create the shuffler thread */ + shuffler_task = kthread_run(rcu_torture_shuffle, NULL, + "rcu_torture_shuffle"); + if (IS_ERR(shuffler_task)) { + free_cpumask_var(shuffle_tmp_mask); + firsterr = PTR_ERR(shuffler_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler"); + shuffler_task = NULL; + goto unwind; + } + } + if (stutter < 0) + stutter = 0; + if (stutter) { + /* Create the stutter thread */ + stutter_task = kthread_run(rcu_torture_stutter, NULL, + "rcu_torture_stutter"); + if (IS_ERR(stutter_task)) { + firsterr = PTR_ERR(stutter_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create stutter"); + stutter_task = NULL; + goto unwind; + } + } + if (fqs_duration < 0) + fqs_duration = 0; + if (fqs_duration) { + /* Create the stutter thread */ + fqs_task = kthread_run(rcu_torture_fqs, NULL, + "rcu_torture_fqs"); + if (IS_ERR(fqs_task)) { + firsterr = PTR_ERR(fqs_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create fqs"); + fqs_task = NULL; + goto unwind; + } + } + if (test_boost_interval < 1) + test_boost_interval = 1; + if (test_boost_duration < 2) + test_boost_duration = 2; + if ((test_boost == 1 && cur_ops->can_boost) || + test_boost == 2) { + + boost_starttime = jiffies + test_boost_interval * HZ; + register_cpu_notifier(&rcutorture_cpu_nb); + for_each_possible_cpu(i) { + if (cpu_is_offline(i)) + continue; /* Heuristic: CPU can go offline. */ + retval = rcutorture_booster_init(i); + if (retval < 0) { + firsterr = retval; + goto unwind; + } + } + } + if (shutdown_secs > 0) { + shutdown_time = jiffies + shutdown_secs * HZ; + shutdown_task = kthread_create(rcu_torture_shutdown, NULL, + "rcu_torture_shutdown"); + if (IS_ERR(shutdown_task)) { + firsterr = PTR_ERR(shutdown_task); + VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); + shutdown_task = NULL; + goto unwind; + } + wake_up_process(shutdown_task); + } + i = rcu_torture_onoff_init(); + if (i != 0) { + firsterr = i; + goto unwind; + } + register_reboot_notifier(&rcutorture_shutdown_nb); + i = rcu_torture_stall_init(); + if (i != 0) { + firsterr = i; + goto unwind; + } + retval = rcu_torture_barrier_init(); + if (retval != 0) { + firsterr = retval; + goto unwind; + } + if (object_debug) + rcu_test_debug_objects(); + rcutorture_record_test_transition(); + mutex_unlock(&fullstop_mutex); + return 0; + +unwind: + mutex_unlock(&fullstop_mutex); + rcu_torture_cleanup(); + return firsterr; +} + +module_init(rcu_torture_init); +module_exit(rcu_torture_cleanup); diff --git a/kernel/rcu/torture.c b/kernel/rcu/torture.c deleted file mode 100644 index dad6723..0000000 --- a/kernel/rcu/torture.c +++ /dev/null @@ -1,2148 +0,0 @@ -/* - * Read-Copy Update module-based torture test facility - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2005, 2006 - * - * Authors: Paul E. McKenney - * Josh Triplett - * - * See also: Documentation/RCU/torture.txt - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Paul E. McKenney and Josh Triplett "); - -MODULE_ALIAS("rcutorture"); -#ifdef MODULE_PARAM_PREFIX -#undef MODULE_PARAM_PREFIX -#endif -#define MODULE_PARAM_PREFIX "rcutorture." - -static int fqs_duration; -module_param(fqs_duration, int, 0444); -MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable"); -static int fqs_holdoff; -module_param(fqs_holdoff, int, 0444); -MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); -static int fqs_stutter = 3; -module_param(fqs_stutter, int, 0444); -MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); -static bool gp_exp; -module_param(gp_exp, bool, 0444); -MODULE_PARM_DESC(gp_exp, "Use expedited GP wait primitives"); -static bool gp_normal; -module_param(gp_normal, bool, 0444); -MODULE_PARM_DESC(gp_normal, "Use normal (non-expedited) GP wait primitives"); -static int irqreader = 1; -module_param(irqreader, int, 0444); -MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers"); -static int n_barrier_cbs; -module_param(n_barrier_cbs, int, 0444); -MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing"); -static int nfakewriters = 4; -module_param(nfakewriters, int, 0444); -MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); -static int nreaders = -1; -module_param(nreaders, int, 0444); -MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); -static int object_debug; -module_param(object_debug, int, 0444); -MODULE_PARM_DESC(object_debug, "Enable debug-object double call_rcu() testing"); -static int onoff_holdoff; -module_param(onoff_holdoff, int, 0444); -MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)"); -static int onoff_interval; -module_param(onoff_interval, int, 0444); -MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); -static int shuffle_interval = 3; -module_param(shuffle_interval, int, 0444); -MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); -static int shutdown_secs; -module_param(shutdown_secs, int, 0444); -MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), <= zero to disable."); -static int stall_cpu; -module_param(stall_cpu, int, 0444); -MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable."); -static int stall_cpu_holdoff = 10; -module_param(stall_cpu_holdoff, int, 0444); -MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s)."); -static int stat_interval = 60; -module_param(stat_interval, int, 0644); -MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); -static int stutter = 5; -module_param(stutter, int, 0444); -MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test"); -static int test_boost = 1; -module_param(test_boost, int, 0444); -MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); -static int test_boost_duration = 4; -module_param(test_boost_duration, int, 0444); -MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); -static int test_boost_interval = 7; -module_param(test_boost_interval, int, 0444); -MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); -static bool test_no_idle_hz = true; -module_param(test_no_idle_hz, bool, 0444); -MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); -static char *torture_type = "rcu"; -module_param(torture_type, charp, 0444); -MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); -static bool verbose; -module_param(verbose, bool, 0444); -MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); - -#define TORTURE_FLAG "-torture:" -#define PRINTK_STRING(s) \ - do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) -#define VERBOSE_PRINTK_STRING(s) \ - do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) -#define VERBOSE_PRINTK_ERRSTRING(s) \ - do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) - -static int nrealreaders; -static struct task_struct *writer_task; -static struct task_struct **fakewriter_tasks; -static struct task_struct **reader_tasks; -static struct task_struct *stats_task; -static struct task_struct *shuffler_task; -static struct task_struct *stutter_task; -static struct task_struct *fqs_task; -static struct task_struct *boost_tasks[NR_CPUS]; -static struct task_struct *shutdown_task; -#ifdef CONFIG_HOTPLUG_CPU -static struct task_struct *onoff_task; -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ -static struct task_struct *stall_task; -static struct task_struct **barrier_cbs_tasks; -static struct task_struct *barrier_task; - -#define RCU_TORTURE_PIPE_LEN 10 - -struct rcu_torture { - struct rcu_head rtort_rcu; - int rtort_pipe_count; - struct list_head rtort_free; - int rtort_mbtest; -}; - -static LIST_HEAD(rcu_torture_freelist); -static struct rcu_torture __rcu *rcu_torture_current; -static unsigned long rcu_torture_current_version; -static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN]; -static DEFINE_SPINLOCK(rcu_torture_lock); -static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], - rcu_torture_count) = { 0 }; -static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], - rcu_torture_batch) = { 0 }; -static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1]; -static atomic_t n_rcu_torture_alloc; -static atomic_t n_rcu_torture_alloc_fail; -static atomic_t n_rcu_torture_free; -static atomic_t n_rcu_torture_mberror; -static atomic_t n_rcu_torture_error; -static long n_rcu_torture_barrier_error; -static long n_rcu_torture_boost_ktrerror; -static long n_rcu_torture_boost_rterror; -static long n_rcu_torture_boost_failure; -static long n_rcu_torture_boosts; -static long n_rcu_torture_timers; -static long n_offline_attempts; -static long n_offline_successes; -static unsigned long sum_offline; -static int min_offline = -1; -static int max_offline; -static long n_online_attempts; -static long n_online_successes; -static unsigned long sum_online; -static int min_online = -1; -static int max_online; -static long n_barrier_attempts; -static long n_barrier_successes; -static struct list_head rcu_torture_removed; -static cpumask_var_t shuffle_tmp_mask; - -static int stutter_pause_test; - -#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) -#define RCUTORTURE_RUNNABLE_INIT 1 -#else -#define RCUTORTURE_RUNNABLE_INIT 0 -#endif -int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; -module_param(rcutorture_runnable, int, 0444); -MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot"); - -#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) -#define rcu_can_boost() 1 -#else /* #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ -#define rcu_can_boost() 0 -#endif /* #else #if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) */ - -#ifdef CONFIG_RCU_TRACE -static u64 notrace rcu_trace_clock_local(void) -{ - u64 ts = trace_clock_local(); - unsigned long __maybe_unused ts_rem = do_div(ts, NSEC_PER_USEC); - return ts; -} -#else /* #ifdef CONFIG_RCU_TRACE */ -static u64 notrace rcu_trace_clock_local(void) -{ - return 0ULL; -} -#endif /* #else #ifdef CONFIG_RCU_TRACE */ - -static unsigned long shutdown_time; /* jiffies to system shutdown. */ -static unsigned long boost_starttime; /* jiffies of next boost test start. */ -DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ - /* and boost task create/destroy. */ -static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ -static bool barrier_phase; /* Test phase. */ -static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ -static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ -static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); - -/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ - -#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ -#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ -#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ -static int fullstop = FULLSTOP_RMMOD; -/* - * Protect fullstop transitions and spawning of kthreads. - */ -static DEFINE_MUTEX(fullstop_mutex); - -/* Forward reference. */ -static void rcu_torture_cleanup(void); - -/* - * Detect and respond to a system shutdown. - */ -static int -rcutorture_shutdown_notify(struct notifier_block *unused1, - unsigned long unused2, void *unused3) -{ - mutex_lock(&fullstop_mutex); - if (fullstop == FULLSTOP_DONTSTOP) - fullstop = FULLSTOP_SHUTDOWN; - else - pr_warn(/* but going down anyway, so... */ - "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); - mutex_unlock(&fullstop_mutex); - return NOTIFY_DONE; -} - -/* - * Absorb kthreads into a kernel function that won't return, so that - * they won't ever access module text or data again. - */ -static void rcutorture_shutdown_absorb(const char *title) -{ - if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { - pr_notice( - "rcutorture thread %s parking due to system shutdown\n", - title); - schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); - } -} - -/* - * Allocate an element from the rcu_tortures pool. - */ -static struct rcu_torture * -rcu_torture_alloc(void) -{ - struct list_head *p; - - spin_lock_bh(&rcu_torture_lock); - if (list_empty(&rcu_torture_freelist)) { - atomic_inc(&n_rcu_torture_alloc_fail); - spin_unlock_bh(&rcu_torture_lock); - return NULL; - } - atomic_inc(&n_rcu_torture_alloc); - p = rcu_torture_freelist.next; - list_del_init(p); - spin_unlock_bh(&rcu_torture_lock); - return container_of(p, struct rcu_torture, rtort_free); -} - -/* - * Free an element to the rcu_tortures pool. - */ -static void -rcu_torture_free(struct rcu_torture *p) -{ - atomic_inc(&n_rcu_torture_free); - spin_lock_bh(&rcu_torture_lock); - list_add_tail(&p->rtort_free, &rcu_torture_freelist); - spin_unlock_bh(&rcu_torture_lock); -} - -struct rcu_random_state { - unsigned long rrs_state; - long rrs_count; -}; - -#define RCU_RANDOM_MULT 39916801 /* prime */ -#define RCU_RANDOM_ADD 479001701 /* prime */ -#define RCU_RANDOM_REFRESH 10000 - -#define DEFINE_RCU_RANDOM(name) struct rcu_random_state name = { 0, 0 } - -/* - * Crude but fast random-number generator. Uses a linear congruential - * generator, with occasional help from cpu_clock(). - */ -static unsigned long -rcu_random(struct rcu_random_state *rrsp) -{ - if (--rrsp->rrs_count < 0) { - rrsp->rrs_state += (unsigned long)local_clock(); - rrsp->rrs_count = RCU_RANDOM_REFRESH; - } - rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD; - return swahw32(rrsp->rrs_state); -} - -static void -rcu_stutter_wait(const char *title) -{ - while (stutter_pause_test || !rcutorture_runnable) { - if (rcutorture_runnable) - schedule_timeout_interruptible(1); - else - schedule_timeout_interruptible(round_jiffies_relative(HZ)); - rcutorture_shutdown_absorb(title); - } -} - -/* - * Operations vector for selecting different types of tests. - */ - -struct rcu_torture_ops { - void (*init)(void); - int (*readlock)(void); - void (*read_delay)(struct rcu_random_state *rrsp); - void (*readunlock)(int idx); - int (*completed)(void); - void (*deferred_free)(struct rcu_torture *p); - void (*sync)(void); - void (*exp_sync)(void); - void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); - void (*cb_barrier)(void); - void (*fqs)(void); - void (*stats)(char *page); - int irq_capable; - int can_boost; - const char *name; -}; - -static struct rcu_torture_ops *cur_ops; - -/* - * Definitions for rcu torture testing. - */ - -static int rcu_torture_read_lock(void) __acquires(RCU) -{ - rcu_read_lock(); - return 0; -} - -static void rcu_read_delay(struct rcu_random_state *rrsp) -{ - const unsigned long shortdelay_us = 200; - const unsigned long longdelay_ms = 50; - - /* We want a short delay sometimes to make a reader delay the grace - * period, and we want a long delay occasionally to trigger - * force_quiescent_state. */ - - if (!(rcu_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) - mdelay(longdelay_ms); - if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) - udelay(shortdelay_us); -#ifdef CONFIG_PREEMPT - if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000))) - preempt_schedule(); /* No QS if preempt_disable() in effect */ -#endif -} - -static void rcu_torture_read_unlock(int idx) __releases(RCU) -{ - rcu_read_unlock(); -} - -static int rcu_torture_completed(void) -{ - return rcu_batches_completed(); -} - -static void -rcu_torture_cb(struct rcu_head *p) -{ - int i; - struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); - - if (fullstop != FULLSTOP_DONTSTOP) { - /* Test is ending, just drop callbacks on the floor. */ - /* The next initialization will pick up the pieces. */ - return; - } - i = rp->rtort_pipe_count; - if (i > RCU_TORTURE_PIPE_LEN) - i = RCU_TORTURE_PIPE_LEN; - atomic_inc(&rcu_torture_wcount[i]); - if (++rp->rtort_pipe_count >= RCU_TORTURE_PIPE_LEN) { - rp->rtort_mbtest = 0; - rcu_torture_free(rp); - } else { - cur_ops->deferred_free(rp); - } -} - -static int rcu_no_completed(void) -{ - return 0; -} - -static void rcu_torture_deferred_free(struct rcu_torture *p) -{ - call_rcu(&p->rtort_rcu, rcu_torture_cb); -} - -static void rcu_sync_torture_init(void) -{ - INIT_LIST_HEAD(&rcu_torture_removed); -} - -static struct rcu_torture_ops rcu_ops = { - .init = rcu_sync_torture_init, - .readlock = rcu_torture_read_lock, - .read_delay = rcu_read_delay, - .readunlock = rcu_torture_read_unlock, - .completed = rcu_torture_completed, - .deferred_free = rcu_torture_deferred_free, - .sync = synchronize_rcu, - .exp_sync = synchronize_rcu_expedited, - .call = call_rcu, - .cb_barrier = rcu_barrier, - .fqs = rcu_force_quiescent_state, - .stats = NULL, - .irq_capable = 1, - .can_boost = rcu_can_boost(), - .name = "rcu" -}; - -/* - * Definitions for rcu_bh torture testing. - */ - -static int rcu_bh_torture_read_lock(void) __acquires(RCU_BH) -{ - rcu_read_lock_bh(); - return 0; -} - -static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH) -{ - rcu_read_unlock_bh(); -} - -static int rcu_bh_torture_completed(void) -{ - return rcu_batches_completed_bh(); -} - -static void rcu_bh_torture_deferred_free(struct rcu_torture *p) -{ - call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); -} - -static struct rcu_torture_ops rcu_bh_ops = { - .init = rcu_sync_torture_init, - .readlock = rcu_bh_torture_read_lock, - .read_delay = rcu_read_delay, /* just reuse rcu's version. */ - .readunlock = rcu_bh_torture_read_unlock, - .completed = rcu_bh_torture_completed, - .deferred_free = rcu_bh_torture_deferred_free, - .sync = synchronize_rcu_bh, - .exp_sync = synchronize_rcu_bh_expedited, - .call = call_rcu_bh, - .cb_barrier = rcu_barrier_bh, - .fqs = rcu_bh_force_quiescent_state, - .stats = NULL, - .irq_capable = 1, - .name = "rcu_bh" -}; - -/* - * Definitions for srcu torture testing. - */ - -DEFINE_STATIC_SRCU(srcu_ctl); - -static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) -{ - return srcu_read_lock(&srcu_ctl); -} - -static void srcu_read_delay(struct rcu_random_state *rrsp) -{ - long delay; - const long uspertick = 1000000 / HZ; - const long longdelay = 10; - - /* We want there to be long-running readers, but not all the time. */ - - delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); - if (!delay) - schedule_timeout_interruptible(longdelay); - else - rcu_read_delay(rrsp); -} - -static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) -{ - srcu_read_unlock(&srcu_ctl, idx); -} - -static int srcu_torture_completed(void) -{ - return srcu_batches_completed(&srcu_ctl); -} - -static void srcu_torture_deferred_free(struct rcu_torture *rp) -{ - call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); -} - -static void srcu_torture_synchronize(void) -{ - synchronize_srcu(&srcu_ctl); -} - -static void srcu_torture_call(struct rcu_head *head, - void (*func)(struct rcu_head *head)) -{ - call_srcu(&srcu_ctl, head, func); -} - -static void srcu_torture_barrier(void) -{ - srcu_barrier(&srcu_ctl); -} - -static void srcu_torture_stats(char *page) -{ - int cpu; - int idx = srcu_ctl.completed & 0x1; - - page += sprintf(page, "%s%s per-CPU(idx=%d):", - torture_type, TORTURE_FLAG, idx); - for_each_possible_cpu(cpu) { - page += sprintf(page, " %d(%lu,%lu)", cpu, - per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], - per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); - } - sprintf(page, "\n"); -} - -static void srcu_torture_synchronize_expedited(void) -{ - synchronize_srcu_expedited(&srcu_ctl); -} - -static struct rcu_torture_ops srcu_ops = { - .init = rcu_sync_torture_init, - .readlock = srcu_torture_read_lock, - .read_delay = srcu_read_delay, - .readunlock = srcu_torture_read_unlock, - .completed = srcu_torture_completed, - .deferred_free = srcu_torture_deferred_free, - .sync = srcu_torture_synchronize, - .exp_sync = srcu_torture_synchronize_expedited, - .call = srcu_torture_call, - .cb_barrier = srcu_torture_barrier, - .stats = srcu_torture_stats, - .name = "srcu" -}; - -/* - * Definitions for sched torture testing. - */ - -static int sched_torture_read_lock(void) -{ - preempt_disable(); - return 0; -} - -static void sched_torture_read_unlock(int idx) -{ - preempt_enable(); -} - -static void rcu_sched_torture_deferred_free(struct rcu_torture *p) -{ - call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); -} - -static struct rcu_torture_ops sched_ops = { - .init = rcu_sync_torture_init, - .readlock = sched_torture_read_lock, - .read_delay = rcu_read_delay, /* just reuse rcu's version. */ - .readunlock = sched_torture_read_unlock, - .completed = rcu_no_completed, - .deferred_free = rcu_sched_torture_deferred_free, - .sync = synchronize_sched, - .exp_sync = synchronize_sched_expedited, - .call = call_rcu_sched, - .cb_barrier = rcu_barrier_sched, - .fqs = rcu_sched_force_quiescent_state, - .stats = NULL, - .irq_capable = 1, - .name = "sched" -}; - -/* - * RCU torture priority-boost testing. Runs one real-time thread per - * CPU for moderate bursts, repeatedly registering RCU callbacks and - * spinning waiting for them to be invoked. If a given callback takes - * too long to be invoked, we assume that priority inversion has occurred. - */ - -struct rcu_boost_inflight { - struct rcu_head rcu; - int inflight; -}; - -static void rcu_torture_boost_cb(struct rcu_head *head) -{ - struct rcu_boost_inflight *rbip = - container_of(head, struct rcu_boost_inflight, rcu); - - smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ - rbip->inflight = 0; -} - -static int rcu_torture_boost(void *arg) -{ - unsigned long call_rcu_time; - unsigned long endtime; - unsigned long oldstarttime; - struct rcu_boost_inflight rbi = { .inflight = 0 }; - struct sched_param sp; - - VERBOSE_PRINTK_STRING("rcu_torture_boost started"); - - /* Set real-time priority. */ - sp.sched_priority = 1; - if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { - VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); - n_rcu_torture_boost_rterror++; - } - - init_rcu_head_on_stack(&rbi.rcu); - /* Each pass through the following loop does one boost-test cycle. */ - do { - /* Wait for the next test interval. */ - oldstarttime = boost_starttime; - while (ULONG_CMP_LT(jiffies, oldstarttime)) { - schedule_timeout_interruptible(oldstarttime - jiffies); - rcu_stutter_wait("rcu_torture_boost"); - if (kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP) - goto checkwait; - } - - /* Do one boost-test interval. */ - endtime = oldstarttime + test_boost_duration * HZ; - call_rcu_time = jiffies; - while (ULONG_CMP_LT(jiffies, endtime)) { - /* If we don't have a callback in flight, post one. */ - if (!rbi.inflight) { - smp_mb(); /* RCU core before ->inflight = 1. */ - rbi.inflight = 1; - call_rcu(&rbi.rcu, rcu_torture_boost_cb); - if (jiffies - call_rcu_time > - test_boost_duration * HZ - HZ / 2) { - VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); - n_rcu_torture_boost_failure++; - } - call_rcu_time = jiffies; - } - cond_resched(); - rcu_stutter_wait("rcu_torture_boost"); - if (kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP) - goto checkwait; - } - - /* - * Set the start time of the next test interval. - * Yes, this is vulnerable to long delays, but such - * delays simply cause a false negative for the next - * interval. Besides, we are running at RT priority, - * so delays should be relatively rare. - */ - while (oldstarttime == boost_starttime && - !kthread_should_stop()) { - if (mutex_trylock(&boost_mutex)) { - boost_starttime = jiffies + - test_boost_interval * HZ; - n_rcu_torture_boosts++; - mutex_unlock(&boost_mutex); - break; - } - schedule_timeout_uninterruptible(1); - } - - /* Go do the stutter. */ -checkwait: rcu_stutter_wait("rcu_torture_boost"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - - /* Clean up and exit. */ - VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); - rcutorture_shutdown_absorb("rcu_torture_boost"); - while (!kthread_should_stop() || rbi.inflight) - schedule_timeout_uninterruptible(1); - smp_mb(); /* order accesses to ->inflight before stack-frame death. */ - destroy_rcu_head_on_stack(&rbi.rcu); - return 0; -} - -/* - * RCU torture force-quiescent-state kthread. Repeatedly induces - * bursts of calls to force_quiescent_state(), increasing the probability - * of occurrence of some important types of race conditions. - */ -static int -rcu_torture_fqs(void *arg) -{ - unsigned long fqs_resume_time; - int fqs_burst_remaining; - - VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); - do { - fqs_resume_time = jiffies + fqs_stutter * HZ; - while (ULONG_CMP_LT(jiffies, fqs_resume_time) && - !kthread_should_stop()) { - schedule_timeout_interruptible(1); - } - fqs_burst_remaining = fqs_duration; - while (fqs_burst_remaining > 0 && - !kthread_should_stop()) { - cur_ops->fqs(); - udelay(fqs_holdoff); - fqs_burst_remaining -= fqs_holdoff; - } - rcu_stutter_wait("rcu_torture_fqs"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping"); - rcutorture_shutdown_absorb("rcu_torture_fqs"); - while (!kthread_should_stop()) - schedule_timeout_uninterruptible(1); - return 0; -} - -/* - * RCU torture writer kthread. Repeatedly substitutes a new structure - * for that pointed to by rcu_torture_current, freeing the old structure - * after a series of grace periods (the "pipeline"). - */ -static int -rcu_torture_writer(void *arg) -{ - bool exp; - int i; - struct rcu_torture *rp; - struct rcu_torture *rp1; - struct rcu_torture *old_rp; - static DEFINE_RCU_RANDOM(rand); - - VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); - set_user_nice(current, 19); - - do { - schedule_timeout_uninterruptible(1); - rp = rcu_torture_alloc(); - if (rp == NULL) - continue; - rp->rtort_pipe_count = 0; - udelay(rcu_random(&rand) & 0x3ff); - old_rp = rcu_dereference_check(rcu_torture_current, - current == writer_task); - rp->rtort_mbtest = 1; - rcu_assign_pointer(rcu_torture_current, rp); - smp_wmb(); /* Mods to old_rp must follow rcu_assign_pointer() */ - if (old_rp) { - i = old_rp->rtort_pipe_count; - if (i > RCU_TORTURE_PIPE_LEN) - i = RCU_TORTURE_PIPE_LEN; - atomic_inc(&rcu_torture_wcount[i]); - old_rp->rtort_pipe_count++; - if (gp_normal == gp_exp) - exp = !!(rcu_random(&rand) & 0x80); - else - exp = gp_exp; - if (!exp) { - cur_ops->deferred_free(old_rp); - } else { - cur_ops->exp_sync(); - list_add(&old_rp->rtort_free, - &rcu_torture_removed); - list_for_each_entry_safe(rp, rp1, - &rcu_torture_removed, - rtort_free) { - i = rp->rtort_pipe_count; - if (i > RCU_TORTURE_PIPE_LEN) - i = RCU_TORTURE_PIPE_LEN; - atomic_inc(&rcu_torture_wcount[i]); - if (++rp->rtort_pipe_count >= - RCU_TORTURE_PIPE_LEN) { - rp->rtort_mbtest = 0; - list_del(&rp->rtort_free); - rcu_torture_free(rp); - } - } - } - } - rcutorture_record_progress(++rcu_torture_current_version); - rcu_stutter_wait("rcu_torture_writer"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); - rcutorture_shutdown_absorb("rcu_torture_writer"); - while (!kthread_should_stop()) - schedule_timeout_uninterruptible(1); - return 0; -} - -/* - * RCU torture fake writer kthread. Repeatedly calls sync, with a random - * delay between calls. - */ -static int -rcu_torture_fakewriter(void *arg) -{ - DEFINE_RCU_RANDOM(rand); - - VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); - set_user_nice(current, 19); - - do { - schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); - udelay(rcu_random(&rand) & 0x3ff); - if (cur_ops->cb_barrier != NULL && - rcu_random(&rand) % (nfakewriters * 8) == 0) { - cur_ops->cb_barrier(); - } else if (gp_normal == gp_exp) { - if (rcu_random(&rand) & 0x80) - cur_ops->sync(); - else - cur_ops->exp_sync(); - } else if (gp_normal) { - cur_ops->sync(); - } else { - cur_ops->exp_sync(); - } - rcu_stutter_wait("rcu_torture_fakewriter"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - - VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); - rcutorture_shutdown_absorb("rcu_torture_fakewriter"); - while (!kthread_should_stop()) - schedule_timeout_uninterruptible(1); - return 0; -} - -void rcutorture_trace_dump(void) -{ - static atomic_t beenhere = ATOMIC_INIT(0); - - if (atomic_read(&beenhere)) - return; - if (atomic_xchg(&beenhere, 1) != 0) - return; - ftrace_dump(DUMP_ALL); -} - -/* - * RCU torture reader from timer handler. Dereferences rcu_torture_current, - * incrementing the corresponding element of the pipeline array. The - * counter in the element should never be greater than 1, otherwise, the - * RCU implementation is broken. - */ -static void rcu_torture_timer(unsigned long unused) -{ - int idx; - int completed; - int completed_end; - static DEFINE_RCU_RANDOM(rand); - static DEFINE_SPINLOCK(rand_lock); - struct rcu_torture *p; - int pipe_count; - unsigned long long ts; - - idx = cur_ops->readlock(); - completed = cur_ops->completed(); - ts = rcu_trace_clock_local(); - p = rcu_dereference_check(rcu_torture_current, - rcu_read_lock_bh_held() || - rcu_read_lock_sched_held() || - srcu_read_lock_held(&srcu_ctl)); - if (p == NULL) { - /* Leave because rcu_torture_writer is not yet underway */ - cur_ops->readunlock(idx); - return; - } - if (p->rtort_mbtest == 0) - atomic_inc(&n_rcu_torture_mberror); - spin_lock(&rand_lock); - cur_ops->read_delay(&rand); - n_rcu_torture_timers++; - spin_unlock(&rand_lock); - preempt_disable(); - pipe_count = p->rtort_pipe_count; - if (pipe_count > RCU_TORTURE_PIPE_LEN) { - /* Should not happen, but... */ - pipe_count = RCU_TORTURE_PIPE_LEN; - } - completed_end = cur_ops->completed(); - if (pipe_count > 1) { - do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, ts, - completed, completed_end); - rcutorture_trace_dump(); - } - __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = completed_end - completed; - if (completed > RCU_TORTURE_PIPE_LEN) { - /* Should not happen, but... */ - completed = RCU_TORTURE_PIPE_LEN; - } - __this_cpu_inc(rcu_torture_batch[completed]); - preempt_enable(); - cur_ops->readunlock(idx); -} - -/* - * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current, - * incrementing the corresponding element of the pipeline array. The - * counter in the element should never be greater than 1, otherwise, the - * RCU implementation is broken. - */ -static int -rcu_torture_reader(void *arg) -{ - int completed; - int completed_end; - int idx; - DEFINE_RCU_RANDOM(rand); - struct rcu_torture *p; - int pipe_count; - struct timer_list t; - unsigned long long ts; - - VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); - set_user_nice(current, 19); - if (irqreader && cur_ops->irq_capable) - setup_timer_on_stack(&t, rcu_torture_timer, 0); - - do { - if (irqreader && cur_ops->irq_capable) { - if (!timer_pending(&t)) - mod_timer(&t, jiffies + 1); - } - idx = cur_ops->readlock(); - completed = cur_ops->completed(); - ts = rcu_trace_clock_local(); - p = rcu_dereference_check(rcu_torture_current, - rcu_read_lock_bh_held() || - rcu_read_lock_sched_held() || - srcu_read_lock_held(&srcu_ctl)); - if (p == NULL) { - /* Wait for rcu_torture_writer to get underway */ - cur_ops->readunlock(idx); - schedule_timeout_interruptible(HZ); - continue; - } - if (p->rtort_mbtest == 0) - atomic_inc(&n_rcu_torture_mberror); - cur_ops->read_delay(&rand); - preempt_disable(); - pipe_count = p->rtort_pipe_count; - if (pipe_count > RCU_TORTURE_PIPE_LEN) { - /* Should not happen, but... */ - pipe_count = RCU_TORTURE_PIPE_LEN; - } - completed_end = cur_ops->completed(); - if (pipe_count > 1) { - do_trace_rcu_torture_read(cur_ops->name, &p->rtort_rcu, - ts, completed, completed_end); - rcutorture_trace_dump(); - } - __this_cpu_inc(rcu_torture_count[pipe_count]); - completed = completed_end - completed; - if (completed > RCU_TORTURE_PIPE_LEN) { - /* Should not happen, but... */ - completed = RCU_TORTURE_PIPE_LEN; - } - __this_cpu_inc(rcu_torture_batch[completed]); - preempt_enable(); - cur_ops->readunlock(idx); - schedule(); - rcu_stutter_wait("rcu_torture_reader"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); - rcutorture_shutdown_absorb("rcu_torture_reader"); - if (irqreader && cur_ops->irq_capable) - del_timer_sync(&t); - while (!kthread_should_stop()) - schedule_timeout_uninterruptible(1); - return 0; -} - -/* - * Create an RCU-torture statistics message in the specified buffer. - */ -static void -rcu_torture_printk(char *page) -{ - int cpu; - int i; - long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; - long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 }; - - for_each_possible_cpu(cpu) { - for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { - pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i]; - batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i]; - } - } - for (i = RCU_TORTURE_PIPE_LEN - 1; i >= 0; i--) { - if (pipesummary[i] != 0) - break; - } - page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, - "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", - rcu_torture_current, - rcu_torture_current_version, - list_empty(&rcu_torture_freelist), - atomic_read(&n_rcu_torture_alloc), - atomic_read(&n_rcu_torture_alloc_fail), - atomic_read(&n_rcu_torture_free)); - page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", - atomic_read(&n_rcu_torture_mberror), - n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror); - page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", - n_rcu_torture_boost_failure, - n_rcu_torture_boosts, - n_rcu_torture_timers); - page += sprintf(page, - "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", - n_online_successes, n_online_attempts, - n_offline_successes, n_offline_attempts, - min_online, max_online, - min_offline, max_offline, - sum_online, sum_offline, HZ); - page += sprintf(page, "barrier: %ld/%ld:%ld", - n_barrier_successes, - n_barrier_attempts, - n_rcu_torture_barrier_error); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - if (atomic_read(&n_rcu_torture_mberror) != 0 || - n_rcu_torture_barrier_error != 0 || - n_rcu_torture_boost_ktrerror != 0 || - n_rcu_torture_boost_rterror != 0 || - n_rcu_torture_boost_failure != 0 || - i > 1) { - page += sprintf(page, "!!! "); - atomic_inc(&n_rcu_torture_error); - WARN_ON_ONCE(1); - } - page += sprintf(page, "Reader Pipe: "); - for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", pipesummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Reader Batch: "); - for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", batchsummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Free-Block Circulation: "); - for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { - page += sprintf(page, " %d", - atomic_read(&rcu_torture_wcount[i])); - } - page += sprintf(page, "\n"); - if (cur_ops->stats) - cur_ops->stats(page); -} - -/* - * Print torture statistics. Caller must ensure that there is only - * one call to this function at a given time!!! This is normally - * accomplished by relying on the module system to only have one copy - * of the module loaded, and then by giving the rcu_torture_stats - * kthread full control (or the init/cleanup functions when rcu_torture_stats - * thread is not running). - */ -static void -rcu_torture_stats_print(void) -{ - int size = nr_cpu_ids * 200 + 8192; - char *buf; - - buf = kmalloc(size, GFP_KERNEL); - if (!buf) { - pr_err("rcu-torture: Out of memory, need: %d", size); - return; - } - rcu_torture_printk(buf); - pr_alert("%s", buf); - kfree(buf); -} - -/* - * Periodically prints torture statistics, if periodic statistics printing - * was specified via the stat_interval module parameter. - * - * No need to worry about fullstop here, since this one doesn't reference - * volatile state or register callbacks. - */ -static int -rcu_torture_stats(void *arg) -{ - VERBOSE_PRINTK_STRING("rcu_torture_stats task started"); - do { - schedule_timeout_interruptible(stat_interval * HZ); - rcu_torture_stats_print(); - rcutorture_shutdown_absorb("rcu_torture_stats"); - } while (!kthread_should_stop()); - VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); - return 0; -} - -static int rcu_idle_cpu; /* Force all torture tasks off this CPU */ - -/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case - * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs. - */ -static void rcu_torture_shuffle_tasks(void) -{ - int i; - - cpumask_setall(shuffle_tmp_mask); - get_online_cpus(); - - /* No point in shuffling if there is only one online CPU (ex: UP) */ - if (num_online_cpus() == 1) { - put_online_cpus(); - return; - } - - if (rcu_idle_cpu != -1) - cpumask_clear_cpu(rcu_idle_cpu, shuffle_tmp_mask); - - set_cpus_allowed_ptr(current, shuffle_tmp_mask); - - if (reader_tasks) { - for (i = 0; i < nrealreaders; i++) - if (reader_tasks[i]) - set_cpus_allowed_ptr(reader_tasks[i], - shuffle_tmp_mask); - } - if (fakewriter_tasks) { - for (i = 0; i < nfakewriters; i++) - if (fakewriter_tasks[i]) - set_cpus_allowed_ptr(fakewriter_tasks[i], - shuffle_tmp_mask); - } - if (writer_task) - set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask); - if (stats_task) - set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask); - if (stutter_task) - set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask); - if (fqs_task) - set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask); - if (shutdown_task) - set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask); -#ifdef CONFIG_HOTPLUG_CPU - if (onoff_task) - set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask); -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - if (stall_task) - set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask); - if (barrier_cbs_tasks) - for (i = 0; i < n_barrier_cbs; i++) - if (barrier_cbs_tasks[i]) - set_cpus_allowed_ptr(barrier_cbs_tasks[i], - shuffle_tmp_mask); - if (barrier_task) - set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask); - - if (rcu_idle_cpu == -1) - rcu_idle_cpu = num_online_cpus() - 1; - else - rcu_idle_cpu--; - - put_online_cpus(); -} - -/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the - * system to become idle at a time and cut off its timer ticks. This is meant - * to test the support for such tickless idle CPU in RCU. - */ -static int -rcu_torture_shuffle(void *arg) -{ - VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started"); - do { - schedule_timeout_interruptible(shuffle_interval * HZ); - rcu_torture_shuffle_tasks(); - rcutorture_shutdown_absorb("rcu_torture_shuffle"); - } while (!kthread_should_stop()); - VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); - return 0; -} - -/* Cause the rcutorture test to "stutter", starting and stopping all - * threads periodically. - */ -static int -rcu_torture_stutter(void *arg) -{ - VERBOSE_PRINTK_STRING("rcu_torture_stutter task started"); - do { - schedule_timeout_interruptible(stutter * HZ); - stutter_pause_test = 1; - if (!kthread_should_stop()) - schedule_timeout_interruptible(stutter * HZ); - stutter_pause_test = 0; - rcutorture_shutdown_absorb("rcu_torture_stutter"); - } while (!kthread_should_stop()); - VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping"); - return 0; -} - -static inline void -rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) -{ - pr_alert("%s" TORTURE_FLAG - "--- %s: nreaders=%d nfakewriters=%d " - "stat_interval=%d verbose=%d test_no_idle_hz=%d " - "shuffle_interval=%d stutter=%d irqreader=%d " - "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " - "test_boost=%d/%d test_boost_interval=%d " - "test_boost_duration=%d shutdown_secs=%d " - "stall_cpu=%d stall_cpu_holdoff=%d " - "n_barrier_cbs=%d " - "onoff_interval=%d onoff_holdoff=%d\n", - torture_type, tag, nrealreaders, nfakewriters, - stat_interval, verbose, test_no_idle_hz, shuffle_interval, - stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, - test_boost, cur_ops->can_boost, - test_boost_interval, test_boost_duration, shutdown_secs, - stall_cpu, stall_cpu_holdoff, - n_barrier_cbs, - onoff_interval, onoff_holdoff); -} - -static struct notifier_block rcutorture_shutdown_nb = { - .notifier_call = rcutorture_shutdown_notify, -}; - -static void rcutorture_booster_cleanup(int cpu) -{ - struct task_struct *t; - - if (boost_tasks[cpu] == NULL) - return; - mutex_lock(&boost_mutex); - VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task"); - t = boost_tasks[cpu]; - boost_tasks[cpu] = NULL; - mutex_unlock(&boost_mutex); - - /* This must be outside of the mutex, otherwise deadlock! */ - kthread_stop(t); - boost_tasks[cpu] = NULL; -} - -static int rcutorture_booster_init(int cpu) -{ - int retval; - - if (boost_tasks[cpu] != NULL) - return 0; /* Already created, nothing more to do. */ - - /* Don't allow time recalculation while creating a new task. */ - mutex_lock(&boost_mutex); - VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); - boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, - cpu_to_node(cpu), - "rcu_torture_boost"); - if (IS_ERR(boost_tasks[cpu])) { - retval = PTR_ERR(boost_tasks[cpu]); - VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); - n_rcu_torture_boost_ktrerror++; - boost_tasks[cpu] = NULL; - mutex_unlock(&boost_mutex); - return retval; - } - kthread_bind(boost_tasks[cpu], cpu); - wake_up_process(boost_tasks[cpu]); - mutex_unlock(&boost_mutex); - return 0; -} - -/* - * Cause the rcutorture test to shutdown the system after the test has - * run for the time specified by the shutdown_secs module parameter. - */ -static int -rcu_torture_shutdown(void *arg) -{ - long delta; - unsigned long jiffies_snap; - - VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started"); - jiffies_snap = ACCESS_ONCE(jiffies); - while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && - !kthread_should_stop()) { - delta = shutdown_time - jiffies_snap; - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_shutdown task: %lu jiffies remaining\n", - torture_type, delta); - schedule_timeout_interruptible(delta); - jiffies_snap = ACCESS_ONCE(jiffies); - } - if (kthread_should_stop()) { - VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping"); - return 0; - } - - /* OK, shut down the system. */ - - VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system"); - shutdown_task = NULL; /* Avoid self-kill deadlock. */ - rcu_torture_cleanup(); /* Get the success/failure message. */ - kernel_power_off(); /* Shut down the system. */ - return 0; -} - -#ifdef CONFIG_HOTPLUG_CPU - -/* - * Execute random CPU-hotplug operations at the interval specified - * by the onoff_interval. - */ -static int -rcu_torture_onoff(void *arg) -{ - int cpu; - unsigned long delta; - int maxcpu = -1; - DEFINE_RCU_RANDOM(rand); - int ret; - unsigned long starttime; - - VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); - for_each_online_cpu(cpu) - maxcpu = cpu; - WARN_ON(maxcpu < 0); - if (onoff_holdoff > 0) { - VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff"); - schedule_timeout_interruptible(onoff_holdoff * HZ); - VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff"); - } - while (!kthread_should_stop()) { - cpu = (rcu_random(&rand) >> 4) % (maxcpu + 1); - if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: offlining %d\n", - torture_type, cpu); - starttime = jiffies; - n_offline_attempts++; - ret = cpu_down(cpu); - if (ret) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: offline %d failed: errno %d\n", - torture_type, cpu, ret); - } else { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: offlined %d\n", - torture_type, cpu); - n_offline_successes++; - delta = jiffies - starttime; - sum_offline += delta; - if (min_offline < 0) { - min_offline = delta; - max_offline = delta; - } - if (min_offline > delta) - min_offline = delta; - if (max_offline < delta) - max_offline = delta; - } - } else if (cpu_is_hotpluggable(cpu)) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: onlining %d\n", - torture_type, cpu); - starttime = jiffies; - n_online_attempts++; - ret = cpu_up(cpu); - if (ret) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: online %d failed: errno %d\n", - torture_type, cpu, ret); - } else { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: onlined %d\n", - torture_type, cpu); - n_online_successes++; - delta = jiffies - starttime; - sum_online += delta; - if (min_online < 0) { - min_online = delta; - max_online = delta; - } - if (min_online > delta) - min_online = delta; - if (max_online < delta) - max_online = delta; - } - } - schedule_timeout_interruptible(onoff_interval * HZ); - } - VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping"); - return 0; -} - -static int -rcu_torture_onoff_init(void) -{ - int ret; - - if (onoff_interval <= 0) - return 0; - onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff"); - if (IS_ERR(onoff_task)) { - ret = PTR_ERR(onoff_task); - onoff_task = NULL; - return ret; - } - return 0; -} - -static void rcu_torture_onoff_cleanup(void) -{ - if (onoff_task == NULL) - return; - VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); - kthread_stop(onoff_task); - onoff_task = NULL; -} - -#else /* #ifdef CONFIG_HOTPLUG_CPU */ - -static int -rcu_torture_onoff_init(void) -{ - return 0; -} - -static void rcu_torture_onoff_cleanup(void) -{ -} - -#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ - -/* - * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then - * induces a CPU stall for the time specified by stall_cpu. - */ -static int rcu_torture_stall(void *args) -{ - unsigned long stop_at; - - VERBOSE_PRINTK_STRING("rcu_torture_stall task started"); - if (stall_cpu_holdoff > 0) { - VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff"); - schedule_timeout_interruptible(stall_cpu_holdoff * HZ); - VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff"); - } - if (!kthread_should_stop()) { - stop_at = get_seconds() + stall_cpu; - /* RCU CPU stall is expected behavior in following code. */ - pr_alert("rcu_torture_stall start.\n"); - rcu_read_lock(); - preempt_disable(); - while (ULONG_CMP_LT(get_seconds(), stop_at)) - continue; /* Induce RCU CPU stall warning. */ - preempt_enable(); - rcu_read_unlock(); - pr_alert("rcu_torture_stall end.\n"); - } - rcutorture_shutdown_absorb("rcu_torture_stall"); - while (!kthread_should_stop()) - schedule_timeout_interruptible(10 * HZ); - return 0; -} - -/* Spawn CPU-stall kthread, if stall_cpu specified. */ -static int __init rcu_torture_stall_init(void) -{ - int ret; - - if (stall_cpu <= 0) - return 0; - stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall"); - if (IS_ERR(stall_task)) { - ret = PTR_ERR(stall_task); - stall_task = NULL; - return ret; - } - return 0; -} - -/* Clean up after the CPU-stall kthread, if one was spawned. */ -static void rcu_torture_stall_cleanup(void) -{ - if (stall_task == NULL) - return; - VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); - kthread_stop(stall_task); - stall_task = NULL; -} - -/* Callback function for RCU barrier testing. */ -void rcu_torture_barrier_cbf(struct rcu_head *rcu) -{ - atomic_inc(&barrier_cbs_invoked); -} - -/* kthread function to register callbacks used to test RCU barriers. */ -static int rcu_torture_barrier_cbs(void *arg) -{ - long myid = (long)arg; - bool lastphase = 0; - bool newphase; - struct rcu_head rcu; - - init_rcu_head_on_stack(&rcu); - VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); - set_user_nice(current, 19); - do { - wait_event(barrier_cbs_wq[myid], - (newphase = - ACCESS_ONCE(barrier_phase)) != lastphase || - kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP); - lastphase = newphase; - smp_mb(); /* ensure barrier_phase load before ->call(). */ - if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) - break; - cur_ops->call(&rcu, rcu_torture_barrier_cbf); - if (atomic_dec_and_test(&barrier_cbs_count)) - wake_up(&barrier_wq); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping"); - rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); - while (!kthread_should_stop()) - schedule_timeout_interruptible(1); - cur_ops->cb_barrier(); - destroy_rcu_head_on_stack(&rcu); - return 0; -} - -/* kthread function to drive and coordinate RCU barrier testing. */ -static int rcu_torture_barrier(void *arg) -{ - int i; - - VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting"); - do { - atomic_set(&barrier_cbs_invoked, 0); - atomic_set(&barrier_cbs_count, n_barrier_cbs); - smp_mb(); /* Ensure barrier_phase after prior assignments. */ - barrier_phase = !barrier_phase; - for (i = 0; i < n_barrier_cbs; i++) - wake_up(&barrier_cbs_wq[i]); - wait_event(barrier_wq, - atomic_read(&barrier_cbs_count) == 0 || - kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP); - if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) - break; - n_barrier_attempts++; - cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ - if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { - n_rcu_torture_barrier_error++; - WARN_ON_ONCE(1); - } - n_barrier_successes++; - schedule_timeout_interruptible(HZ / 10); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); - rcutorture_shutdown_absorb("rcu_torture_barrier"); - while (!kthread_should_stop()) - schedule_timeout_interruptible(1); - return 0; -} - -/* Initialize RCU barrier testing. */ -static int rcu_torture_barrier_init(void) -{ - int i; - int ret; - - if (n_barrier_cbs == 0) - return 0; - if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { - pr_alert("%s" TORTURE_FLAG - " Call or barrier ops missing for %s,\n", - torture_type, cur_ops->name); - pr_alert("%s" TORTURE_FLAG - " RCU barrier testing omitted from run.\n", - torture_type); - return 0; - } - atomic_set(&barrier_cbs_count, 0); - atomic_set(&barrier_cbs_invoked, 0); - barrier_cbs_tasks = - kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]), - GFP_KERNEL); - barrier_cbs_wq = - kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), - GFP_KERNEL); - if (barrier_cbs_tasks == NULL || !barrier_cbs_wq) - return -ENOMEM; - for (i = 0; i < n_barrier_cbs; i++) { - init_waitqueue_head(&barrier_cbs_wq[i]); - barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, - (void *)(long)i, - "rcu_torture_barrier_cbs"); - if (IS_ERR(barrier_cbs_tasks[i])) { - ret = PTR_ERR(barrier_cbs_tasks[i]); - VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); - barrier_cbs_tasks[i] = NULL; - return ret; - } - } - barrier_task = kthread_run(rcu_torture_barrier, NULL, - "rcu_torture_barrier"); - if (IS_ERR(barrier_task)) { - ret = PTR_ERR(barrier_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier"); - barrier_task = NULL; - } - return 0; -} - -/* Clean up after RCU barrier testing. */ -static void rcu_torture_barrier_cleanup(void) -{ - int i; - - if (barrier_task != NULL) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task"); - kthread_stop(barrier_task); - barrier_task = NULL; - } - if (barrier_cbs_tasks != NULL) { - for (i = 0; i < n_barrier_cbs; i++) { - if (barrier_cbs_tasks[i] != NULL) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task"); - kthread_stop(barrier_cbs_tasks[i]); - barrier_cbs_tasks[i] = NULL; - } - } - kfree(barrier_cbs_tasks); - barrier_cbs_tasks = NULL; - } - if (barrier_cbs_wq != NULL) { - kfree(barrier_cbs_wq); - barrier_cbs_wq = NULL; - } -} - -static int rcutorture_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action) { - case CPU_ONLINE: - case CPU_DOWN_FAILED: - (void)rcutorture_booster_init(cpu); - break; - case CPU_DOWN_PREPARE: - rcutorture_booster_cleanup(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block rcutorture_cpu_nb = { - .notifier_call = rcutorture_cpu_notify, -}; - -static void -rcu_torture_cleanup(void) -{ - int i; - - mutex_lock(&fullstop_mutex); - rcutorture_record_test_transition(); - if (fullstop == FULLSTOP_SHUTDOWN) { - pr_warn(/* but going down anyway, so... */ - "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); - mutex_unlock(&fullstop_mutex); - schedule_timeout_uninterruptible(10); - if (cur_ops->cb_barrier != NULL) - cur_ops->cb_barrier(); - return; - } - fullstop = FULLSTOP_RMMOD; - mutex_unlock(&fullstop_mutex); - unregister_reboot_notifier(&rcutorture_shutdown_nb); - rcu_torture_barrier_cleanup(); - rcu_torture_stall_cleanup(); - if (stutter_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); - kthread_stop(stutter_task); - } - stutter_task = NULL; - if (shuffler_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); - kthread_stop(shuffler_task); - free_cpumask_var(shuffle_tmp_mask); - } - shuffler_task = NULL; - - if (writer_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); - kthread_stop(writer_task); - } - writer_task = NULL; - - if (reader_tasks) { - for (i = 0; i < nrealreaders; i++) { - if (reader_tasks[i]) { - VERBOSE_PRINTK_STRING( - "Stopping rcu_torture_reader task"); - kthread_stop(reader_tasks[i]); - } - reader_tasks[i] = NULL; - } - kfree(reader_tasks); - reader_tasks = NULL; - } - rcu_torture_current = NULL; - - if (fakewriter_tasks) { - for (i = 0; i < nfakewriters; i++) { - if (fakewriter_tasks[i]) { - VERBOSE_PRINTK_STRING( - "Stopping rcu_torture_fakewriter task"); - kthread_stop(fakewriter_tasks[i]); - } - fakewriter_tasks[i] = NULL; - } - kfree(fakewriter_tasks); - fakewriter_tasks = NULL; - } - - if (stats_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); - kthread_stop(stats_task); - } - stats_task = NULL; - - if (fqs_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task"); - kthread_stop(fqs_task); - } - fqs_task = NULL; - if ((test_boost == 1 && cur_ops->can_boost) || - test_boost == 2) { - unregister_cpu_notifier(&rcutorture_cpu_nb); - for_each_possible_cpu(i) - rcutorture_booster_cleanup(i); - } - if (shutdown_task != NULL) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); - kthread_stop(shutdown_task); - } - shutdown_task = NULL; - rcu_torture_onoff_cleanup(); - - /* Wait for all RCU callbacks to fire. */ - - if (cur_ops->cb_barrier != NULL) - cur_ops->cb_barrier(); - - rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ - - if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) - rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); - else if (n_online_successes != n_online_attempts || - n_offline_successes != n_offline_attempts) - rcu_torture_print_module_parms(cur_ops, - "End of test: RCU_HOTPLUG"); - else - rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); -} - -#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD -static void rcu_torture_leak_cb(struct rcu_head *rhp) -{ -} - -static void rcu_torture_err_cb(struct rcu_head *rhp) -{ - /* - * This -might- happen due to race conditions, but is unlikely. - * The scenario that leads to this happening is that the - * first of the pair of duplicate callbacks is queued, - * someone else starts a grace period that includes that - * callback, then the second of the pair must wait for the - * next grace period. Unlikely, but can happen. If it - * does happen, the debug-objects subsystem won't have splatted. - */ - pr_alert("rcutorture: duplicated callback was invoked.\n"); -} -#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ - -/* - * Verify that double-free causes debug-objects to complain, but only - * if CONFIG_DEBUG_OBJECTS_RCU_HEAD=y. Otherwise, say that the test - * cannot be carried out. - */ -static void rcu_test_debug_objects(void) -{ -#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD - struct rcu_head rh1; - struct rcu_head rh2; - - init_rcu_head_on_stack(&rh1); - init_rcu_head_on_stack(&rh2); - pr_alert("rcutorture: WARN: Duplicate call_rcu() test starting.\n"); - - /* Try to queue the rh2 pair of callbacks for the same grace period. */ - preempt_disable(); /* Prevent preemption from interrupting test. */ - rcu_read_lock(); /* Make it impossible to finish a grace period. */ - call_rcu(&rh1, rcu_torture_leak_cb); /* Start grace period. */ - local_irq_disable(); /* Make it harder to start a new grace period. */ - call_rcu(&rh2, rcu_torture_leak_cb); - call_rcu(&rh2, rcu_torture_err_cb); /* Duplicate callback. */ - local_irq_enable(); - rcu_read_unlock(); - preempt_enable(); - - /* Wait for them all to get done so we can safely return. */ - rcu_barrier(); - pr_alert("rcutorture: WARN: Duplicate call_rcu() test complete.\n"); - destroy_rcu_head_on_stack(&rh1); - destroy_rcu_head_on_stack(&rh2); -#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ - pr_alert("rcutorture: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n"); -#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -} - -static int __init -rcu_torture_init(void) -{ - int i; - int cpu; - int firsterr = 0; - int retval; - static struct rcu_torture_ops *torture_ops[] = { - &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, - }; - - mutex_lock(&fullstop_mutex); - - /* Process args and tell the world that the torturer is on the job. */ - for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { - cur_ops = torture_ops[i]; - if (strcmp(torture_type, cur_ops->name) == 0) - break; - } - if (i == ARRAY_SIZE(torture_ops)) { - pr_alert("rcu-torture: invalid torture type: \"%s\"\n", - torture_type); - pr_alert("rcu-torture types:"); - for (i = 0; i < ARRAY_SIZE(torture_ops); i++) - pr_alert(" %s", torture_ops[i]->name); - pr_alert("\n"); - mutex_unlock(&fullstop_mutex); - return -EINVAL; - } - if (cur_ops->fqs == NULL && fqs_duration != 0) { - pr_alert("rcu-torture: ->fqs NULL and non-zero fqs_duration, fqs disabled.\n"); - fqs_duration = 0; - } - if (cur_ops->init) - cur_ops->init(); /* no "goto unwind" prior to this point!!! */ - - if (nreaders >= 0) - nrealreaders = nreaders; - else - nrealreaders = 2 * num_online_cpus(); - rcu_torture_print_module_parms(cur_ops, "Start of test"); - fullstop = FULLSTOP_DONTSTOP; - - /* Set up the freelist. */ - - INIT_LIST_HEAD(&rcu_torture_freelist); - for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) { - rcu_tortures[i].rtort_mbtest = 0; - list_add_tail(&rcu_tortures[i].rtort_free, - &rcu_torture_freelist); - } - - /* Initialize the statistics so that each run gets its own numbers. */ - - rcu_torture_current = NULL; - rcu_torture_current_version = 0; - atomic_set(&n_rcu_torture_alloc, 0); - atomic_set(&n_rcu_torture_alloc_fail, 0); - atomic_set(&n_rcu_torture_free, 0); - atomic_set(&n_rcu_torture_mberror, 0); - atomic_set(&n_rcu_torture_error, 0); - n_rcu_torture_barrier_error = 0; - n_rcu_torture_boost_ktrerror = 0; - n_rcu_torture_boost_rterror = 0; - n_rcu_torture_boost_failure = 0; - n_rcu_torture_boosts = 0; - for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - atomic_set(&rcu_torture_wcount[i], 0); - for_each_possible_cpu(cpu) { - for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { - per_cpu(rcu_torture_count, cpu)[i] = 0; - per_cpu(rcu_torture_batch, cpu)[i] = 0; - } - } - - /* Start up the kthreads. */ - - VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); - writer_task = kthread_create(rcu_torture_writer, NULL, - "rcu_torture_writer"); - if (IS_ERR(writer_task)) { - firsterr = PTR_ERR(writer_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create writer"); - writer_task = NULL; - goto unwind; - } - wake_up_process(writer_task); - fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), - GFP_KERNEL); - if (fakewriter_tasks == NULL) { - VERBOSE_PRINTK_ERRSTRING("out of memory"); - firsterr = -ENOMEM; - goto unwind; - } - for (i = 0; i < nfakewriters; i++) { - VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); - fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, - "rcu_torture_fakewriter"); - if (IS_ERR(fakewriter_tasks[i])) { - firsterr = PTR_ERR(fakewriter_tasks[i]); - VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter"); - fakewriter_tasks[i] = NULL; - goto unwind; - } - } - reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), - GFP_KERNEL); - if (reader_tasks == NULL) { - VERBOSE_PRINTK_ERRSTRING("out of memory"); - firsterr = -ENOMEM; - goto unwind; - } - for (i = 0; i < nrealreaders; i++) { - VERBOSE_PRINTK_STRING("Creating rcu_torture_reader task"); - reader_tasks[i] = kthread_run(rcu_torture_reader, NULL, - "rcu_torture_reader"); - if (IS_ERR(reader_tasks[i])) { - firsterr = PTR_ERR(reader_tasks[i]); - VERBOSE_PRINTK_ERRSTRING("Failed to create reader"); - reader_tasks[i] = NULL; - goto unwind; - } - } - if (stat_interval > 0) { - VERBOSE_PRINTK_STRING("Creating rcu_torture_stats task"); - stats_task = kthread_run(rcu_torture_stats, NULL, - "rcu_torture_stats"); - if (IS_ERR(stats_task)) { - firsterr = PTR_ERR(stats_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create stats"); - stats_task = NULL; - goto unwind; - } - } - if (test_no_idle_hz) { - rcu_idle_cpu = num_online_cpus() - 1; - - if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) { - firsterr = -ENOMEM; - VERBOSE_PRINTK_ERRSTRING("Failed to alloc mask"); - goto unwind; - } - - /* Create the shuffler thread */ - shuffler_task = kthread_run(rcu_torture_shuffle, NULL, - "rcu_torture_shuffle"); - if (IS_ERR(shuffler_task)) { - free_cpumask_var(shuffle_tmp_mask); - firsterr = PTR_ERR(shuffler_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler"); - shuffler_task = NULL; - goto unwind; - } - } - if (stutter < 0) - stutter = 0; - if (stutter) { - /* Create the stutter thread */ - stutter_task = kthread_run(rcu_torture_stutter, NULL, - "rcu_torture_stutter"); - if (IS_ERR(stutter_task)) { - firsterr = PTR_ERR(stutter_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create stutter"); - stutter_task = NULL; - goto unwind; - } - } - if (fqs_duration < 0) - fqs_duration = 0; - if (fqs_duration) { - /* Create the stutter thread */ - fqs_task = kthread_run(rcu_torture_fqs, NULL, - "rcu_torture_fqs"); - if (IS_ERR(fqs_task)) { - firsterr = PTR_ERR(fqs_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create fqs"); - fqs_task = NULL; - goto unwind; - } - } - if (test_boost_interval < 1) - test_boost_interval = 1; - if (test_boost_duration < 2) - test_boost_duration = 2; - if ((test_boost == 1 && cur_ops->can_boost) || - test_boost == 2) { - - boost_starttime = jiffies + test_boost_interval * HZ; - register_cpu_notifier(&rcutorture_cpu_nb); - for_each_possible_cpu(i) { - if (cpu_is_offline(i)) - continue; /* Heuristic: CPU can go offline. */ - retval = rcutorture_booster_init(i); - if (retval < 0) { - firsterr = retval; - goto unwind; - } - } - } - if (shutdown_secs > 0) { - shutdown_time = jiffies + shutdown_secs * HZ; - shutdown_task = kthread_create(rcu_torture_shutdown, NULL, - "rcu_torture_shutdown"); - if (IS_ERR(shutdown_task)) { - firsterr = PTR_ERR(shutdown_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); - shutdown_task = NULL; - goto unwind; - } - wake_up_process(shutdown_task); - } - i = rcu_torture_onoff_init(); - if (i != 0) { - firsterr = i; - goto unwind; - } - register_reboot_notifier(&rcutorture_shutdown_nb); - i = rcu_torture_stall_init(); - if (i != 0) { - firsterr = i; - goto unwind; - } - retval = rcu_torture_barrier_init(); - if (retval != 0) { - firsterr = retval; - goto unwind; - } - if (object_debug) - rcu_test_debug_objects(); - rcutorture_record_test_transition(); - mutex_unlock(&fullstop_mutex); - return 0; - -unwind: - mutex_unlock(&fullstop_mutex); - rcu_torture_cleanup(); - return firsterr; -} - -module_init(rcu_torture_init); -module_exit(rcu_torture_cleanup); diff --git a/kernel/torture.c b/kernel/torture.c new file mode 100644 index 0000000..c82c70f --- /dev/null +++ b/kernel/torture.c @@ -0,0 +1,71 @@ +/* + * Common functions for in-kernel torture tests. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2014 + * + * Author: Paul E. McKenney + * Based on kernel/rcu/torture.c. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Paul E. McKenney "); + +#define TORTURE_RANDOM_MULT 39916801 /* prime */ +#define TORTURE_RANDOM_ADD 479001701 /* prime */ +#define TORTURE_RANDOM_REFRESH 10000 + +/* + * Crude but fast random-number generator. Uses a linear congruential + * generator, with occasional help from cpu_clock(). + */ +unsigned long +torture_random(struct torture_random_state *trsp) +{ + if (--trsp->trs_count < 0) { + trsp->trs_state += (unsigned long)local_clock(); + trsp->trs_count = TORTURE_RANDOM_REFRESH; + } + trsp->trs_state = trsp->trs_state * TORTURE_RANDOM_MULT + + TORTURE_RANDOM_ADD; + return swahw32(trsp->trs_state); +} +EXPORT_SYMBOL_GPL(torture_random); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a48abea..2bfb4e5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1141,9 +1141,14 @@ config SPARSE_RCU_POINTER Say N if you are unsure. +config TORTURE_TEST + tristate + default n + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL + select TORTURE_TEST default n help This option provides a kernel module that runs torture tests -- cgit v0.10.2 From daeda23de108fab1202b6843e5adb59de0a261f9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Jan 2014 12:31:20 -0800 Subject: rcutorture: Don't create results directory for dryruns This commit prevents the results directory from being created for dryruns. However, a script generated from a dryrun will create the results directory should it be run. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett > $resdir/$ds/log -pwd > $resdir/$ds/testid.txt -if test -d .git -then - git status >> $resdir/$ds/testid.txt - git rev-parse HEAD >> $resdir/$ds/testid.txt + touch $resdir/$ds/log + echo $scriptname $args >> $resdir/$ds/log + + pwd > $resdir/$ds/testid.txt + if test -d .git + then + git status >> $resdir/$ds/testid.txt + git rev-parse HEAD >> $resdir/$ds/testid.txt + fi fi # Create a file of test-name/#cpus pairs, sorted by decreasing #cpus. @@ -371,6 +369,8 @@ then echo RCU_QEMU_CMD="$RCU_QEMU_CMD; export RCU_QEMU_CMD" echo RCU_QEMU_INTERACTIVE="$RCU_QEMU_INTERACTIVE; export RCU_QEMU_INTERACTIVE" echo RCU_QEMU_MAC="$RCU_QEMU_MAC; export RCU_QEMU_MAC" + echo "mkdir -p "$resdir" || :" + echo "mkdir $resdir/$ds" cat $T/script exit 0 elif test "$dryrun" = sched -- cgit v0.10.2 From 9e2502254132261e0ea8010692fd447b1cedf627 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Jan 2014 16:27:00 -0800 Subject: rcutorture: Abstract torture_param() Create a torture_param() macro and apply it to rcutorture in order to save a few lines of code. This same macro may be applied to other torture frameworks. Signed-off-by: Paul E. McKenney diff --git a/include/linux/torture.h b/include/linux/torture.h index 979e3e6..5aae880 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -35,13 +35,18 @@ #include #include +/* Definitions for a non-string torture-test module parameter. */ +#define torture_param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg); + +/* Low-rider random number generator. */ struct torture_random_state { unsigned long trs_state; long trs_count; }; - #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } - unsigned long torture_random(struct torture_random_state *trsp); #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 94b1cd8b..930791e 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -59,78 +59,43 @@ MODULE_ALIAS("rcutorture"); #endif #define MODULE_PARAM_PREFIX "rcutorture." -static int fqs_duration; -module_param(fqs_duration, int, 0444); -MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable"); -static int fqs_holdoff; -module_param(fqs_holdoff, int, 0444); -MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); -static int fqs_stutter = 3; -module_param(fqs_stutter, int, 0444); -MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); -static bool gp_exp; -module_param(gp_exp, bool, 0444); -MODULE_PARM_DESC(gp_exp, "Use expedited GP wait primitives"); -static bool gp_normal; -module_param(gp_normal, bool, 0444); -MODULE_PARM_DESC(gp_normal, "Use normal (non-expedited) GP wait primitives"); -static int irqreader = 1; -module_param(irqreader, int, 0444); -MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers"); -static int n_barrier_cbs; -module_param(n_barrier_cbs, int, 0444); -MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing"); -static int nfakewriters = 4; -module_param(nfakewriters, int, 0444); -MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); -static int nreaders = -1; -module_param(nreaders, int, 0444); -MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); -static int object_debug; -module_param(object_debug, int, 0444); -MODULE_PARM_DESC(object_debug, "Enable debug-object double call_rcu() testing"); -static int onoff_holdoff; -module_param(onoff_holdoff, int, 0444); -MODULE_PARM_DESC(onoff_holdoff, "Time after boot before CPU hotplugs (s)"); -static int onoff_interval; -module_param(onoff_interval, int, 0444); -MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); -static int shuffle_interval = 3; -module_param(shuffle_interval, int, 0444); -MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); -static int shutdown_secs; -module_param(shutdown_secs, int, 0444); -MODULE_PARM_DESC(shutdown_secs, "Shutdown time (s), <= zero to disable."); -static int stall_cpu; -module_param(stall_cpu, int, 0444); -MODULE_PARM_DESC(stall_cpu, "Stall duration (s), zero to disable."); -static int stall_cpu_holdoff = 10; -module_param(stall_cpu_holdoff, int, 0444); -MODULE_PARM_DESC(stall_cpu_holdoff, "Time to wait before starting stall (s)."); -static int stat_interval = 60; -module_param(stat_interval, int, 0644); -MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); -static int stutter = 5; -module_param(stutter, int, 0444); -MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test"); -static int test_boost = 1; -module_param(test_boost, int, 0444); -MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); -static int test_boost_duration = 4; -module_param(test_boost_duration, int, 0444); -MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); -static int test_boost_interval = 7; -module_param(test_boost_interval, int, 0444); -MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); -static bool test_no_idle_hz = true; -module_param(test_no_idle_hz, bool, 0444); -MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); +torture_param(int, fqs_duration, 0, + "Duration of fqs bursts (us), 0 to disable"); +torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); +torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)"); +torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); +torture_param(bool, gp_normal, false, + "Use normal (non-expedited) GP wait primitives"); +torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); +torture_param(int, n_barrier_cbs, 0, + "# of callbacks/kthreads for barrier testing"); +torture_param(int, nfakewriters, 4, "Number of RCU fake writer threads"); +torture_param(int, nreaders, -1, "Number of RCU reader threads"); +torture_param(int, object_debug, 0, + "Enable debug-object double call_rcu() testing"); +torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); +torture_param(int, onoff_interval, 0, + "Time between CPU hotplugs (s), 0=disable"); +torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles"); +torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable."); +torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable."); +torture_param(int, stall_cpu_holdoff, 10, + "Time to wait before starting stall (s)."); +torture_param(int, stat_interval, 60, + "Number of seconds between stats printk()s"); +torture_param(int, stutter, 5, "Number of seconds to run/halt test"); +torture_param(int, test_boost, 1, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); +torture_param(int, test_boost_duration, 4, + "Duration of each boost test, seconds."); +torture_param(int, test_boost_interval, 7, + "Interval between boost tests, seconds."); +torture_param(bool, test_no_idle_hz, true, + "Test support for tickless idle CPUs"); +torture_param(bool, verbose, false, "Enable verbose debugging printk()s"); + static char *torture_type = "rcu"; module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); -static bool verbose; -module_param(verbose, bool, 0444); -MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); #define TORTURE_FLAG "-torture:" #define PRINTK_STRING(s) \ -- cgit v0.10.2 From 5ccf60f23d33afd53568cff4f3f421f2ca624401 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Jan 2014 07:25:25 -0800 Subject: rcutorture: Rename PRINTK to TOROUT Since it doesn't do printk()s anymore anyway, this commit renames these macros from PRINTK to TOROUT (short for torture output). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 930791e..34a75b1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -98,11 +98,11 @@ module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); #define TORTURE_FLAG "-torture:" -#define PRINTK_STRING(s) \ +#define TOROUT_STRING(s) \ do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) -#define VERBOSE_PRINTK_STRING(s) \ +#define VERBOSE_TOROUT_STRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) -#define VERBOSE_PRINTK_ERRSTRING(s) \ +#define VERBOSE_TOROUT_ERRSTRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) static int nrealreaders; @@ -619,12 +619,12 @@ static int rcu_torture_boost(void *arg) struct rcu_boost_inflight rbi = { .inflight = 0 }; struct sched_param sp; - VERBOSE_PRINTK_STRING("rcu_torture_boost started"); + VERBOSE_TOROUT_STRING("rcu_torture_boost started"); /* Set real-time priority. */ sp.sched_priority = 1; if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { - VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); + VERBOSE_TOROUT_STRING("rcu_torture_boost RT prio failed!"); n_rcu_torture_boost_rterror++; } @@ -652,7 +652,7 @@ static int rcu_torture_boost(void *arg) call_rcu(&rbi.rcu, rcu_torture_boost_cb); if (jiffies - call_rcu_time > test_boost_duration * HZ - HZ / 2) { - VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); + VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed"); n_rcu_torture_boost_failure++; } call_rcu_time = jiffies; @@ -688,7 +688,7 @@ checkwait: rcu_stutter_wait("rcu_torture_boost"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); /* Clean up and exit. */ - VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_boost task stopping"); rcutorture_shutdown_absorb("rcu_torture_boost"); while (!kthread_should_stop() || rbi.inflight) schedule_timeout_uninterruptible(1); @@ -708,7 +708,7 @@ rcu_torture_fqs(void *arg) unsigned long fqs_resume_time; int fqs_burst_remaining; - VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); + VERBOSE_TOROUT_STRING("rcu_torture_fqs task started"); do { fqs_resume_time = jiffies + fqs_stutter * HZ; while (ULONG_CMP_LT(jiffies, fqs_resume_time) && @@ -724,7 +724,7 @@ rcu_torture_fqs(void *arg) } rcu_stutter_wait("rcu_torture_fqs"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_fqs task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_fqs task stopping"); rcutorture_shutdown_absorb("rcu_torture_fqs"); while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); @@ -746,7 +746,7 @@ rcu_torture_writer(void *arg) struct rcu_torture *old_rp; static DEFINE_TORTURE_RANDOM(rand); - VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); + VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); set_user_nice(current, 19); do { @@ -796,7 +796,7 @@ rcu_torture_writer(void *arg) rcutorture_record_progress(++rcu_torture_current_version); rcu_stutter_wait("rcu_torture_writer"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_writer task stopping"); rcutorture_shutdown_absorb("rcu_torture_writer"); while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); @@ -812,7 +812,7 @@ rcu_torture_fakewriter(void *arg) { DEFINE_TORTURE_RANDOM(rand); - VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); + VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started"); set_user_nice(current, 19); do { @@ -834,7 +834,7 @@ rcu_torture_fakewriter(void *arg) rcu_stutter_wait("rcu_torture_fakewriter"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task stopping"); rcutorture_shutdown_absorb("rcu_torture_fakewriter"); while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); @@ -928,7 +928,7 @@ rcu_torture_reader(void *arg) struct timer_list t; unsigned long long ts; - VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); + VERBOSE_TOROUT_STRING("rcu_torture_reader task started"); set_user_nice(current, 19); if (irqreader && cur_ops->irq_capable) setup_timer_on_stack(&t, rcu_torture_timer, 0); @@ -978,7 +978,7 @@ rcu_torture_reader(void *arg) schedule(); rcu_stutter_wait("rcu_torture_reader"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_reader task stopping"); rcutorture_shutdown_absorb("rcu_torture_reader"); if (irqreader && cur_ops->irq_capable) del_timer_sync(&t); @@ -1099,13 +1099,13 @@ rcu_torture_stats_print(void) static int rcu_torture_stats(void *arg) { - VERBOSE_PRINTK_STRING("rcu_torture_stats task started"); + VERBOSE_TOROUT_STRING("rcu_torture_stats task started"); do { schedule_timeout_interruptible(stat_interval * HZ); rcu_torture_stats_print(); rcutorture_shutdown_absorb("rcu_torture_stats"); } while (!kthread_should_stop()); - VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_stats task stopping"); return 0; } @@ -1183,13 +1183,13 @@ static void rcu_torture_shuffle_tasks(void) static int rcu_torture_shuffle(void *arg) { - VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started"); + VERBOSE_TOROUT_STRING("rcu_torture_shuffle task started"); do { schedule_timeout_interruptible(shuffle_interval * HZ); rcu_torture_shuffle_tasks(); rcutorture_shutdown_absorb("rcu_torture_shuffle"); } while (!kthread_should_stop()); - VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_shuffle task stopping"); return 0; } @@ -1199,7 +1199,7 @@ rcu_torture_shuffle(void *arg) static int rcu_torture_stutter(void *arg) { - VERBOSE_PRINTK_STRING("rcu_torture_stutter task started"); + VERBOSE_TOROUT_STRING("rcu_torture_stutter task started"); do { schedule_timeout_interruptible(stutter * HZ); stutter_pause_test = 1; @@ -1208,7 +1208,7 @@ rcu_torture_stutter(void *arg) stutter_pause_test = 0; rcutorture_shutdown_absorb("rcu_torture_stutter"); } while (!kthread_should_stop()); - VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_stutter task stopping"); return 0; } @@ -1246,7 +1246,7 @@ static void rcutorture_booster_cleanup(int cpu) if (boost_tasks[cpu] == NULL) return; mutex_lock(&boost_mutex); - VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_boost task"); t = boost_tasks[cpu]; boost_tasks[cpu] = NULL; mutex_unlock(&boost_mutex); @@ -1265,13 +1265,13 @@ static int rcutorture_booster_init(int cpu) /* Don't allow time recalculation while creating a new task. */ mutex_lock(&boost_mutex); - VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); + VERBOSE_TOROUT_STRING("Creating rcu_torture_boost task"); boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL, cpu_to_node(cpu), "rcu_torture_boost"); if (IS_ERR(boost_tasks[cpu])) { retval = PTR_ERR(boost_tasks[cpu]); - VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); + VERBOSE_TOROUT_STRING("rcu_torture_boost task create failed"); n_rcu_torture_boost_ktrerror++; boost_tasks[cpu] = NULL; mutex_unlock(&boost_mutex); @@ -1293,7 +1293,7 @@ rcu_torture_shutdown(void *arg) long delta; unsigned long jiffies_snap; - VERBOSE_PRINTK_STRING("rcu_torture_shutdown task started"); + VERBOSE_TOROUT_STRING("rcu_torture_shutdown task started"); jiffies_snap = ACCESS_ONCE(jiffies); while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && !kthread_should_stop()) { @@ -1306,13 +1306,13 @@ rcu_torture_shutdown(void *arg) jiffies_snap = ACCESS_ONCE(jiffies); } if (kthread_should_stop()) { - VERBOSE_PRINTK_STRING("rcu_torture_shutdown task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_shutdown task stopping"); return 0; } /* OK, shut down the system. */ - VERBOSE_PRINTK_STRING("rcu_torture_shutdown task shutting down system"); + VERBOSE_TOROUT_STRING("rcu_torture_shutdown task shutting down system"); shutdown_task = NULL; /* Avoid self-kill deadlock. */ rcu_torture_cleanup(); /* Get the success/failure message. */ kernel_power_off(); /* Shut down the system. */ @@ -1335,14 +1335,14 @@ rcu_torture_onoff(void *arg) int ret; unsigned long starttime; - VERBOSE_PRINTK_STRING("rcu_torture_onoff task started"); + VERBOSE_TOROUT_STRING("rcu_torture_onoff task started"); for_each_online_cpu(cpu) maxcpu = cpu; WARN_ON(maxcpu < 0); if (onoff_holdoff > 0) { - VERBOSE_PRINTK_STRING("rcu_torture_onoff begin holdoff"); + VERBOSE_TOROUT_STRING("rcu_torture_onoff begin holdoff"); schedule_timeout_interruptible(onoff_holdoff * HZ); - VERBOSE_PRINTK_STRING("rcu_torture_onoff end holdoff"); + VERBOSE_TOROUT_STRING("rcu_torture_onoff end holdoff"); } while (!kthread_should_stop()) { cpu = (torture_random(&rand) >> 4) % (maxcpu + 1); @@ -1409,7 +1409,7 @@ rcu_torture_onoff(void *arg) } schedule_timeout_interruptible(onoff_interval * HZ); } - VERBOSE_PRINTK_STRING("rcu_torture_onoff task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_onoff task stopping"); return 0; } @@ -1433,7 +1433,7 @@ static void rcu_torture_onoff_cleanup(void) { if (onoff_task == NULL) return; - VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_onoff task"); kthread_stop(onoff_task); onoff_task = NULL; } @@ -1460,11 +1460,11 @@ static int rcu_torture_stall(void *args) { unsigned long stop_at; - VERBOSE_PRINTK_STRING("rcu_torture_stall task started"); + VERBOSE_TOROUT_STRING("rcu_torture_stall task started"); if (stall_cpu_holdoff > 0) { - VERBOSE_PRINTK_STRING("rcu_torture_stall begin holdoff"); + VERBOSE_TOROUT_STRING("rcu_torture_stall begin holdoff"); schedule_timeout_interruptible(stall_cpu_holdoff * HZ); - VERBOSE_PRINTK_STRING("rcu_torture_stall end holdoff"); + VERBOSE_TOROUT_STRING("rcu_torture_stall end holdoff"); } if (!kthread_should_stop()) { stop_at = get_seconds() + stall_cpu; @@ -1505,7 +1505,7 @@ static void rcu_torture_stall_cleanup(void) { if (stall_task == NULL) return; - VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_stall_task."); kthread_stop(stall_task); stall_task = NULL; } @@ -1525,7 +1525,7 @@ static int rcu_torture_barrier_cbs(void *arg) struct rcu_head rcu; init_rcu_head_on_stack(&rcu); - VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); + VERBOSE_TOROUT_STRING("rcu_torture_barrier_cbs task started"); set_user_nice(current, 19); do { wait_event(barrier_cbs_wq[myid], @@ -1541,7 +1541,7 @@ static int rcu_torture_barrier_cbs(void *arg) if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_barrier_cbs task stopping"); rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); while (!kthread_should_stop()) schedule_timeout_interruptible(1); @@ -1555,7 +1555,7 @@ static int rcu_torture_barrier(void *arg) { int i; - VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting"); + VERBOSE_TOROUT_STRING("rcu_torture_barrier task starting"); do { atomic_set(&barrier_cbs_invoked, 0); atomic_set(&barrier_cbs_count, n_barrier_cbs); @@ -1578,7 +1578,7 @@ static int rcu_torture_barrier(void *arg) n_barrier_successes++; schedule_timeout_interruptible(HZ / 10); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); - VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); + VERBOSE_TOROUT_STRING("rcu_torture_barrier task stopping"); rcutorture_shutdown_absorb("rcu_torture_barrier"); while (!kthread_should_stop()) schedule_timeout_interruptible(1); @@ -1619,7 +1619,7 @@ static int rcu_torture_barrier_init(void) "rcu_torture_barrier_cbs"); if (IS_ERR(barrier_cbs_tasks[i])) { ret = PTR_ERR(barrier_cbs_tasks[i]); - VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); + VERBOSE_TOROUT_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); barrier_cbs_tasks[i] = NULL; return ret; } @@ -1628,7 +1628,7 @@ static int rcu_torture_barrier_init(void) "rcu_torture_barrier"); if (IS_ERR(barrier_task)) { ret = PTR_ERR(barrier_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier"); + VERBOSE_TOROUT_ERRSTRING("Failed to create rcu_torture_barrier"); barrier_task = NULL; } return 0; @@ -1640,14 +1640,14 @@ static void rcu_torture_barrier_cleanup(void) int i; if (barrier_task != NULL) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_barrier task"); kthread_stop(barrier_task); barrier_task = NULL; } if (barrier_cbs_tasks != NULL) { for (i = 0; i < n_barrier_cbs; i++) { if (barrier_cbs_tasks[i] != NULL) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_barrier_cbs task"); kthread_stop(barrier_cbs_tasks[i]); barrier_cbs_tasks[i] = NULL; } @@ -1706,19 +1706,19 @@ rcu_torture_cleanup(void) rcu_torture_barrier_cleanup(); rcu_torture_stall_cleanup(); if (stutter_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_stutter task"); kthread_stop(stutter_task); } stutter_task = NULL; if (shuffler_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_shuffle task"); kthread_stop(shuffler_task); free_cpumask_var(shuffle_tmp_mask); } shuffler_task = NULL; if (writer_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_writer task"); kthread_stop(writer_task); } writer_task = NULL; @@ -1726,7 +1726,7 @@ rcu_torture_cleanup(void) if (reader_tasks) { for (i = 0; i < nrealreaders; i++) { if (reader_tasks[i]) { - VERBOSE_PRINTK_STRING( + VERBOSE_TOROUT_STRING( "Stopping rcu_torture_reader task"); kthread_stop(reader_tasks[i]); } @@ -1740,7 +1740,7 @@ rcu_torture_cleanup(void) if (fakewriter_tasks) { for (i = 0; i < nfakewriters; i++) { if (fakewriter_tasks[i]) { - VERBOSE_PRINTK_STRING( + VERBOSE_TOROUT_STRING( "Stopping rcu_torture_fakewriter task"); kthread_stop(fakewriter_tasks[i]); } @@ -1751,13 +1751,13 @@ rcu_torture_cleanup(void) } if (stats_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_stats task"); kthread_stop(stats_task); } stats_task = NULL; if (fqs_task) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_fqs task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_fqs task"); kthread_stop(fqs_task); } fqs_task = NULL; @@ -1768,7 +1768,7 @@ rcu_torture_cleanup(void) rcutorture_booster_cleanup(i); } if (shutdown_task != NULL) { - VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); + VERBOSE_TOROUT_STRING("Stopping rcu_torture_shutdown task"); kthread_stop(shutdown_task); } shutdown_task = NULL; @@ -1924,12 +1924,12 @@ rcu_torture_init(void) /* Start up the kthreads. */ - VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task"); + VERBOSE_TOROUT_STRING("Creating rcu_torture_writer task"); writer_task = kthread_create(rcu_torture_writer, NULL, "rcu_torture_writer"); if (IS_ERR(writer_task)) { firsterr = PTR_ERR(writer_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create writer"); + VERBOSE_TOROUT_ERRSTRING("Failed to create writer"); writer_task = NULL; goto unwind; } @@ -1937,17 +1937,17 @@ rcu_torture_init(void) fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), GFP_KERNEL); if (fakewriter_tasks == NULL) { - VERBOSE_PRINTK_ERRSTRING("out of memory"); + VERBOSE_TOROUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } for (i = 0; i < nfakewriters; i++) { - VERBOSE_PRINTK_STRING("Creating rcu_torture_fakewriter task"); + VERBOSE_TOROUT_STRING("Creating rcu_torture_fakewriter task"); fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, "rcu_torture_fakewriter"); if (IS_ERR(fakewriter_tasks[i])) { firsterr = PTR_ERR(fakewriter_tasks[i]); - VERBOSE_PRINTK_ERRSTRING("Failed to create fakewriter"); + VERBOSE_TOROUT_ERRSTRING("Failed to create fakewriter"); fakewriter_tasks[i] = NULL; goto unwind; } @@ -1955,28 +1955,28 @@ rcu_torture_init(void) reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), GFP_KERNEL); if (reader_tasks == NULL) { - VERBOSE_PRINTK_ERRSTRING("out of memory"); + VERBOSE_TOROUT_ERRSTRING("out of memory"); firsterr = -ENOMEM; goto unwind; } for (i = 0; i < nrealreaders; i++) { - VERBOSE_PRINTK_STRING("Creating rcu_torture_reader task"); + VERBOSE_TOROUT_STRING("Creating rcu_torture_reader task"); reader_tasks[i] = kthread_run(rcu_torture_reader, NULL, "rcu_torture_reader"); if (IS_ERR(reader_tasks[i])) { firsterr = PTR_ERR(reader_tasks[i]); - VERBOSE_PRINTK_ERRSTRING("Failed to create reader"); + VERBOSE_TOROUT_ERRSTRING("Failed to create reader"); reader_tasks[i] = NULL; goto unwind; } } if (stat_interval > 0) { - VERBOSE_PRINTK_STRING("Creating rcu_torture_stats task"); + VERBOSE_TOROUT_STRING("Creating rcu_torture_stats task"); stats_task = kthread_run(rcu_torture_stats, NULL, "rcu_torture_stats"); if (IS_ERR(stats_task)) { firsterr = PTR_ERR(stats_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create stats"); + VERBOSE_TOROUT_ERRSTRING("Failed to create stats"); stats_task = NULL; goto unwind; } @@ -1986,7 +1986,7 @@ rcu_torture_init(void) if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) { firsterr = -ENOMEM; - VERBOSE_PRINTK_ERRSTRING("Failed to alloc mask"); + VERBOSE_TOROUT_ERRSTRING("Failed to alloc mask"); goto unwind; } @@ -1996,7 +1996,7 @@ rcu_torture_init(void) if (IS_ERR(shuffler_task)) { free_cpumask_var(shuffle_tmp_mask); firsterr = PTR_ERR(shuffler_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler"); + VERBOSE_TOROUT_ERRSTRING("Failed to create shuffler"); shuffler_task = NULL; goto unwind; } @@ -2009,7 +2009,7 @@ rcu_torture_init(void) "rcu_torture_stutter"); if (IS_ERR(stutter_task)) { firsterr = PTR_ERR(stutter_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create stutter"); + VERBOSE_TOROUT_ERRSTRING("Failed to create stutter"); stutter_task = NULL; goto unwind; } @@ -2022,7 +2022,7 @@ rcu_torture_init(void) "rcu_torture_fqs"); if (IS_ERR(fqs_task)) { firsterr = PTR_ERR(fqs_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create fqs"); + VERBOSE_TOROUT_ERRSTRING("Failed to create fqs"); fqs_task = NULL; goto unwind; } @@ -2052,7 +2052,7 @@ rcu_torture_init(void) "rcu_torture_shutdown"); if (IS_ERR(shutdown_task)) { firsterr = PTR_ERR(shutdown_task); - VERBOSE_PRINTK_ERRSTRING("Failed to create shutdown"); + VERBOSE_TOROUT_ERRSTRING("Failed to create shutdown"); shutdown_task = NULL; goto unwind; } -- cgit v0.10.2 From c2884de38e01134ae040d55aa5644049d1bb850f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Jan 2014 07:30:50 -0800 Subject: rcutorture: Abstract TOROUT_STRING() and friends These diagnostic macros are not confined to torturing RCU, so this commit makes them available to other torture tests. Also removed the do-while from TOROUT_STRING() in response to checkpatch complaints. Signed-off-by: Paul E. McKenney diff --git a/include/linux/torture.h b/include/linux/torture.h index 5aae880..09be888 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -41,6 +41,14 @@ module_param(name, type, 0444); \ MODULE_PARM_DESC(name, msg); +#define TORTURE_FLAG "-torture:" +#define TOROUT_STRING(s) \ + pr_alert("%s" TORTURE_FLAG s "\n", torture_type) +#define VERBOSE_TOROUT_STRING(s) \ + do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) +#define VERBOSE_TOROUT_ERRSTRING(s) \ + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) + /* Low-rider random number generator. */ struct torture_random_state { unsigned long trs_state; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 34a75b1..86fd8c1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -97,14 +97,6 @@ static char *torture_type = "rcu"; module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); -#define TORTURE_FLAG "-torture:" -#define TOROUT_STRING(s) \ - do { pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) -#define VERBOSE_TOROUT_STRING(s) \ - do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) -#define VERBOSE_TOROUT_ERRSTRING(s) \ - do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) - static int nrealreaders; static struct task_struct *writer_task; static struct task_struct **fakewriter_tasks; -- cgit v0.10.2 From f67a33561e6e5463b548219df98130da95f2e4a7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 29 Jan 2014 07:40:27 -0800 Subject: rcutorture: Abstract torture_shutdown_absorb() Because handling races between rmmod and normal shutdown is not specific to rcutorture, this commit renames rcutorture_shutdown_absorb() to torture_shutdown_absorb() and pulls it out into then kernel/torture.c module. This implies pulling the fullstop mechanism into kernel/torture.c as well. The exporting of fullstop and fullstop_mutex is ugly and must die. And it does in fact die in later commits that introduce higher-level APIs that encapsulate both of these variables. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett ` diff --git a/include/linux/torture.h b/include/linux/torture.h index 09be888..8474d77 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -41,6 +41,18 @@ module_param(name, type, 0444); \ MODULE_PARM_DESC(name, msg); +/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ +#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ +#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ +#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ +extern int fullstop; +/* Protect fullstop transitions and spawning of kthreads. */ +extern struct mutex fullstop_mutex; + +/* Common module parameters. */ +extern char *torture_type; +extern bool verbose; + #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ pr_alert("%s" TORTURE_FLAG s "\n", torture_type) @@ -57,4 +69,7 @@ struct torture_random_state { #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } unsigned long torture_random(struct torture_random_state *trsp); +/* Shutdown task absorption, for when the tasks cannot safely be killed. */ +void torture_shutdown_absorb(const char *title); + #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 86fd8c1..a868758 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -91,11 +91,15 @@ torture_param(int, test_boost_interval, 7, "Interval between boost tests, seconds."); torture_param(bool, test_no_idle_hz, true, "Test support for tickless idle CPUs"); -torture_param(bool, verbose, false, "Enable verbose debugging printk()s"); -static char *torture_type = "rcu"; +char *torture_type = "rcu"; +EXPORT_SYMBOL_GPL(torture_type); module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); +bool verbose; +EXPORT_SYMBOL_GPL(verbose); +module_param(verbose, bool, 0444); +MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); static int nrealreaders; static struct task_struct *writer_task; @@ -200,17 +204,6 @@ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); -/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ - -#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ -#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ -#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ -static int fullstop = FULLSTOP_RMMOD; -/* - * Protect fullstop transitions and spawning of kthreads. - */ -static DEFINE_MUTEX(fullstop_mutex); - /* Forward reference. */ static void rcu_torture_cleanup(void); @@ -232,20 +225,6 @@ rcutorture_shutdown_notify(struct notifier_block *unused1, } /* - * Absorb kthreads into a kernel function that won't return, so that - * they won't ever access module text or data again. - */ -static void rcutorture_shutdown_absorb(const char *title) -{ - if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { - pr_notice( - "rcutorture thread %s parking due to system shutdown\n", - title); - schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); - } -} - -/* * Allocate an element from the rcu_tortures pool. */ static struct rcu_torture * @@ -286,7 +265,7 @@ rcu_stutter_wait(const char *title) schedule_timeout_interruptible(1); else schedule_timeout_interruptible(round_jiffies_relative(HZ)); - rcutorture_shutdown_absorb(title); + torture_shutdown_absorb(title); } } @@ -681,7 +660,7 @@ checkwait: rcu_stutter_wait("rcu_torture_boost"); /* Clean up and exit. */ VERBOSE_TOROUT_STRING("rcu_torture_boost task stopping"); - rcutorture_shutdown_absorb("rcu_torture_boost"); + torture_shutdown_absorb("rcu_torture_boost"); while (!kthread_should_stop() || rbi.inflight) schedule_timeout_uninterruptible(1); smp_mb(); /* order accesses to ->inflight before stack-frame death. */ @@ -717,7 +696,7 @@ rcu_torture_fqs(void *arg) rcu_stutter_wait("rcu_torture_fqs"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_TOROUT_STRING("rcu_torture_fqs task stopping"); - rcutorture_shutdown_absorb("rcu_torture_fqs"); + torture_shutdown_absorb("rcu_torture_fqs"); while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); return 0; @@ -789,7 +768,7 @@ rcu_torture_writer(void *arg) rcu_stutter_wait("rcu_torture_writer"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_TOROUT_STRING("rcu_torture_writer task stopping"); - rcutorture_shutdown_absorb("rcu_torture_writer"); + torture_shutdown_absorb("rcu_torture_writer"); while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); return 0; @@ -827,7 +806,7 @@ rcu_torture_fakewriter(void *arg) } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task stopping"); - rcutorture_shutdown_absorb("rcu_torture_fakewriter"); + torture_shutdown_absorb("rcu_torture_fakewriter"); while (!kthread_should_stop()) schedule_timeout_uninterruptible(1); return 0; @@ -971,7 +950,7 @@ rcu_torture_reader(void *arg) rcu_stutter_wait("rcu_torture_reader"); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_TOROUT_STRING("rcu_torture_reader task stopping"); - rcutorture_shutdown_absorb("rcu_torture_reader"); + torture_shutdown_absorb("rcu_torture_reader"); if (irqreader && cur_ops->irq_capable) del_timer_sync(&t); while (!kthread_should_stop()) @@ -1095,7 +1074,7 @@ rcu_torture_stats(void *arg) do { schedule_timeout_interruptible(stat_interval * HZ); rcu_torture_stats_print(); - rcutorture_shutdown_absorb("rcu_torture_stats"); + torture_shutdown_absorb("rcu_torture_stats"); } while (!kthread_should_stop()); VERBOSE_TOROUT_STRING("rcu_torture_stats task stopping"); return 0; @@ -1179,7 +1158,7 @@ rcu_torture_shuffle(void *arg) do { schedule_timeout_interruptible(shuffle_interval * HZ); rcu_torture_shuffle_tasks(); - rcutorture_shutdown_absorb("rcu_torture_shuffle"); + torture_shutdown_absorb("rcu_torture_shuffle"); } while (!kthread_should_stop()); VERBOSE_TOROUT_STRING("rcu_torture_shuffle task stopping"); return 0; @@ -1198,7 +1177,7 @@ rcu_torture_stutter(void *arg) if (!kthread_should_stop()) schedule_timeout_interruptible(stutter * HZ); stutter_pause_test = 0; - rcutorture_shutdown_absorb("rcu_torture_stutter"); + torture_shutdown_absorb("rcu_torture_stutter"); } while (!kthread_should_stop()); VERBOSE_TOROUT_STRING("rcu_torture_stutter task stopping"); return 0; @@ -1470,7 +1449,7 @@ static int rcu_torture_stall(void *args) rcu_read_unlock(); pr_alert("rcu_torture_stall end.\n"); } - rcutorture_shutdown_absorb("rcu_torture_stall"); + torture_shutdown_absorb("rcu_torture_stall"); while (!kthread_should_stop()) schedule_timeout_interruptible(10 * HZ); return 0; @@ -1534,7 +1513,7 @@ static int rcu_torture_barrier_cbs(void *arg) wake_up(&barrier_wq); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_TOROUT_STRING("rcu_torture_barrier_cbs task stopping"); - rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); + torture_shutdown_absorb("rcu_torture_barrier_cbs"); while (!kthread_should_stop()) schedule_timeout_interruptible(1); cur_ops->cb_barrier(); @@ -1571,7 +1550,7 @@ static int rcu_torture_barrier(void *arg) schedule_timeout_interruptible(HZ / 10); } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); VERBOSE_TOROUT_STRING("rcu_torture_barrier task stopping"); - rcutorture_shutdown_absorb("rcu_torture_barrier"); + torture_shutdown_absorb("rcu_torture_barrier"); while (!kthread_should_stop()) schedule_timeout_interruptible(1); return 0; diff --git a/kernel/torture.c b/kernel/torture.c index c82c70f..f050420 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -49,6 +49,11 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney "); +int fullstop = FULLSTOP_RMMOD; +EXPORT_SYMBOL_GPL(fullstop); +DEFINE_MUTEX(fullstop_mutex); +EXPORT_SYMBOL_GPL(fullstop_mutex); + #define TORTURE_RANDOM_MULT 39916801 /* prime */ #define TORTURE_RANDOM_ADD 479001701 /* prime */ #define TORTURE_RANDOM_REFRESH 10000 @@ -69,3 +74,18 @@ torture_random(struct torture_random_state *trsp) return swahw32(trsp->trs_state); } EXPORT_SYMBOL_GPL(torture_random); + +/* + * Absorb kthreads into a kernel function that won't return, so that + * they won't ever access module text or data again. + */ +void torture_shutdown_absorb(const char *title) +{ + while (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { + pr_notice( + "torture thread %s parking due to system shutdown\n", + title); + schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); + } +} +EXPORT_SYMBOL_GPL(torture_shutdown_absorb); -- cgit v0.10.2 From 3808dc9fab05913060626d7f0edd0f195cb9dcab Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 28 Jan 2014 15:29:21 -0800 Subject: rcutorture: Abstract torture_shuffle() The torture_shuffle() function forces each CPU in turn to go idle periodically in order to check for problems interacting with per-CPU variables and with dyntick-idle mode. Because this sort of debugging is not specific to RCU, this commit abstracts that functionality. This in turn requires abstracting some additional infrastructure. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 8474d77..544a2c3 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -69,6 +69,11 @@ struct torture_random_state { #define DEFINE_TORTURE_RANDOM(name) struct torture_random_state name = { 0, 0 } unsigned long torture_random(struct torture_random_state *trsp); +/* Task shuffler, which causes CPUs to occasionally go idle. */ +void torture_shuffle_task_register(struct task_struct *tp); +int torture_shuffle_init(long shuffint); +void torture_shuffle_cleanup(void); + /* Shutdown task absorption, for when the tasks cannot safely be killed. */ void torture_shutdown_absorb(const char *title); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index a868758..0380696 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -106,7 +106,6 @@ static struct task_struct *writer_task; static struct task_struct **fakewriter_tasks; static struct task_struct **reader_tasks; static struct task_struct *stats_task; -static struct task_struct *shuffler_task; static struct task_struct *stutter_task; static struct task_struct *fqs_task; static struct task_struct *boost_tasks[NR_CPUS]; @@ -161,7 +160,6 @@ static int max_online; static long n_barrier_attempts; static long n_barrier_successes; static struct list_head rcu_torture_removed; -static cpumask_var_t shuffle_tmp_mask; static int stutter_pause_test; @@ -1080,90 +1078,6 @@ rcu_torture_stats(void *arg) return 0; } -static int rcu_idle_cpu; /* Force all torture tasks off this CPU */ - -/* Shuffle tasks such that we allow @rcu_idle_cpu to become idle. A special case - * is when @rcu_idle_cpu = -1, when we allow the tasks to run on all CPUs. - */ -static void rcu_torture_shuffle_tasks(void) -{ - int i; - - cpumask_setall(shuffle_tmp_mask); - get_online_cpus(); - - /* No point in shuffling if there is only one online CPU (ex: UP) */ - if (num_online_cpus() == 1) { - put_online_cpus(); - return; - } - - if (rcu_idle_cpu != -1) - cpumask_clear_cpu(rcu_idle_cpu, shuffle_tmp_mask); - - set_cpus_allowed_ptr(current, shuffle_tmp_mask); - - if (reader_tasks) { - for (i = 0; i < nrealreaders; i++) - if (reader_tasks[i]) - set_cpus_allowed_ptr(reader_tasks[i], - shuffle_tmp_mask); - } - if (fakewriter_tasks) { - for (i = 0; i < nfakewriters; i++) - if (fakewriter_tasks[i]) - set_cpus_allowed_ptr(fakewriter_tasks[i], - shuffle_tmp_mask); - } - if (writer_task) - set_cpus_allowed_ptr(writer_task, shuffle_tmp_mask); - if (stats_task) - set_cpus_allowed_ptr(stats_task, shuffle_tmp_mask); - if (stutter_task) - set_cpus_allowed_ptr(stutter_task, shuffle_tmp_mask); - if (fqs_task) - set_cpus_allowed_ptr(fqs_task, shuffle_tmp_mask); - if (shutdown_task) - set_cpus_allowed_ptr(shutdown_task, shuffle_tmp_mask); -#ifdef CONFIG_HOTPLUG_CPU - if (onoff_task) - set_cpus_allowed_ptr(onoff_task, shuffle_tmp_mask); -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - if (stall_task) - set_cpus_allowed_ptr(stall_task, shuffle_tmp_mask); - if (barrier_cbs_tasks) - for (i = 0; i < n_barrier_cbs; i++) - if (barrier_cbs_tasks[i]) - set_cpus_allowed_ptr(barrier_cbs_tasks[i], - shuffle_tmp_mask); - if (barrier_task) - set_cpus_allowed_ptr(barrier_task, shuffle_tmp_mask); - - if (rcu_idle_cpu == -1) - rcu_idle_cpu = num_online_cpus() - 1; - else - rcu_idle_cpu--; - - put_online_cpus(); -} - -/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the - * system to become idle at a time and cut off its timer ticks. This is meant - * to test the support for such tickless idle CPU in RCU. - */ -static int -rcu_torture_shuffle(void *arg) -{ - VERBOSE_TOROUT_STRING("rcu_torture_shuffle task started"); - do { - schedule_timeout_interruptible(shuffle_interval * HZ); - rcu_torture_shuffle_tasks(); - torture_shutdown_absorb("rcu_torture_shuffle"); - } while (!kthread_should_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_shuffle task stopping"); - return 0; -} - /* Cause the rcutorture test to "stutter", starting and stopping all * threads periodically. */ @@ -1397,6 +1311,7 @@ rcu_torture_onoff_init(void) onoff_task = NULL; return ret; } + torture_shuffle_task_register(onoff_task); return 0; } @@ -1468,6 +1383,7 @@ static int __init rcu_torture_stall_init(void) stall_task = NULL; return ret; } + torture_shuffle_task_register(stall_task); return 0; } @@ -1594,6 +1510,7 @@ static int rcu_torture_barrier_init(void) barrier_cbs_tasks[i] = NULL; return ret; } + torture_shuffle_task_register(barrier_cbs_tasks[i]); } barrier_task = kthread_run(rcu_torture_barrier, NULL, "rcu_torture_barrier"); @@ -1602,6 +1519,7 @@ static int rcu_torture_barrier_init(void) VERBOSE_TOROUT_ERRSTRING("Failed to create rcu_torture_barrier"); barrier_task = NULL; } + torture_shuffle_task_register(barrier_task); return 0; } @@ -1674,6 +1592,8 @@ rcu_torture_cleanup(void) fullstop = FULLSTOP_RMMOD; mutex_unlock(&fullstop_mutex); unregister_reboot_notifier(&rcutorture_shutdown_nb); + + torture_shuffle_cleanup(); /* Must be first task cleaned up. */ rcu_torture_barrier_cleanup(); rcu_torture_stall_cleanup(); if (stutter_task) { @@ -1681,12 +1601,6 @@ rcu_torture_cleanup(void) kthread_stop(stutter_task); } stutter_task = NULL; - if (shuffler_task) { - VERBOSE_TOROUT_STRING("Stopping rcu_torture_shuffle task"); - kthread_stop(shuffler_task); - free_cpumask_var(shuffle_tmp_mask); - } - shuffler_task = NULL; if (writer_task) { VERBOSE_TOROUT_STRING("Stopping rcu_torture_writer task"); @@ -1904,6 +1818,7 @@ rcu_torture_init(void) writer_task = NULL; goto unwind; } + torture_shuffle_task_register(writer_task); wake_up_process(writer_task); fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), GFP_KERNEL); @@ -1922,6 +1837,7 @@ rcu_torture_init(void) fakewriter_tasks[i] = NULL; goto unwind; } + torture_shuffle_task_register(fakewriter_tasks[i]); } reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), GFP_KERNEL); @@ -1940,6 +1856,7 @@ rcu_torture_init(void) reader_tasks[i] = NULL; goto unwind; } + torture_shuffle_task_register(reader_tasks[i]); } if (stat_interval > 0) { VERBOSE_TOROUT_STRING("Creating rcu_torture_stats task"); @@ -1951,26 +1868,12 @@ rcu_torture_init(void) stats_task = NULL; goto unwind; } + torture_shuffle_task_register(stats_task); } if (test_no_idle_hz) { - rcu_idle_cpu = num_online_cpus() - 1; - - if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) { - firsterr = -ENOMEM; - VERBOSE_TOROUT_ERRSTRING("Failed to alloc mask"); + firsterr = torture_shuffle_init(shuffle_interval * HZ); + if (firsterr) goto unwind; - } - - /* Create the shuffler thread */ - shuffler_task = kthread_run(rcu_torture_shuffle, NULL, - "rcu_torture_shuffle"); - if (IS_ERR(shuffler_task)) { - free_cpumask_var(shuffle_tmp_mask); - firsterr = PTR_ERR(shuffler_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create shuffler"); - shuffler_task = NULL; - goto unwind; - } } if (stutter < 0) stutter = 0; @@ -1984,6 +1887,7 @@ rcu_torture_init(void) stutter_task = NULL; goto unwind; } + torture_shuffle_task_register(stutter_task); } if (fqs_duration < 0) fqs_duration = 0; @@ -1997,6 +1901,7 @@ rcu_torture_init(void) fqs_task = NULL; goto unwind; } + torture_shuffle_task_register(fqs_task); } if (test_boost_interval < 1) test_boost_interval = 1; @@ -2027,6 +1932,7 @@ rcu_torture_init(void) shutdown_task = NULL; goto unwind; } + torture_shuffle_task_register(shutdown_task); wake_up_process(shutdown_task); } i = rcu_torture_onoff_init(); diff --git a/kernel/torture.c b/kernel/torture.c index f050420..26058f2 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -76,6 +76,157 @@ torture_random(struct torture_random_state *trsp) EXPORT_SYMBOL_GPL(torture_random); /* + * Variables for shuffling. The idea is to ensure that each CPU stays + * idle for an extended period to test interactions with dyntick idle, + * as well as interactions with any per-CPU varibles. + */ +struct shuffle_task { + struct list_head st_l; + struct task_struct *st_t; +}; + +static long shuffle_interval; /* In jiffies. */ +static struct task_struct *shuffler_task; +static cpumask_var_t shuffle_tmp_mask; +static int shuffle_idle_cpu; /* Force all torture tasks off this CPU */ +static struct list_head shuffle_task_list = LIST_HEAD_INIT(shuffle_task_list); +static DEFINE_MUTEX(shuffle_task_mutex); + +/* + * Register a task to be shuffled. If there is no memory, just splat + * and don't bother registering. + */ +void torture_shuffle_task_register(struct task_struct *tp) +{ + struct shuffle_task *stp; + + if (WARN_ON_ONCE(tp == NULL)) + return; + stp = kmalloc(sizeof(*stp), GFP_KERNEL); + if (WARN_ON_ONCE(stp == NULL)) + return; + stp->st_t = tp; + mutex_lock(&shuffle_task_mutex); + list_add(&stp->st_l, &shuffle_task_list); + mutex_unlock(&shuffle_task_mutex); +} +EXPORT_SYMBOL_GPL(torture_shuffle_task_register); + +/* + * Unregister all tasks, for example, at the end of the torture run. + */ +static void torture_shuffle_task_unregister_all(void) +{ + struct shuffle_task *stp; + struct shuffle_task *p; + + mutex_lock(&shuffle_task_mutex); + list_for_each_entry_safe(stp, p, &shuffle_task_list, st_l) { + list_del(&stp->st_l); + kfree(stp); + } + mutex_unlock(&shuffle_task_mutex); +} + +/* Shuffle tasks such that we allow shuffle_idle_cpu to become idle. + * A special case is when shuffle_idle_cpu = -1, in which case we allow + * the tasks to run on all CPUs. + */ +static void torture_shuffle_tasks(void) +{ + struct shuffle_task *stp; + + cpumask_setall(shuffle_tmp_mask); + get_online_cpus(); + + /* No point in shuffling if there is only one online CPU (ex: UP) */ + if (num_online_cpus() == 1) { + put_online_cpus(); + return; + } + + /* Advance to the next CPU. Upon overflow, don't idle any CPUs. */ + shuffle_idle_cpu = cpumask_next(shuffle_idle_cpu, shuffle_tmp_mask); + if (shuffle_idle_cpu >= nr_cpu_ids) + shuffle_idle_cpu = -1; + if (shuffle_idle_cpu != -1) { + cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask); + if (cpumask_empty(shuffle_tmp_mask)) { + put_online_cpus(); + return; + } + } + + mutex_lock(&shuffle_task_mutex); + list_for_each_entry(stp, &shuffle_task_list, st_l) + set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask); + mutex_unlock(&shuffle_task_mutex); + + put_online_cpus(); +} + +/* Shuffle tasks across CPUs, with the intent of allowing each CPU in the + * system to become idle at a time and cut off its timer ticks. This is meant + * to test the support for such tickless idle CPU in RCU. + */ +static int torture_shuffle(void *arg) +{ + VERBOSE_TOROUT_STRING("torture_shuffle task started"); + do { + schedule_timeout_interruptible(shuffle_interval); + torture_shuffle_tasks(); + torture_shutdown_absorb("torture_shuffle"); + } while (!torture_must_stop()); + VERBOSE_TOROUT_STRING("torture_shuffle task stopping"); + return 0; +} + +/* + * Start the shuffler, with shuffint in jiffies. + */ +int torture_shuffle_init(long shuffint) +{ + int ret; + + shuffle_interval = shuffint; + + shuffle_idle_cpu = -1; + + if (!alloc_cpumask_var(&shuffle_tmp_mask, GFP_KERNEL)) { + VERBOSE_TOROUT_ERRSTRING("Failed to alloc mask"); + return -ENOMEM; + } + + /* Create the shuffler thread */ + shuffler_task = kthread_run(torture_shuffle, NULL, "torture_shuffle"); + if (IS_ERR(shuffler_task)) { + ret = PTR_ERR(shuffler_task); + free_cpumask_var(shuffle_tmp_mask); + VERBOSE_TOROUT_ERRSTRING("Failed to create shuffler"); + shuffler_task = NULL; + return ret; + } + torture_shuffle_task_register(shuffler_task); + return 0; +} +EXPORT_SYMBOL_GPL(torture_shuffle_init); + +/* + * Stop the shuffling. + */ +void torture_shuffle_cleanup(void) +{ + torture_shuffle_task_unregister_all(); + if (shuffler_task) { + VERBOSE_TOROUT_STRING("Stopping torture_shuffle task"); + kthread_stop(shuffler_task); + free_cpumask_var(shuffle_tmp_mask); + } + shuffler_task = NULL; +} +EXPORT_SYMBOL_GPL(torture_shuffle_cleanup); + +/* * Absorb kthreads into a kernel function that won't return, so that * they won't ever access module text or data again. */ -- cgit v0.10.2 From 2e9e8081d2e7a4efb582a240aa7fee991bbbabb0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 28 Jan 2014 15:58:22 -0800 Subject: rcutorture: Abstract torture_onoff() Because online/offline torturing is not specific to RCU, this commit abstracts it into the kernel/torture.c module to allow other torture tests to use it. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 544a2c3..be3694a 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -61,6 +61,18 @@ extern bool verbose; #define VERBOSE_TOROUT_ERRSTRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) +/* Definitions for a non-string torture-test module parameter. */ +#define torture_parm(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg); + +/* Definitions for online/offline exerciser. */ +int torture_onoff_init(long ooholdoff, long oointerval); +void torture_onoff_cleanup(void); +char *torture_onoff_stats(char *page); +bool torture_onoff_failures(void); + /* Low-rider random number generator. */ struct torture_random_state { unsigned long trs_state; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 0380696..0e8b52b 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -110,9 +110,6 @@ static struct task_struct *stutter_task; static struct task_struct *fqs_task; static struct task_struct *boost_tasks[NR_CPUS]; static struct task_struct *shutdown_task; -#ifdef CONFIG_HOTPLUG_CPU -static struct task_struct *onoff_task; -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ static struct task_struct *stall_task; static struct task_struct **barrier_cbs_tasks; static struct task_struct *barrier_task; @@ -147,16 +144,6 @@ static long n_rcu_torture_boost_rterror; static long n_rcu_torture_boost_failure; static long n_rcu_torture_boosts; static long n_rcu_torture_timers; -static long n_offline_attempts; -static long n_offline_successes; -static unsigned long sum_offline; -static int min_offline = -1; -static int max_offline; -static long n_online_attempts; -static long n_online_successes; -static unsigned long sum_online; -static int min_online = -1; -static int max_online; static long n_barrier_attempts; static long n_barrier_successes; static struct list_head rcu_torture_removed; @@ -994,13 +981,7 @@ rcu_torture_printk(char *page) n_rcu_torture_boost_failure, n_rcu_torture_boosts, n_rcu_torture_timers); - page += sprintf(page, - "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", - n_online_successes, n_online_attempts, - n_offline_successes, n_offline_attempts, - min_online, max_online, - min_offline, max_offline, - sum_online, sum_offline, HZ); + page = torture_onoff_stats(page); page += sprintf(page, "barrier: %ld/%ld:%ld", n_barrier_successes, n_barrier_attempts, @@ -1204,140 +1185,6 @@ rcu_torture_shutdown(void *arg) return 0; } -#ifdef CONFIG_HOTPLUG_CPU - -/* - * Execute random CPU-hotplug operations at the interval specified - * by the onoff_interval. - */ -static int -rcu_torture_onoff(void *arg) -{ - int cpu; - unsigned long delta; - int maxcpu = -1; - DEFINE_TORTURE_RANDOM(rand); - int ret; - unsigned long starttime; - - VERBOSE_TOROUT_STRING("rcu_torture_onoff task started"); - for_each_online_cpu(cpu) - maxcpu = cpu; - WARN_ON(maxcpu < 0); - if (onoff_holdoff > 0) { - VERBOSE_TOROUT_STRING("rcu_torture_onoff begin holdoff"); - schedule_timeout_interruptible(onoff_holdoff * HZ); - VERBOSE_TOROUT_STRING("rcu_torture_onoff end holdoff"); - } - while (!kthread_should_stop()) { - cpu = (torture_random(&rand) >> 4) % (maxcpu + 1); - if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: offlining %d\n", - torture_type, cpu); - starttime = jiffies; - n_offline_attempts++; - ret = cpu_down(cpu); - if (ret) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: offline %d failed: errno %d\n", - torture_type, cpu, ret); - } else { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: offlined %d\n", - torture_type, cpu); - n_offline_successes++; - delta = jiffies - starttime; - sum_offline += delta; - if (min_offline < 0) { - min_offline = delta; - max_offline = delta; - } - if (min_offline > delta) - min_offline = delta; - if (max_offline < delta) - max_offline = delta; - } - } else if (cpu_is_hotpluggable(cpu)) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: onlining %d\n", - torture_type, cpu); - starttime = jiffies; - n_online_attempts++; - ret = cpu_up(cpu); - if (ret) { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: online %d failed: errno %d\n", - torture_type, cpu, ret); - } else { - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_onoff task: onlined %d\n", - torture_type, cpu); - n_online_successes++; - delta = jiffies - starttime; - sum_online += delta; - if (min_online < 0) { - min_online = delta; - max_online = delta; - } - if (min_online > delta) - min_online = delta; - if (max_online < delta) - max_online = delta; - } - } - schedule_timeout_interruptible(onoff_interval * HZ); - } - VERBOSE_TOROUT_STRING("rcu_torture_onoff task stopping"); - return 0; -} - -static int -rcu_torture_onoff_init(void) -{ - int ret; - - if (onoff_interval <= 0) - return 0; - onoff_task = kthread_run(rcu_torture_onoff, NULL, "rcu_torture_onoff"); - if (IS_ERR(onoff_task)) { - ret = PTR_ERR(onoff_task); - onoff_task = NULL; - return ret; - } - torture_shuffle_task_register(onoff_task); - return 0; -} - -static void rcu_torture_onoff_cleanup(void) -{ - if (onoff_task == NULL) - return; - VERBOSE_TOROUT_STRING("Stopping rcu_torture_onoff task"); - kthread_stop(onoff_task); - onoff_task = NULL; -} - -#else /* #ifdef CONFIG_HOTPLUG_CPU */ - -static int -rcu_torture_onoff_init(void) -{ - return 0; -} - -static void rcu_torture_onoff_cleanup(void) -{ -} - -#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ - /* * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then * induces a CPU stall for the time specified by stall_cpu. @@ -1657,7 +1504,7 @@ rcu_torture_cleanup(void) kthread_stop(shutdown_task); } shutdown_task = NULL; - rcu_torture_onoff_cleanup(); + torture_onoff_cleanup(); /* Wait for all RCU callbacks to fire. */ @@ -1668,8 +1515,7 @@ rcu_torture_cleanup(void) if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); - else if (n_online_successes != n_online_attempts || - n_offline_successes != n_offline_attempts) + else if (torture_onoff_failures()) rcu_torture_print_module_parms(cur_ops, "End of test: RCU_HOTPLUG"); else @@ -1935,7 +1781,7 @@ rcu_torture_init(void) torture_shuffle_task_register(shutdown_task); wake_up_process(shutdown_task); } - i = rcu_torture_onoff_init(); + i = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ); if (i != 0) { firsterr = i; goto unwind; diff --git a/kernel/torture.c b/kernel/torture.c index 26058f2..a7ec8a7 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -54,6 +54,192 @@ EXPORT_SYMBOL_GPL(fullstop); DEFINE_MUTEX(fullstop_mutex); EXPORT_SYMBOL_GPL(fullstop_mutex); +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Variables for online-offline handling. Only present if CPU hotplug + * is enabled, otherwise does nothing. + */ + +static struct task_struct *onoff_task; +static long onoff_holdoff; +static long onoff_interval; +static long n_offline_attempts; +static long n_offline_successes; +static unsigned long sum_offline; +static int min_offline = -1; +static int max_offline; +static long n_online_attempts; +static long n_online_successes; +static unsigned long sum_online; +static int min_online = -1; +static int max_online; + +/* + * Execute random CPU-hotplug operations at the interval specified + * by the onoff_interval. + */ +static int +torture_onoff(void *arg) +{ + int cpu; + unsigned long delta; + int maxcpu = -1; + DEFINE_TORTURE_RANDOM(rand); + int ret; + unsigned long starttime; + + VERBOSE_TOROUT_STRING("torture_onoff task started"); + for_each_online_cpu(cpu) + maxcpu = cpu; + WARN_ON(maxcpu < 0); + if (onoff_holdoff > 0) { + VERBOSE_TOROUT_STRING("torture_onoff begin holdoff"); + schedule_timeout_interruptible(onoff_holdoff); + VERBOSE_TOROUT_STRING("torture_onoff end holdoff"); + } + while (!torture_must_stop()) { + cpu = (torture_random(&rand) >> 4) % (maxcpu + 1); + if (cpu_online(cpu) && cpu_is_hotpluggable(cpu)) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: offlining %d\n", + torture_type, cpu); + starttime = jiffies; + n_offline_attempts++; + ret = cpu_down(cpu); + if (ret) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: offline %d failed: errno %d\n", + torture_type, cpu, ret); + } else { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: offlined %d\n", + torture_type, cpu); + n_offline_successes++; + delta = jiffies - starttime; + sum_offline += delta; + if (min_offline < 0) { + min_offline = delta; + max_offline = delta; + } + if (min_offline > delta) + min_offline = delta; + if (max_offline < delta) + max_offline = delta; + } + } else if (cpu_is_hotpluggable(cpu)) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: onlining %d\n", + torture_type, cpu); + starttime = jiffies; + n_online_attempts++; + ret = cpu_up(cpu); + if (ret) { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: online %d failed: errno %d\n", + torture_type, cpu, ret); + } else { + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_onoff task: onlined %d\n", + torture_type, cpu); + n_online_successes++; + delta = jiffies - starttime; + sum_online += delta; + if (min_online < 0) { + min_online = delta; + max_online = delta; + } + if (min_online > delta) + min_online = delta; + if (max_online < delta) + max_online = delta; + } + } + schedule_timeout_interruptible(onoff_interval); + } + VERBOSE_TOROUT_STRING("torture_onoff task stopping"); + return 0; +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + +/* + * Initiate online-offline handling. + */ +int torture_onoff_init(long ooholdoff, long oointerval) +{ +#ifdef CONFIG_HOTPLUG_CPU + int ret; + + onoff_holdoff = ooholdoff; + onoff_interval = oointerval; + if (onoff_interval <= 0) + return 0; + onoff_task = kthread_run(torture_onoff, NULL, "torture_onoff"); + if (IS_ERR(onoff_task)) { + ret = PTR_ERR(onoff_task); + onoff_task = NULL; + return ret; + } + torture_shuffle_task_register(onoff_task); +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + return 0; +} +EXPORT_SYMBOL_GPL(torture_onoff_init); + +/* + * Clean up after online/offline testing. + */ +void torture_onoff_cleanup(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (onoff_task == NULL) + return; + VERBOSE_TOROUT_STRING("Stopping torture_onoff task"); + kthread_stop(onoff_task); + onoff_task = NULL; +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ +} +EXPORT_SYMBOL_GPL(torture_onoff_cleanup); + +/* + * Print online/offline testing statistics. + */ +char *torture_onoff_stats(char *page) +{ +#ifdef CONFIG_HOTPLUG_CPU + page += sprintf(page, + "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", + n_online_successes, n_online_attempts, + n_offline_successes, n_offline_attempts, + min_online, max_online, + min_offline, max_offline, + sum_online, sum_offline, HZ); +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + return page; +} +EXPORT_SYMBOL_GPL(torture_onoff_stats); + +/* + * Were all the online/offline operations successful? + */ +bool torture_onoff_failures(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + return n_online_successes != n_online_attempts || + n_offline_successes != n_offline_attempts; +#else /* #ifdef CONFIG_HOTPLUG_CPU */ + return false; +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ +} +EXPORT_SYMBOL_GPL(torture_onoff_failures); + #define TORTURE_RANDOM_MULT 39916801 /* prime */ #define TORTURE_RANDOM_ADD 479001701 /* prime */ #define TORTURE_RANDOM_REFRESH 10000 -- cgit v0.10.2 From b5daa8f3b3b2b0133ad40e13d4f722070119ce36 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 13:38:09 -0800 Subject: rcutorture: Abstract torture-test initialization This commit creates torture_init_begin() and torture_init_end() functions to abstract locking and allow the torture_type and verbose variables in kernel/torture.o to become static. With a bit more abstraction, fullstop_mutex will also become static. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index be3694a..428d271 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -49,10 +49,6 @@ extern int fullstop; /* Protect fullstop transitions and spawning of kthreads. */ extern struct mutex fullstop_mutex; -/* Common module parameters. */ -extern char *torture_type; -extern bool verbose; - #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ pr_alert("%s" TORTURE_FLAG s "\n", torture_type) @@ -89,4 +85,9 @@ void torture_shuffle_cleanup(void); /* Shutdown task absorption, for when the tasks cannot safely be killed. */ void torture_shutdown_absorb(const char *title); +/* Initialization and cleanup. */ + +void torture_init_begin(char *ttype, bool v); +void torture_init_end(void); + #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 0e8b52b..93aca2f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -91,15 +91,12 @@ torture_param(int, test_boost_interval, 7, "Interval between boost tests, seconds."); torture_param(bool, test_no_idle_hz, true, "Test support for tickless idle CPUs"); +torture_param(bool, verbose, true, + "Enable verbose debugging printk()s"); -char *torture_type = "rcu"; -EXPORT_SYMBOL_GPL(torture_type); +static char *torture_type = "rcu"; module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); -bool verbose; -EXPORT_SYMBOL_GPL(verbose); -module_param(verbose, bool, 0444); -MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); static int nrealreaders; static struct task_struct *writer_task; @@ -1425,8 +1422,8 @@ rcu_torture_cleanup(void) { int i; - mutex_lock(&fullstop_mutex); rcutorture_record_test_transition(); + mutex_lock(&fullstop_mutex); if (fullstop == FULLSTOP_SHUTDOWN) { pr_warn(/* but going down anyway, so... */ "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); @@ -1589,7 +1586,7 @@ rcu_torture_init(void) &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, }; - mutex_lock(&fullstop_mutex); + torture_init_begin(torture_type, verbose); /* Process args and tell the world that the torturer is on the job. */ for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { @@ -1604,7 +1601,7 @@ rcu_torture_init(void) for (i = 0; i < ARRAY_SIZE(torture_ops); i++) pr_alert(" %s", torture_ops[i]->name); pr_alert("\n"); - mutex_unlock(&fullstop_mutex); + torture_init_end(); return -EINVAL; } if (cur_ops->fqs == NULL && fqs_duration != 0) { @@ -1800,11 +1797,11 @@ rcu_torture_init(void) if (object_debug) rcu_test_debug_objects(); rcutorture_record_test_transition(); - mutex_unlock(&fullstop_mutex); + torture_init_end(); return 0; unwind: - mutex_unlock(&fullstop_mutex); + torture_init_end(); rcu_torture_cleanup(); return firsterr; } diff --git a/kernel/torture.c b/kernel/torture.c index a7ec8a7..828d0b1 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -49,6 +49,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney "); +static char *torture_type; +static bool verbose; + int fullstop = FULLSTOP_RMMOD; EXPORT_SYMBOL_GPL(fullstop); DEFINE_MUTEX(fullstop_mutex); @@ -426,3 +429,27 @@ void torture_shutdown_absorb(const char *title) } } EXPORT_SYMBOL_GPL(torture_shutdown_absorb); + +/* + * Initialize torture module. Please note that this is -not- invoked via + * the usual module_init() mechanism, but rather by an explicit call from + * the client torture module. This call must be paired with a later + * torture_init_end(). + */ +void __init torture_init_begin(char *ttype, bool v) +{ + mutex_lock(&fullstop_mutex); + torture_type = ttype; + verbose = v; + +} +EXPORT_SYMBOL_GPL(torture_init_begin); + +/* + * Tell the torture module that initialization is complete. + */ +void __init torture_init_end(void) +{ + mutex_unlock(&fullstop_mutex); +} +EXPORT_SYMBOL_GPL(torture_init_end); -- cgit v0.10.2 From cc47ae0830264f07442070b36fe0d0a4d4e3c313 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 14:21:11 -0800 Subject: rcutorture: Abstract torture-test cleanup This commit creates a torture_cleanup() that handles the generic cleanup actions local to kernel/torture.c. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 428d271..31d6993 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -65,7 +65,6 @@ extern struct mutex fullstop_mutex; /* Definitions for online/offline exerciser. */ int torture_onoff_init(long ooholdoff, long oointerval); -void torture_onoff_cleanup(void); char *torture_onoff_stats(char *page); bool torture_onoff_failures(void); @@ -80,14 +79,13 @@ unsigned long torture_random(struct torture_random_state *trsp); /* Task shuffler, which causes CPUs to occasionally go idle. */ void torture_shuffle_task_register(struct task_struct *tp); int torture_shuffle_init(long shuffint); -void torture_shuffle_cleanup(void); /* Shutdown task absorption, for when the tasks cannot safely be killed. */ void torture_shutdown_absorb(const char *title); /* Initialization and cleanup. */ - void torture_init_begin(char *ttype, bool v); void torture_init_end(void); +bool torture_cleanup(void); #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 93aca2f..68a689f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1423,21 +1423,13 @@ rcu_torture_cleanup(void) int i; rcutorture_record_test_transition(); - mutex_lock(&fullstop_mutex); - if (fullstop == FULLSTOP_SHUTDOWN) { - pr_warn(/* but going down anyway, so... */ - "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); - mutex_unlock(&fullstop_mutex); - schedule_timeout_uninterruptible(10); + if (torture_cleanup()) { if (cur_ops->cb_barrier != NULL) cur_ops->cb_barrier(); return; } - fullstop = FULLSTOP_RMMOD; - mutex_unlock(&fullstop_mutex); unregister_reboot_notifier(&rcutorture_shutdown_nb); - torture_shuffle_cleanup(); /* Must be first task cleaned up. */ rcu_torture_barrier_cleanup(); rcu_torture_stall_cleanup(); if (stutter_task) { @@ -1501,7 +1493,6 @@ rcu_torture_cleanup(void) kthread_stop(shutdown_task); } shutdown_task = NULL; - torture_onoff_cleanup(); /* Wait for all RCU callbacks to fire. */ diff --git a/kernel/torture.c b/kernel/torture.c index 828d0b1..41ae5cc 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -199,7 +199,7 @@ EXPORT_SYMBOL_GPL(torture_onoff_init); /* * Clean up after online/offline testing. */ -void torture_onoff_cleanup(void) +static void torture_onoff_cleanup(void) { #ifdef CONFIG_HOTPLUG_CPU if (onoff_task == NULL) @@ -403,7 +403,7 @@ EXPORT_SYMBOL_GPL(torture_shuffle_init); /* * Stop the shuffling. */ -void torture_shuffle_cleanup(void) +static void torture_shuffle_cleanup(void) { torture_shuffle_task_unregister_all(); if (shuffler_task) { @@ -453,3 +453,29 @@ void __init torture_init_end(void) mutex_unlock(&fullstop_mutex); } EXPORT_SYMBOL_GPL(torture_init_end); + +/* + * Clean up torture module. Please note that this is -not- invoked via + * the usual module_exit() mechanism, but rather by an explicit call from + * the client torture module. Returns true if a race with system shutdown + * is detected. + * + * This must be called before the caller starts shutting down its own + * kthreads. + */ +bool torture_cleanup(void) +{ + mutex_lock(&fullstop_mutex); + if (fullstop == FULLSTOP_SHUTDOWN) { + pr_warn("Concurrent rmmod and shutdown illegal!\n"); + mutex_unlock(&fullstop_mutex); + schedule_timeout_uninterruptible(10); + return true; + } + fullstop = FULLSTOP_RMMOD; + mutex_unlock(&fullstop_mutex); + torture_shuffle_cleanup(); + torture_onoff_cleanup(); + return false; +} +EXPORT_SYMBOL_GPL(torture_cleanup); -- cgit v0.10.2 From 3c626237eb359cf125051f2fe8b419ddae0f5291 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 15:29:29 -0800 Subject: rcutorture: Print dates when doing parallel rcutorture runs Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 521f9e0..242b52d 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -286,7 +286,7 @@ awk < $T/cfgcpu.pack \ # Dump out the scripting required to run one test batch. function dump(first, pastlast) { - print "echo ----start batch----" + print "echo ----Start batch: `date`" jn=1 for (j = first; j < pastlast; j++) { builddir=KVM "/b" jn @@ -302,27 +302,27 @@ function dump(first, pastlast) ovf = "(!)"; else ovf = ""; - print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build."; + print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; print "rm -f " builddir ".*"; print "touch " builddir ".wait"; print "mkdir " builddir " > /dev/null 2>&1 || :"; print "mkdir " rd cfr[jn] " || :"; print "kvm-test-1-rcu.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " builddir ".out 2>&1 &" - print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete." + print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date`" print "while test -f " builddir ".wait" print "do" print "\tsleep 1" print "done" - print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete." + print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date`" jn++; } for (j = 1; j < jn; j++) { builddir=KVM "/b" j print "rm -f " builddir ".ready" - print "echo ----", cfr[j], cpusr[j] ovf ": Starting kernel" + print "echo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date`" } print "wait" - print "echo ---- All kernel runs complete" + print "echo ---- All kernel runs complete. `date`" for (j = 1; j < jn; j++) { builddir=KVM "/b" j print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results:" -- cgit v0.10.2 From 4622b487ecf0094401ac10e504606e5cbdea5a6e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 15:37:19 -0800 Subject: rcutorture: Abstract torture_shutdown_notify() Because handling the race between rmmod and system shutdown is not specific to RCU, this commit abstracts torture_shutdown_notify(), placing this code into kernel/torture.c. This change also allows fullstop_mutex to be private to kernel/torture.c. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 31d6993..742d8a4 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -46,8 +46,6 @@ #define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ #define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ extern int fullstop; -/* Protect fullstop transitions and spawning of kthreads. */ -extern struct mutex fullstop_mutex; #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 68a689f..2560e93 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -190,23 +190,6 @@ static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); static void rcu_torture_cleanup(void); /* - * Detect and respond to a system shutdown. - */ -static int -rcutorture_shutdown_notify(struct notifier_block *unused1, - unsigned long unused2, void *unused3) -{ - mutex_lock(&fullstop_mutex); - if (fullstop == FULLSTOP_DONTSTOP) - fullstop = FULLSTOP_SHUTDOWN; - else - pr_warn(/* but going down anyway, so... */ - "Concurrent 'rmmod rcutorture' and shutdown illegal!\n"); - mutex_unlock(&fullstop_mutex); - return NOTIFY_DONE; -} - -/* * Allocate an element from the rcu_tortures pool. */ static struct rcu_torture * @@ -1098,10 +1081,6 @@ rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) onoff_interval, onoff_holdoff); } -static struct notifier_block rcutorture_shutdown_nb = { - .notifier_call = rcutorture_shutdown_notify, -}; - static void rcutorture_booster_cleanup(int cpu) { struct task_struct *t; @@ -1428,7 +1407,6 @@ rcu_torture_cleanup(void) cur_ops->cb_barrier(); return; } - unregister_reboot_notifier(&rcutorture_shutdown_nb); rcu_torture_barrier_cleanup(); rcu_torture_stall_cleanup(); @@ -1774,7 +1752,6 @@ rcu_torture_init(void) firsterr = i; goto unwind; } - register_reboot_notifier(&rcutorture_shutdown_nb); i = rcu_torture_stall_init(); if (i != 0) { firsterr = i; diff --git a/kernel/torture.c b/kernel/torture.c index 41ae5cc..b02fa27 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -54,8 +54,7 @@ static bool verbose; int fullstop = FULLSTOP_RMMOD; EXPORT_SYMBOL_GPL(fullstop); -DEFINE_MUTEX(fullstop_mutex); -EXPORT_SYMBOL_GPL(fullstop_mutex); +static DEFINE_MUTEX(fullstop_mutex); #ifdef CONFIG_HOTPLUG_CPU @@ -422,15 +421,33 @@ EXPORT_SYMBOL_GPL(torture_shuffle_cleanup); void torture_shutdown_absorb(const char *title) { while (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { - pr_notice( - "torture thread %s parking due to system shutdown\n", - title); + pr_notice("torture thread %s parking due to system shutdown\n", + title); schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT); } } EXPORT_SYMBOL_GPL(torture_shutdown_absorb); /* + * Detect and respond to a system shutdown. + */ +static int torture_shutdown_notify(struct notifier_block *unused1, + unsigned long unused2, void *unused3) +{ + mutex_lock(&fullstop_mutex); + if (fullstop == FULLSTOP_DONTSTOP) + fullstop = FULLSTOP_SHUTDOWN; + else + pr_warn("Concurrent rmmod and shutdown illegal!\n"); + mutex_unlock(&fullstop_mutex); + return NOTIFY_DONE; +} + +static struct notifier_block torture_shutdown_nb = { + .notifier_call = torture_shutdown_notify, +}; + +/* * Initialize torture module. Please note that this is -not- invoked via * the usual module_init() mechanism, but rather by an explicit call from * the client torture module. This call must be paired with a later @@ -451,6 +468,7 @@ EXPORT_SYMBOL_GPL(torture_init_begin); void __init torture_init_end(void) { mutex_unlock(&fullstop_mutex); + register_reboot_notifier(&torture_shutdown_nb); } EXPORT_SYMBOL_GPL(torture_init_end); @@ -474,6 +492,7 @@ bool torture_cleanup(void) } fullstop = FULLSTOP_RMMOD; mutex_unlock(&fullstop_mutex); + unregister_reboot_notifier(&torture_shutdown_nb); torture_shuffle_cleanup(); torture_onoff_cleanup(); return false; -- cgit v0.10.2 From 36970bb91d89618d3495babf44b934e9c9db6bbc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 15:49:29 -0800 Subject: rcutorture: Privatize fullstop This commit introduces the torture_must_stop() function in order to keep use of the fullstop variable local to kernel/torture.c. There is also a torture_must_stop_irq() counterpart for use from RCU callbacks, timeout handlers, and the like. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 742d8a4..0259db3 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -41,12 +41,6 @@ module_param(name, type, 0444); \ MODULE_PARM_DESC(name, msg); -/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ -#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ -#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */ -#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */ -extern int fullstop; - #define TORTURE_FLAG "-torture:" #define TOROUT_STRING(s) \ pr_alert("%s" TORTURE_FLAG s "\n", torture_type) @@ -85,5 +79,7 @@ void torture_shutdown_absorb(const char *title); void torture_init_begin(char *ttype, bool v); void torture_init_end(void); bool torture_cleanup(void); +bool torture_must_stop(void); +bool torture_must_stop_irq(void); #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 2560e93..9357c88 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -304,7 +304,7 @@ rcu_torture_cb(struct rcu_head *p) int i; struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu); - if (fullstop != FULLSTOP_DONTSTOP) { + if (torture_must_stop_irq()) { /* Test is ending, just drop callbacks on the floor. */ /* The next initialization will pick up the pieces. */ return; @@ -572,8 +572,7 @@ static int rcu_torture_boost(void *arg) while (ULONG_CMP_LT(jiffies, oldstarttime)) { schedule_timeout_interruptible(oldstarttime - jiffies); rcu_stutter_wait("rcu_torture_boost"); - if (kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP) + if (torture_must_stop()) goto checkwait; } @@ -595,8 +594,7 @@ static int rcu_torture_boost(void *arg) } cond_resched(); rcu_stutter_wait("rcu_torture_boost"); - if (kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP) + if (torture_must_stop()) goto checkwait; } @@ -621,7 +619,7 @@ static int rcu_torture_boost(void *arg) /* Go do the stutter. */ checkwait: rcu_stutter_wait("rcu_torture_boost"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + } while (!torture_must_stop()); /* Clean up and exit. */ VERBOSE_TOROUT_STRING("rcu_torture_boost task stopping"); @@ -659,7 +657,7 @@ rcu_torture_fqs(void *arg) fqs_burst_remaining -= fqs_holdoff; } rcu_stutter_wait("rcu_torture_fqs"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_fqs task stopping"); torture_shutdown_absorb("rcu_torture_fqs"); while (!kthread_should_stop()) @@ -731,7 +729,7 @@ rcu_torture_writer(void *arg) } rcutorture_record_progress(++rcu_torture_current_version); rcu_stutter_wait("rcu_torture_writer"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_writer task stopping"); torture_shutdown_absorb("rcu_torture_writer"); while (!kthread_should_stop()) @@ -768,7 +766,7 @@ rcu_torture_fakewriter(void *arg) cur_ops->exp_sync(); } rcu_stutter_wait("rcu_torture_fakewriter"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task stopping"); torture_shutdown_absorb("rcu_torture_fakewriter"); @@ -913,7 +911,7 @@ rcu_torture_reader(void *arg) cur_ops->readunlock(idx); schedule(); rcu_stutter_wait("rcu_torture_reader"); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_reader task stopping"); torture_shutdown_absorb("rcu_torture_reader"); if (irqreader && cur_ops->irq_capable) @@ -1022,9 +1020,6 @@ rcu_torture_stats_print(void) /* * Periodically prints torture statistics, if periodic statistics printing * was specified via the stat_interval module parameter. - * - * No need to worry about fullstop here, since this one doesn't reference - * volatile state or register callbacks. */ static int rcu_torture_stats(void *arg) @@ -1034,7 +1029,7 @@ rcu_torture_stats(void *arg) schedule_timeout_interruptible(stat_interval * HZ); rcu_torture_stats_print(); torture_shutdown_absorb("rcu_torture_stats"); - } while (!kthread_should_stop()); + } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_stats task stopping"); return 0; } @@ -1241,16 +1236,15 @@ static int rcu_torture_barrier_cbs(void *arg) wait_event(barrier_cbs_wq[myid], (newphase = ACCESS_ONCE(barrier_phase)) != lastphase || - kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP); + torture_must_stop()); lastphase = newphase; smp_mb(); /* ensure barrier_phase load before ->call(). */ - if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) + if (torture_must_stop()) break; cur_ops->call(&rcu, rcu_torture_barrier_cbf); if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_barrier_cbs task stopping"); torture_shutdown_absorb("rcu_torture_barrier_cbs"); while (!kthread_should_stop()) @@ -1275,9 +1269,8 @@ static int rcu_torture_barrier(void *arg) wake_up(&barrier_cbs_wq[i]); wait_event(barrier_wq, atomic_read(&barrier_cbs_count) == 0 || - kthread_should_stop() || - fullstop != FULLSTOP_DONTSTOP); - if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) + torture_must_stop()); + if (torture_must_stop()) break; n_barrier_attempts++; cur_ops->cb_barrier(); /* Implies smp_mb() for wait_event(). */ @@ -1287,7 +1280,7 @@ static int rcu_torture_barrier(void *arg) } n_barrier_successes++; schedule_timeout_interruptible(HZ / 10); - } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); + } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_barrier task stopping"); torture_shutdown_absorb("rcu_torture_barrier"); while (!kthread_should_stop()) @@ -1585,7 +1578,6 @@ rcu_torture_init(void) else nrealreaders = 2 * num_online_cpus(); rcu_torture_print_module_parms(cur_ops, "Start of test"); - fullstop = FULLSTOP_DONTSTOP; /* Set up the freelist. */ diff --git a/kernel/torture.c b/kernel/torture.c index b02fa27..ed360cf 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -52,8 +52,11 @@ MODULE_AUTHOR("Paul E. McKenney "); static char *torture_type; static bool verbose; -int fullstop = FULLSTOP_RMMOD; -EXPORT_SYMBOL_GPL(fullstop); +/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ +#define FULLSTOP_DONTSTOP 0 /* Normal operation. */ +#define FULLSTOP_SHUTDOWN 1 /* System shutdown with torture running. */ +#define FULLSTOP_RMMOD 2 /* Normal rmmod of torture. */ +static int fullstop = FULLSTOP_RMMOD; static DEFINE_MUTEX(fullstop_mutex); #ifdef CONFIG_HOTPLUG_CPU @@ -458,6 +461,7 @@ void __init torture_init_begin(char *ttype, bool v) mutex_lock(&fullstop_mutex); torture_type = ttype; verbose = v; + fullstop = FULLSTOP_DONTSTOP; } EXPORT_SYMBOL_GPL(torture_init_begin); @@ -498,3 +502,22 @@ bool torture_cleanup(void) return false; } EXPORT_SYMBOL_GPL(torture_cleanup); + +/* + * Is it time for the current torture test to stop? + */ +bool torture_must_stop(void) +{ + return torture_must_stop_irq() || kthread_should_stop(); +} +EXPORT_SYMBOL_GPL(torture_must_stop); + +/* + * Is it time for the current torture test to stop? This is the irq-safe + * version, hence no check for kthread_should_stop(). + */ +bool torture_must_stop_irq(void) +{ + return fullstop != FULLSTOP_DONTSTOP; +} +EXPORT_SYMBOL_GPL(torture_must_stop_irq); -- cgit v0.10.2 From fac480efcba6a9f0aea91947f151fd569538b0af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 30 Jan 2014 17:06:30 -0800 Subject: rcutorture: Add diagnostic for unscheduled system shutdown Currently, rcutorture can terminate via rmmod, via self-shutdown, via something else shutting the system down, or of course the usual catastrophic termination. The first two get flagged, so this commit adds a message for the third. For the fourth, your warranty is void as always. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/torture.c b/kernel/torture.c index ed360cf..d51de30 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -438,10 +438,12 @@ static int torture_shutdown_notify(struct notifier_block *unused1, unsigned long unused2, void *unused3) { mutex_lock(&fullstop_mutex); - if (fullstop == FULLSTOP_DONTSTOP) + if (fullstop == FULLSTOP_DONTSTOP) { + VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected"); fullstop = FULLSTOP_SHUTDOWN; - else + } else { pr_warn("Concurrent rmmod and shutdown illegal!\n"); + } mutex_unlock(&fullstop_mutex); return NOTIFY_DONE; } -- cgit v0.10.2 From 628edaa5062282b6e3d76c886fd2cbccae5cb87b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 11:57:43 -0800 Subject: rcutorture: Abstract stutter_wait() Because stuttering the test load (stopping and restarting it) is useful for non-RCU testing, this commit moves the load-stuttering functionality to kernel/torture.c. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 0259db3..203f127 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -75,8 +75,13 @@ int torture_shuffle_init(long shuffint); /* Shutdown task absorption, for when the tasks cannot safely be killed. */ void torture_shutdown_absorb(const char *title); +/* Task stuttering, which forces load/no-load transitions. */ +void stutter_wait(const char *title); +int torture_stutter_init(int s); +void torture_stutter_cleanup(void); + /* Initialization and cleanup. */ -void torture_init_begin(char *ttype, bool v); +void torture_init_begin(char *ttype, bool v, int *runnable); void torture_init_end(void); bool torture_cleanup(void); bool torture_must_stop(void); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 9357c88..4329ad1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -103,7 +103,6 @@ static struct task_struct *writer_task; static struct task_struct **fakewriter_tasks; static struct task_struct **reader_tasks; static struct task_struct *stats_task; -static struct task_struct *stutter_task; static struct task_struct *fqs_task; static struct task_struct *boost_tasks[NR_CPUS]; static struct task_struct *shutdown_task; @@ -145,8 +144,6 @@ static long n_barrier_attempts; static long n_barrier_successes; static struct list_head rcu_torture_removed; -static int stutter_pause_test; - #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE) #define RCUTORTURE_RUNNABLE_INIT 1 #else @@ -222,18 +219,6 @@ rcu_torture_free(struct rcu_torture *p) spin_unlock_bh(&rcu_torture_lock); } -static void -rcu_stutter_wait(const char *title) -{ - while (stutter_pause_test || !rcutorture_runnable) { - if (rcutorture_runnable) - schedule_timeout_interruptible(1); - else - schedule_timeout_interruptible(round_jiffies_relative(HZ)); - torture_shutdown_absorb(title); - } -} - /* * Operations vector for selecting different types of tests. */ @@ -571,7 +556,7 @@ static int rcu_torture_boost(void *arg) oldstarttime = boost_starttime; while (ULONG_CMP_LT(jiffies, oldstarttime)) { schedule_timeout_interruptible(oldstarttime - jiffies); - rcu_stutter_wait("rcu_torture_boost"); + stutter_wait("rcu_torture_boost"); if (torture_must_stop()) goto checkwait; } @@ -593,7 +578,7 @@ static int rcu_torture_boost(void *arg) call_rcu_time = jiffies; } cond_resched(); - rcu_stutter_wait("rcu_torture_boost"); + stutter_wait("rcu_torture_boost"); if (torture_must_stop()) goto checkwait; } @@ -618,7 +603,7 @@ static int rcu_torture_boost(void *arg) } /* Go do the stutter. */ -checkwait: rcu_stutter_wait("rcu_torture_boost"); +checkwait: stutter_wait("rcu_torture_boost"); } while (!torture_must_stop()); /* Clean up and exit. */ @@ -656,7 +641,7 @@ rcu_torture_fqs(void *arg) udelay(fqs_holdoff); fqs_burst_remaining -= fqs_holdoff; } - rcu_stutter_wait("rcu_torture_fqs"); + stutter_wait("rcu_torture_fqs"); } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_fqs task stopping"); torture_shutdown_absorb("rcu_torture_fqs"); @@ -728,7 +713,7 @@ rcu_torture_writer(void *arg) } } rcutorture_record_progress(++rcu_torture_current_version); - rcu_stutter_wait("rcu_torture_writer"); + stutter_wait("rcu_torture_writer"); } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_writer task stopping"); torture_shutdown_absorb("rcu_torture_writer"); @@ -765,7 +750,7 @@ rcu_torture_fakewriter(void *arg) } else { cur_ops->exp_sync(); } - rcu_stutter_wait("rcu_torture_fakewriter"); + stutter_wait("rcu_torture_fakewriter"); } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task stopping"); @@ -910,7 +895,7 @@ rcu_torture_reader(void *arg) preempt_enable(); cur_ops->readunlock(idx); schedule(); - rcu_stutter_wait("rcu_torture_reader"); + stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); VERBOSE_TOROUT_STRING("rcu_torture_reader task stopping"); torture_shutdown_absorb("rcu_torture_reader"); @@ -1034,25 +1019,6 @@ rcu_torture_stats(void *arg) return 0; } -/* Cause the rcutorture test to "stutter", starting and stopping all - * threads periodically. - */ -static int -rcu_torture_stutter(void *arg) -{ - VERBOSE_TOROUT_STRING("rcu_torture_stutter task started"); - do { - schedule_timeout_interruptible(stutter * HZ); - stutter_pause_test = 1; - if (!kthread_should_stop()) - schedule_timeout_interruptible(stutter * HZ); - stutter_pause_test = 0; - torture_shutdown_absorb("rcu_torture_stutter"); - } while (!kthread_should_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_stutter task stopping"); - return 0; -} - static inline void rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, const char *tag) { @@ -1403,11 +1369,7 @@ rcu_torture_cleanup(void) rcu_torture_barrier_cleanup(); rcu_torture_stall_cleanup(); - if (stutter_task) { - VERBOSE_TOROUT_STRING("Stopping rcu_torture_stutter task"); - kthread_stop(stutter_task); - } - stutter_task = NULL; + torture_stutter_cleanup(); if (writer_task) { VERBOSE_TOROUT_STRING("Stopping rcu_torture_writer task"); @@ -1548,7 +1510,7 @@ rcu_torture_init(void) &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, }; - torture_init_begin(torture_type, verbose); + torture_init_begin(torture_type, verbose, &rcutorture_runnable); /* Process args and tell the world that the torturer is on the job. */ for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { @@ -1682,21 +1644,14 @@ rcu_torture_init(void) if (stutter < 0) stutter = 0; if (stutter) { - /* Create the stutter thread */ - stutter_task = kthread_run(rcu_torture_stutter, NULL, - "rcu_torture_stutter"); - if (IS_ERR(stutter_task)) { - firsterr = PTR_ERR(stutter_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create stutter"); - stutter_task = NULL; + firsterr = torture_stutter_init(stutter * HZ); + if (firsterr) goto unwind; - } - torture_shuffle_task_register(stutter_task); } if (fqs_duration < 0) fqs_duration = 0; if (fqs_duration) { - /* Create the stutter thread */ + /* Create the fqs thread */ fqs_task = kthread_run(rcu_torture_fqs, NULL, "rcu_torture_fqs"); if (IS_ERR(fqs_task)) { diff --git a/kernel/torture.c b/kernel/torture.c index d51de30..b30c2ee 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -58,6 +58,7 @@ static bool verbose; #define FULLSTOP_RMMOD 2 /* Normal rmmod of torture. */ static int fullstop = FULLSTOP_RMMOD; static DEFINE_MUTEX(fullstop_mutex); +static int *torture_runnable; #ifdef CONFIG_HOTPLUG_CPU @@ -453,16 +454,100 @@ static struct notifier_block torture_shutdown_nb = { }; /* + * Variables for stuttering, which means to periodically pause and + * restart testing in order to catch bugs that appear when load is + * suddenly applied to or removed from the system. + */ +static struct task_struct *stutter_task; +static int stutter_pause_test; +static int stutter; + +/* + * Block until the stutter interval ends. This must be called periodically + * by all running kthreads that need to be subject to stuttering. + */ +void stutter_wait(const char *title) +{ + while (ACCESS_ONCE(stutter_pause_test) || + (torture_runnable && !ACCESS_ONCE(*torture_runnable))) { + if (stutter_pause_test) + schedule_timeout_interruptible(1); + else + schedule_timeout_interruptible(round_jiffies_relative(HZ)); + torture_shutdown_absorb(title); + } +} +EXPORT_SYMBOL_GPL(stutter_wait); + +/* + * Cause the torture test to "stutter", starting and stopping all + * threads periodically. + */ +static int torture_stutter(void *arg) +{ + VERBOSE_TOROUT_STRING("torture_stutter task started"); + do { + if (!torture_must_stop()) { + schedule_timeout_interruptible(stutter); + ACCESS_ONCE(stutter_pause_test) = 1; + } + if (!torture_must_stop()) + schedule_timeout_interruptible(stutter); + ACCESS_ONCE(stutter_pause_test) = 0; + torture_shutdown_absorb("torture_stutter"); + } while (!torture_must_stop()); + VERBOSE_TOROUT_STRING("torture_stutter task stopping"); + return 0; +} + +/* + * Initialize and kick off the torture_stutter kthread. + */ +int torture_stutter_init(int s) +{ + int ret; + + stutter = s; + stutter_task = kthread_run(torture_stutter, NULL, "torture_stutter"); + if (IS_ERR(stutter_task)) { + ret = PTR_ERR(stutter_task); + VERBOSE_TOROUT_ERRSTRING("Failed to create stutter"); + stutter_task = NULL; + return ret; + } + torture_shuffle_task_register(stutter_task); + return 0; +} +EXPORT_SYMBOL_GPL(torture_stutter_init); + +/* + * Cleanup after the torture_stutter kthread. + */ +void torture_stutter_cleanup(void) +{ + if (!stutter_task) + return; + VERBOSE_TOROUT_STRING("Stopping torture_stutter task"); + kthread_stop(stutter_task); + stutter_task = NULL; +} +EXPORT_SYMBOL_GPL(torture_stutter_cleanup); + +/* * Initialize torture module. Please note that this is -not- invoked via * the usual module_init() mechanism, but rather by an explicit call from * the client torture module. This call must be paired with a later * torture_init_end(). + * + * The runnable parameter points to a flag that controls whether or not + * the test is currently runnable. If there is no such flag, pass in NULL. */ -void __init torture_init_begin(char *ttype, bool v) +void __init torture_init_begin(char *ttype, bool v, int *runnable) { mutex_lock(&fullstop_mutex); torture_type = ttype; verbose = v; + torture_runnable = runnable; fullstop = FULLSTOP_DONTSTOP; } -- cgit v0.10.2 From 57a2fe90fcdaa812ac1aa6c91ba0e591c30f461a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 12:58:39 -0800 Subject: rcutorture: Apply ACCESS_ONCE() to racy fullstop accesses Because the fullstop variable can be accessed while it is being updated, this commit avoids any resulting compiler mischief through use of ACCESS_ONCE() for non-initialization accesses to this shared variable. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/torture.c b/kernel/torture.c index b30c2ee..1bafd02 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -439,9 +439,9 @@ static int torture_shutdown_notify(struct notifier_block *unused1, unsigned long unused2, void *unused3) { mutex_lock(&fullstop_mutex); - if (fullstop == FULLSTOP_DONTSTOP) { + if (ACCESS_ONCE(fullstop) == FULLSTOP_DONTSTOP) { VERBOSE_TOROUT_STRING("Unscheduled system shutdown detected"); - fullstop = FULLSTOP_SHUTDOWN; + ACCESS_ONCE(fullstop) = FULLSTOP_SHUTDOWN; } else { pr_warn("Concurrent rmmod and shutdown illegal!\n"); } @@ -575,13 +575,13 @@ EXPORT_SYMBOL_GPL(torture_init_end); bool torture_cleanup(void) { mutex_lock(&fullstop_mutex); - if (fullstop == FULLSTOP_SHUTDOWN) { + if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { pr_warn("Concurrent rmmod and shutdown illegal!\n"); mutex_unlock(&fullstop_mutex); schedule_timeout_uninterruptible(10); return true; } - fullstop = FULLSTOP_RMMOD; + ACCESS_ONCE(fullstop) = FULLSTOP_RMMOD; mutex_unlock(&fullstop_mutex); unregister_reboot_notifier(&torture_shutdown_nb); torture_shuffle_cleanup(); @@ -605,6 +605,6 @@ EXPORT_SYMBOL_GPL(torture_must_stop); */ bool torture_must_stop_irq(void) { - return fullstop != FULLSTOP_DONTSTOP; + return ACCESS_ONCE(fullstop) != FULLSTOP_DONTSTOP; } EXPORT_SYMBOL_GPL(torture_must_stop_irq); -- cgit v0.10.2 From e991dbc0770b01b7dc7d6d7660442e83ebd11828 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 14:52:13 -0800 Subject: rcutorture: Abstract torture_shutdown() Because auto-shutdown of torture testing is not specific to RCU, this commit moves the auto-shutdown function to kernel/torture.c. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 203f127..c79c41d 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -72,8 +72,10 @@ unsigned long torture_random(struct torture_random_state *trsp); void torture_shuffle_task_register(struct task_struct *tp); int torture_shuffle_init(long shuffint); -/* Shutdown task absorption, for when the tasks cannot safely be killed. */ +/* Test auto-shutdown handling. */ void torture_shutdown_absorb(const char *title); +int torture_shutdown_init(int ssecs, void (*cleanup)(void)); +void torture_shutdown_cleanup(void); /* Task stuttering, which forces load/no-load transitions. */ void stutter_wait(const char *title); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 4329ad1..897b0f9 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -105,7 +105,6 @@ static struct task_struct **reader_tasks; static struct task_struct *stats_task; static struct task_struct *fqs_task; static struct task_struct *boost_tasks[NR_CPUS]; -static struct task_struct *shutdown_task; static struct task_struct *stall_task; static struct task_struct **barrier_cbs_tasks; static struct task_struct *barrier_task; @@ -173,7 +172,6 @@ static u64 notrace rcu_trace_clock_local(void) } #endif /* #else #ifdef CONFIG_RCU_TRACE */ -static unsigned long shutdown_time; /* jiffies to system shutdown. */ static unsigned long boost_starttime; /* jiffies of next boost test start. */ DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ /* and boost task create/destroy. */ @@ -183,9 +181,6 @@ static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); -/* Forward reference. */ -static void rcu_torture_cleanup(void); - /* * Allocate an element from the rcu_tortures pool. */ @@ -1087,42 +1082,6 @@ static int rcutorture_booster_init(int cpu) } /* - * Cause the rcutorture test to shutdown the system after the test has - * run for the time specified by the shutdown_secs module parameter. - */ -static int -rcu_torture_shutdown(void *arg) -{ - long delta; - unsigned long jiffies_snap; - - VERBOSE_TOROUT_STRING("rcu_torture_shutdown task started"); - jiffies_snap = ACCESS_ONCE(jiffies); - while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && - !kthread_should_stop()) { - delta = shutdown_time - jiffies_snap; - if (verbose) - pr_alert("%s" TORTURE_FLAG - "rcu_torture_shutdown task: %lu jiffies remaining\n", - torture_type, delta); - schedule_timeout_interruptible(delta); - jiffies_snap = ACCESS_ONCE(jiffies); - } - if (kthread_should_stop()) { - VERBOSE_TOROUT_STRING("rcu_torture_shutdown task stopping"); - return 0; - } - - /* OK, shut down the system. */ - - VERBOSE_TOROUT_STRING("rcu_torture_shutdown task shutting down system"); - shutdown_task = NULL; /* Avoid self-kill deadlock. */ - rcu_torture_cleanup(); /* Get the success/failure message. */ - kernel_power_off(); /* Shut down the system. */ - return 0; -} - -/* * CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then * induces a CPU stall for the time specified by stall_cpu. */ @@ -1421,11 +1380,7 @@ rcu_torture_cleanup(void) for_each_possible_cpu(i) rcutorture_booster_cleanup(i); } - if (shutdown_task != NULL) { - VERBOSE_TOROUT_STRING("Stopping rcu_torture_shutdown task"); - kthread_stop(shutdown_task); - } - shutdown_task = NULL; + torture_shutdown_cleanup(); /* Wait for all RCU callbacks to fire. */ @@ -1681,18 +1636,10 @@ rcu_torture_init(void) } } } - if (shutdown_secs > 0) { - shutdown_time = jiffies + shutdown_secs * HZ; - shutdown_task = kthread_create(rcu_torture_shutdown, NULL, - "rcu_torture_shutdown"); - if (IS_ERR(shutdown_task)) { - firsterr = PTR_ERR(shutdown_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create shutdown"); - shutdown_task = NULL; - goto unwind; - } - torture_shuffle_task_register(shutdown_task); - wake_up_process(shutdown_task); + i = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); + if (i != 0) { + firsterr = i; + goto unwind; } i = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ); if (i != 0) { diff --git a/kernel/torture.c b/kernel/torture.c index 1bafd02..df2c700 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -419,6 +419,15 @@ static void torture_shuffle_cleanup(void) EXPORT_SYMBOL_GPL(torture_shuffle_cleanup); /* + * Variables for auto-shutdown. This allows "lights out" torture runs + * to be fully scripted. + */ +static int shutdown_secs; /* desired test duration in seconds. */ +static struct task_struct *shutdown_task; +static unsigned long shutdown_time; /* jiffies to system shutdown. */ +static void (*torture_shutdown_hook)(void); + +/* * Absorb kthreads into a kernel function that won't return, so that * they won't ever access module text or data again. */ @@ -433,6 +442,81 @@ void torture_shutdown_absorb(const char *title) EXPORT_SYMBOL_GPL(torture_shutdown_absorb); /* + * Cause the torture test to shutdown the system after the test has + * run for the time specified by the shutdown_secs parameter. + */ +static int torture_shutdown(void *arg) +{ + long delta; + unsigned long jiffies_snap; + + VERBOSE_TOROUT_STRING("torture_shutdown task started"); + jiffies_snap = jiffies; + while (ULONG_CMP_LT(jiffies_snap, shutdown_time) && + !torture_must_stop()) { + delta = shutdown_time - jiffies_snap; + if (verbose) + pr_alert("%s" TORTURE_FLAG + "torture_shutdown task: %lu jiffies remaining\n", + torture_type, delta); + schedule_timeout_interruptible(delta); + jiffies_snap = jiffies; + } + if (torture_must_stop()) { + VERBOSE_TOROUT_STRING("torture_shutdown task stopping"); + return 0; + } + + /* OK, shut down the system. */ + + VERBOSE_TOROUT_STRING("torture_shutdown task shutting down system"); + shutdown_task = NULL; /* Avoid self-kill deadlock. */ + torture_shutdown_hook();/* Shut down the enclosing torture test. */ + kernel_power_off(); /* Shut down the system. */ + return 0; +} + +/* + * Start up the shutdown task. + */ +int torture_shutdown_init(int ssecs, void (*cleanup)(void)) +{ + int ret; + + shutdown_secs = ssecs; + torture_shutdown_hook = cleanup; + if (shutdown_secs > 0) { + shutdown_time = jiffies + shutdown_secs * HZ; + shutdown_task = kthread_create(torture_shutdown, NULL, + "torture_shutdown"); + if (IS_ERR(shutdown_task)) { + ret = PTR_ERR(shutdown_task); + VERBOSE_TOROUT_ERRSTRING("Failed to create shutdown"); + shutdown_task = NULL; + return ret; + } + torture_shuffle_task_register(shutdown_task); + wake_up_process(shutdown_task); + } + return 0; +} +EXPORT_SYMBOL_GPL(torture_shutdown_init); + +/* + * Shut down the shutdown task. Say what??? Heh! This can happen if + * the torture module gets an rmmod before the shutdown time arrives. ;-) + */ +void torture_shutdown_cleanup(void) +{ + if (shutdown_task != NULL) { + VERBOSE_TOROUT_STRING("Stopping torture_shutdown task"); + kthread_stop(shutdown_task); + } + shutdown_task = NULL; +} +EXPORT_SYMBOL_GPL(torture_shutdown_cleanup); + +/* * Detect and respond to a system shutdown. */ static int torture_shutdown_notify(struct notifier_block *unused1, -- cgit v0.10.2 From 01025ebc99e39ac962c32e063cad9a3012ee8b0a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 15:15:02 -0800 Subject: rcutorture: Clean up rcu_torture_init() error checking This commit applies some simple cleanups to rcu_torture_init() error checking. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 897b0f9..746c427 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1460,7 +1460,6 @@ rcu_torture_init(void) int i; int cpu; int firsterr = 0; - int retval; static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, }; @@ -1629,33 +1628,23 @@ rcu_torture_init(void) for_each_possible_cpu(i) { if (cpu_is_offline(i)) continue; /* Heuristic: CPU can go offline. */ - retval = rcutorture_booster_init(i); - if (retval < 0) { - firsterr = retval; + firsterr = rcutorture_booster_init(i); + if (firsterr) goto unwind; - } } } - i = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); - if (i != 0) { - firsterr = i; + firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); + if (firsterr) goto unwind; - } - i = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ); - if (i != 0) { - firsterr = i; + firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval * HZ); + if (firsterr) goto unwind; - } - i = rcu_torture_stall_init(); - if (i != 0) { - firsterr = i; + firsterr = rcu_torture_stall_init(); + if (firsterr) goto unwind; - } - retval = rcu_torture_barrier_init(); - if (retval != 0) { - firsterr = retval; + firsterr = rcu_torture_barrier_init(); + if (firsterr) goto unwind; - } if (object_debug) rcu_test_debug_objects(); rcutorture_record_test_transition(); -- cgit v0.10.2 From 14562d1cf12b434da2c69b5603a4149ac43f3b48 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 15:39:52 -0800 Subject: rcutorture: Announce task creation A few "stealth-start rcutorture kthreads" have accumulated over the years, so this commit adds console-log announcements (but only if the torture tests are running verbose). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 746c427..6e9ba51 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1120,6 +1120,7 @@ static int __init rcu_torture_stall_init(void) if (stall_cpu <= 0) return 0; + VERBOSE_TOROUT_STRING("Creating rcu_torture_stall task"); stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall"); if (IS_ERR(stall_task)) { ret = PTR_ERR(stall_task); @@ -1242,6 +1243,7 @@ static int rcu_torture_barrier_init(void) return -ENOMEM; for (i = 0; i < n_barrier_cbs; i++) { init_waitqueue_head(&barrier_cbs_wq[i]); + VERBOSE_TOROUT_STRING("Creating rcu_torture_barrier_cbs task"); barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, (void *)(long)i, "rcu_torture_barrier_cbs"); @@ -1253,6 +1255,7 @@ static int rcu_torture_barrier_init(void) } torture_shuffle_task_register(barrier_cbs_tasks[i]); } + VERBOSE_TOROUT_STRING("Creating rcu_torture_barrier task"); barrier_task = kthread_run(rcu_torture_barrier, NULL, "rcu_torture_barrier"); if (IS_ERR(barrier_task)) { @@ -1606,6 +1609,7 @@ rcu_torture_init(void) fqs_duration = 0; if (fqs_duration) { /* Create the fqs thread */ + VERBOSE_TOROUT_STRING("Creating rcu_torture_fqs task"); fqs_task = kthread_run(rcu_torture_fqs, NULL, "rcu_torture_fqs"); if (IS_ERR(fqs_task)) { diff --git a/kernel/torture.c b/kernel/torture.c index df2c700..5e2838f 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -187,6 +187,7 @@ int torture_onoff_init(long ooholdoff, long oointerval) onoff_interval = oointerval; if (onoff_interval <= 0) return 0; + VERBOSE_TOROUT_STRING("Creating torture_onoff task"); onoff_task = kthread_run(torture_onoff, NULL, "torture_onoff"); if (IS_ERR(onoff_task)) { ret = PTR_ERR(onoff_task); @@ -390,6 +391,7 @@ int torture_shuffle_init(long shuffint) } /* Create the shuffler thread */ + VERBOSE_TOROUT_STRING("Creating torture_shuffle task"); shuffler_task = kthread_run(torture_shuffle, NULL, "torture_shuffle"); if (IS_ERR(shuffler_task)) { ret = PTR_ERR(shuffler_task); @@ -486,6 +488,7 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void)) shutdown_secs = ssecs; torture_shutdown_hook = cleanup; if (shutdown_secs > 0) { + VERBOSE_TOROUT_STRING("Creating torture_shutdown task"); shutdown_time = jiffies + shutdown_secs * HZ; shutdown_task = kthread_create(torture_shutdown, NULL, "torture_shutdown"); @@ -592,6 +595,7 @@ int torture_stutter_init(int s) int ret; stutter = s; + VERBOSE_TOROUT_STRING("Creating torture_stutter task"); stutter_task = kthread_run(torture_stutter, NULL, "torture_stutter"); if (IS_ERR(stutter_task)) { ret = PTR_ERR(stutter_task); -- cgit v0.10.2 From 7fafaac5b9ce22cc57777865390520476ad2262d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 31 Jan 2014 17:37:28 -0800 Subject: rcutorture: Fix rcutorture shutdown races Not all of the rcutorture kthreads waited for kthread_should_stop() before returning from their top-level functions, and none of them used torture_shutdown_absorb() properly. These problems can result in segfaults and hangs at shutdown time, and some recent changes perturbed timing sufficiently to make them much more probable. This commit therefore creates a torture_kthread_stopping() function that does the proper kthread shutdown dance in one centralized location. Accommodate this grouping by making VERBOSE_TOROUT_STRING() capable of taking a non-const string as its argument, which allows the new torture_kthread_stopping() to pass its "title" argument directly to the updated version of VERBOSE_TOROUT_STRING(). Signed-off-by: Paul E. McKenney diff --git a/include/linux/torture.h b/include/linux/torture.h index c79c41d..2ea1109 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -45,7 +45,7 @@ #define TOROUT_STRING(s) \ pr_alert("%s" TORTURE_FLAG s "\n", torture_type) #define VERBOSE_TOROUT_STRING(s) \ - do { if (verbose) pr_alert("%s" TORTURE_FLAG s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); } while (0) #define VERBOSE_TOROUT_ERRSTRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) @@ -88,5 +88,6 @@ void torture_init_end(void); bool torture_cleanup(void); bool torture_must_stop(void); bool torture_must_stop_irq(void); +void torture_kthread_stopping(char *title); #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 6e9ba51..bcaafd6 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -602,12 +602,13 @@ checkwait: stutter_wait("rcu_torture_boost"); } while (!torture_must_stop()); /* Clean up and exit. */ - VERBOSE_TOROUT_STRING("rcu_torture_boost task stopping"); - torture_shutdown_absorb("rcu_torture_boost"); - while (!kthread_should_stop() || rbi.inflight) + while (!kthread_should_stop() || rbi.inflight) { + torture_shutdown_absorb("rcu_torture_boost"); schedule_timeout_uninterruptible(1); + } smp_mb(); /* order accesses to ->inflight before stack-frame death. */ destroy_rcu_head_on_stack(&rbi.rcu); + torture_kthread_stopping("rcu_torture_boost"); return 0; } @@ -638,10 +639,7 @@ rcu_torture_fqs(void *arg) } stutter_wait("rcu_torture_fqs"); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_fqs task stopping"); - torture_shutdown_absorb("rcu_torture_fqs"); - while (!kthread_should_stop()) - schedule_timeout_uninterruptible(1); + torture_kthread_stopping("rcu_torture_fqs"); return 0; } @@ -710,10 +708,7 @@ rcu_torture_writer(void *arg) rcutorture_record_progress(++rcu_torture_current_version); stutter_wait("rcu_torture_writer"); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_writer task stopping"); - torture_shutdown_absorb("rcu_torture_writer"); - while (!kthread_should_stop()) - schedule_timeout_uninterruptible(1); + torture_kthread_stopping("rcu_torture_writer"); return 0; } @@ -748,10 +743,7 @@ rcu_torture_fakewriter(void *arg) stutter_wait("rcu_torture_fakewriter"); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task stopping"); - torture_shutdown_absorb("rcu_torture_fakewriter"); - while (!kthread_should_stop()) - schedule_timeout_uninterruptible(1); + torture_kthread_stopping("rcu_torture_fakewriter"); return 0; } @@ -892,12 +884,9 @@ rcu_torture_reader(void *arg) schedule(); stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_reader task stopping"); - torture_shutdown_absorb("rcu_torture_reader"); if (irqreader && cur_ops->irq_capable) del_timer_sync(&t); - while (!kthread_should_stop()) - schedule_timeout_uninterruptible(1); + torture_kthread_stopping("rcu_torture_reader"); return 0; } @@ -1010,7 +999,7 @@ rcu_torture_stats(void *arg) rcu_torture_stats_print(); torture_shutdown_absorb("rcu_torture_stats"); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_stats task stopping"); + torture_kthread_stopping("rcu_torture_stats"); return 0; } @@ -1171,12 +1160,9 @@ static int rcu_torture_barrier_cbs(void *arg) if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_barrier_cbs task stopping"); - torture_shutdown_absorb("rcu_torture_barrier_cbs"); - while (!kthread_should_stop()) - schedule_timeout_interruptible(1); cur_ops->cb_barrier(); destroy_rcu_head_on_stack(&rcu); + torture_kthread_stopping("rcu_torture_barrier_cbs"); return 0; } @@ -1207,10 +1193,7 @@ static int rcu_torture_barrier(void *arg) n_barrier_successes++; schedule_timeout_interruptible(HZ / 10); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("rcu_torture_barrier task stopping"); - torture_shutdown_absorb("rcu_torture_barrier"); - while (!kthread_should_stop()) - schedule_timeout_interruptible(1); + torture_kthread_stopping("rcu_torture_barrier"); return 0; } diff --git a/kernel/torture.c b/kernel/torture.c index 5e2838f..3305766 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -169,7 +169,7 @@ torture_onoff(void *arg) } schedule_timeout_interruptible(onoff_interval); } - VERBOSE_TOROUT_STRING("torture_onoff task stopping"); + torture_kthread_stopping("torture_onoff"); return 0; } @@ -370,7 +370,7 @@ static int torture_shuffle(void *arg) torture_shuffle_tasks(); torture_shutdown_absorb("torture_shuffle"); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("torture_shuffle task stopping"); + torture_kthread_stopping("torture_shuffle"); return 0; } @@ -465,7 +465,7 @@ static int torture_shutdown(void *arg) jiffies_snap = jiffies; } if (torture_must_stop()) { - VERBOSE_TOROUT_STRING("torture_shutdown task stopping"); + torture_kthread_stopping("torture_shutdown"); return 0; } @@ -583,7 +583,7 @@ static int torture_stutter(void *arg) ACCESS_ONCE(stutter_pause_test) = 0; torture_shutdown_absorb("torture_stutter"); } while (!torture_must_stop()); - VERBOSE_TOROUT_STRING("torture_stutter task stopping"); + torture_kthread_stopping("torture_stutter"); return 0; } @@ -696,3 +696,21 @@ bool torture_must_stop_irq(void) return ACCESS_ONCE(fullstop) != FULLSTOP_DONTSTOP; } EXPORT_SYMBOL_GPL(torture_must_stop_irq); + +/* + * Each kthread must wait for kthread_should_stop() before returning from + * its top-level function, otherwise segfaults ensue. This function + * prints a "stopping" message and waits for kthread_should_stop(), and + * should be called from all torture kthreads immediately prior to + * returning. + */ +void torture_kthread_stopping(char *title) +{ + if (verbose) + VERBOSE_TOROUT_STRING(title); + while (!kthread_should_stop()) { + torture_shutdown_absorb(title); + schedule_timeout_uninterruptible(1); + } +} +EXPORT_SYMBOL_GPL(torture_kthread_stopping); -- cgit v0.10.2 From bc8f83e2c0d585b201dfbb52e98f6f8741d324ea Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Feb 2014 10:02:41 -0800 Subject: rcutorture: Fix missing-return bug in rcu_torture_barrier_init() This commit adds a missing error return to the code path that creates the rcu_torture_barrier() kthread. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index bcaafd6..25e9b16 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1245,6 +1245,7 @@ static int rcu_torture_barrier_init(void) ret = PTR_ERR(barrier_task); VERBOSE_TOROUT_ERRSTRING("Failed to create rcu_torture_barrier"); barrier_task = NULL; + return ret; } torture_shuffle_task_register(barrier_task); return 0; -- cgit v0.10.2 From 47cf29b9e721967aac95ebda9e50408219755852 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Feb 2014 11:52:27 -0800 Subject: rcutorture: Abstract torture_create_kthread() Creation of kthreads is not RCU-specific, so this commit abstracts out torture_create_kthread(), saving a few tens of lines of code in the process. This change requires modifying VERBOSE_TOROUT_ERRSTRING() to take a non-const string, so that _torture_create_kthread() can avoid an open-coded substitute. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 2ea1109..430cc30 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -47,7 +47,7 @@ #define VERBOSE_TOROUT_STRING(s) \ do { if (verbose) pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); } while (0) #define VERBOSE_TOROUT_ERRSTRING(s) \ - do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! " s "\n", torture_type); } while (0) + do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); } while (0) /* Definitions for a non-string torture-test module parameter. */ #define torture_parm(type, name, init, msg) \ @@ -89,5 +89,11 @@ bool torture_cleanup(void); bool torture_must_stop(void); bool torture_must_stop_irq(void); void torture_kthread_stopping(char *title); +int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, + char *f, struct task_struct **tp); + +#define torture_create_kthread(n, arg, tp) \ + _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \ + "Failed to create " #n, &(tp)) #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 25e9b16..a6f6c84 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1105,19 +1105,9 @@ static int rcu_torture_stall(void *args) /* Spawn CPU-stall kthread, if stall_cpu specified. */ static int __init rcu_torture_stall_init(void) { - int ret; - if (stall_cpu <= 0) return 0; - VERBOSE_TOROUT_STRING("Creating rcu_torture_stall task"); - stall_task = kthread_run(rcu_torture_stall, NULL, "rcu_torture_stall"); - if (IS_ERR(stall_task)) { - ret = PTR_ERR(stall_task); - stall_task = NULL; - return ret; - } - torture_shuffle_task_register(stall_task); - return 0; + return torture_create_kthread(rcu_torture_stall, NULL, stall_task); } /* Clean up after the CPU-stall kthread, if one was spawned. */ @@ -1226,29 +1216,13 @@ static int rcu_torture_barrier_init(void) return -ENOMEM; for (i = 0; i < n_barrier_cbs; i++) { init_waitqueue_head(&barrier_cbs_wq[i]); - VERBOSE_TOROUT_STRING("Creating rcu_torture_barrier_cbs task"); - barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, - (void *)(long)i, - "rcu_torture_barrier_cbs"); - if (IS_ERR(barrier_cbs_tasks[i])) { - ret = PTR_ERR(barrier_cbs_tasks[i]); - VERBOSE_TOROUT_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); - barrier_cbs_tasks[i] = NULL; + ret = torture_create_kthread(rcu_torture_barrier_cbs, + (void *)(long)i, + barrier_cbs_tasks[i]); + if (ret) return ret; - } - torture_shuffle_task_register(barrier_cbs_tasks[i]); } - VERBOSE_TOROUT_STRING("Creating rcu_torture_barrier task"); - barrier_task = kthread_run(rcu_torture_barrier, NULL, - "rcu_torture_barrier"); - if (IS_ERR(barrier_task)) { - ret = PTR_ERR(barrier_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create rcu_torture_barrier"); - barrier_task = NULL; - return ret; - } - torture_shuffle_task_register(barrier_task); - return 0; + return torture_create_kthread(rcu_torture_barrier, NULL, barrier_task); } /* Clean up after RCU barrier testing. */ @@ -1516,17 +1490,10 @@ rcu_torture_init(void) /* Start up the kthreads. */ - VERBOSE_TOROUT_STRING("Creating rcu_torture_writer task"); - writer_task = kthread_create(rcu_torture_writer, NULL, - "rcu_torture_writer"); - if (IS_ERR(writer_task)) { - firsterr = PTR_ERR(writer_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create writer"); - writer_task = NULL; + firsterr = torture_create_kthread(rcu_torture_writer, NULL, + writer_task); + if (firsterr) goto unwind; - } - torture_shuffle_task_register(writer_task); - wake_up_process(writer_task); fakewriter_tasks = kzalloc(nfakewriters * sizeof(fakewriter_tasks[0]), GFP_KERNEL); if (fakewriter_tasks == NULL) { @@ -1535,16 +1502,10 @@ rcu_torture_init(void) goto unwind; } for (i = 0; i < nfakewriters; i++) { - VERBOSE_TOROUT_STRING("Creating rcu_torture_fakewriter task"); - fakewriter_tasks[i] = kthread_run(rcu_torture_fakewriter, NULL, - "rcu_torture_fakewriter"); - if (IS_ERR(fakewriter_tasks[i])) { - firsterr = PTR_ERR(fakewriter_tasks[i]); - VERBOSE_TOROUT_ERRSTRING("Failed to create fakewriter"); - fakewriter_tasks[i] = NULL; + firsterr = torture_create_kthread(rcu_torture_fakewriter, + NULL, fakewriter_tasks[i]); + if (firsterr) goto unwind; - } - torture_shuffle_task_register(fakewriter_tasks[i]); } reader_tasks = kzalloc(nrealreaders * sizeof(reader_tasks[0]), GFP_KERNEL); @@ -1554,28 +1515,16 @@ rcu_torture_init(void) goto unwind; } for (i = 0; i < nrealreaders; i++) { - VERBOSE_TOROUT_STRING("Creating rcu_torture_reader task"); - reader_tasks[i] = kthread_run(rcu_torture_reader, NULL, - "rcu_torture_reader"); - if (IS_ERR(reader_tasks[i])) { - firsterr = PTR_ERR(reader_tasks[i]); - VERBOSE_TOROUT_ERRSTRING("Failed to create reader"); - reader_tasks[i] = NULL; + firsterr = torture_create_kthread(rcu_torture_reader, NULL, + reader_tasks[i]); + if (firsterr) goto unwind; - } - torture_shuffle_task_register(reader_tasks[i]); } if (stat_interval > 0) { - VERBOSE_TOROUT_STRING("Creating rcu_torture_stats task"); - stats_task = kthread_run(rcu_torture_stats, NULL, - "rcu_torture_stats"); - if (IS_ERR(stats_task)) { - firsterr = PTR_ERR(stats_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create stats"); - stats_task = NULL; + firsterr = torture_create_kthread(rcu_torture_stats, NULL, + stats_task); + if (firsterr) goto unwind; - } - torture_shuffle_task_register(stats_task); } if (test_no_idle_hz) { firsterr = torture_shuffle_init(shuffle_interval * HZ); @@ -1593,16 +1542,9 @@ rcu_torture_init(void) fqs_duration = 0; if (fqs_duration) { /* Create the fqs thread */ - VERBOSE_TOROUT_STRING("Creating rcu_torture_fqs task"); - fqs_task = kthread_run(rcu_torture_fqs, NULL, - "rcu_torture_fqs"); - if (IS_ERR(fqs_task)) { - firsterr = PTR_ERR(fqs_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create fqs"); - fqs_task = NULL; + torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); + if (firsterr) goto unwind; - } - torture_shuffle_task_register(fqs_task); } if (test_boost_interval < 1) test_boost_interval = 1; diff --git a/kernel/torture.c b/kernel/torture.c index 3305766..4394518 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -180,23 +180,16 @@ torture_onoff(void *arg) */ int torture_onoff_init(long ooholdoff, long oointerval) { -#ifdef CONFIG_HOTPLUG_CPU - int ret; + int ret = 0; +#ifdef CONFIG_HOTPLUG_CPU onoff_holdoff = ooholdoff; onoff_interval = oointerval; if (onoff_interval <= 0) return 0; - VERBOSE_TOROUT_STRING("Creating torture_onoff task"); - onoff_task = kthread_run(torture_onoff, NULL, "torture_onoff"); - if (IS_ERR(onoff_task)) { - ret = PTR_ERR(onoff_task); - onoff_task = NULL; - return ret; - } - torture_shuffle_task_register(onoff_task); + ret = torture_create_kthread(torture_onoff, NULL, onoff_task); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ - return 0; + return ret; } EXPORT_SYMBOL_GPL(torture_onoff_init); @@ -379,8 +372,6 @@ static int torture_shuffle(void *arg) */ int torture_shuffle_init(long shuffint) { - int ret; - shuffle_interval = shuffint; shuffle_idle_cpu = -1; @@ -391,17 +382,7 @@ int torture_shuffle_init(long shuffint) } /* Create the shuffler thread */ - VERBOSE_TOROUT_STRING("Creating torture_shuffle task"); - shuffler_task = kthread_run(torture_shuffle, NULL, "torture_shuffle"); - if (IS_ERR(shuffler_task)) { - ret = PTR_ERR(shuffler_task); - free_cpumask_var(shuffle_tmp_mask); - VERBOSE_TOROUT_ERRSTRING("Failed to create shuffler"); - shuffler_task = NULL; - return ret; - } - torture_shuffle_task_register(shuffler_task); - return 0; + return torture_create_kthread(torture_shuffle, NULL, shuffler_task); } EXPORT_SYMBOL_GPL(torture_shuffle_init); @@ -483,25 +464,16 @@ static int torture_shutdown(void *arg) */ int torture_shutdown_init(int ssecs, void (*cleanup)(void)) { - int ret; + int ret = 0; shutdown_secs = ssecs; torture_shutdown_hook = cleanup; if (shutdown_secs > 0) { - VERBOSE_TOROUT_STRING("Creating torture_shutdown task"); shutdown_time = jiffies + shutdown_secs * HZ; - shutdown_task = kthread_create(torture_shutdown, NULL, - "torture_shutdown"); - if (IS_ERR(shutdown_task)) { - ret = PTR_ERR(shutdown_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create shutdown"); - shutdown_task = NULL; - return ret; - } - torture_shuffle_task_register(shutdown_task); - wake_up_process(shutdown_task); + ret = torture_create_kthread(torture_shutdown, NULL, + shutdown_task); } - return 0; + return ret; } EXPORT_SYMBOL_GPL(torture_shutdown_init); @@ -595,16 +567,8 @@ int torture_stutter_init(int s) int ret; stutter = s; - VERBOSE_TOROUT_STRING("Creating torture_stutter task"); - stutter_task = kthread_run(torture_stutter, NULL, "torture_stutter"); - if (IS_ERR(stutter_task)) { - ret = PTR_ERR(stutter_task); - VERBOSE_TOROUT_ERRSTRING("Failed to create stutter"); - stutter_task = NULL; - return ret; - } - torture_shuffle_task_register(stutter_task); - return 0; + ret = torture_create_kthread(torture_stutter, NULL, stutter_task); + return ret; } EXPORT_SYMBOL_GPL(torture_stutter_init); @@ -714,3 +678,25 @@ void torture_kthread_stopping(char *title) } } EXPORT_SYMBOL_GPL(torture_kthread_stopping); + +/* + * Create a generic torture kthread that is immediately runnable. If you + * need the kthread to be stopped so that you can do something to it before + * it starts, you will need to open-code your own. + */ +int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, + char *f, struct task_struct **tp) +{ + int ret = 0; + + VERBOSE_TOROUT_STRING(m); + *tp = kthread_run(fn, arg, s); + if (IS_ERR(*tp)) { + ret = PTR_ERR(*tp); + VERBOSE_TOROUT_ERRSTRING(f); + *tp = NULL; + } + torture_shuffle_task_register(*tp); + return ret; +} +EXPORT_SYMBOL_GPL(_torture_create_kthread); -- cgit v0.10.2 From 9c029b86098decd4660eec511b8d2d42da3e7dd9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Feb 2014 11:47:08 -0800 Subject: rcutorture: Abstract torture_stop_kthread() Stopping of kthreads is not RCU-specific, so this commit abstracts out torture_stop_kthread(), saving a few lines of code in the process. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 430cc30..7ccfb0a 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -91,9 +91,12 @@ bool torture_must_stop_irq(void); void torture_kthread_stopping(char *title); int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, char *f, struct task_struct **tp); +void _torture_stop_kthread(char *m, struct task_struct **tp); #define torture_create_kthread(n, arg, tp) \ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \ "Failed to create " #n, &(tp)) +#define torture_stop_kthread(n, tp) \ + _torture_stop_kthread("Stopping " #n " task", &(tp)) #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index a6f6c84..37bd4be 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1033,14 +1033,12 @@ static void rcutorture_booster_cleanup(int cpu) if (boost_tasks[cpu] == NULL) return; mutex_lock(&boost_mutex); - VERBOSE_TOROUT_STRING("Stopping rcu_torture_boost task"); t = boost_tasks[cpu]; boost_tasks[cpu] = NULL; mutex_unlock(&boost_mutex); /* This must be outside of the mutex, otherwise deadlock! */ - kthread_stop(t); - boost_tasks[cpu] = NULL; + torture_stop_kthread(rcu_torture_boost, t); } static int rcutorture_booster_init(int cpu) @@ -1110,16 +1108,6 @@ static int __init rcu_torture_stall_init(void) return torture_create_kthread(rcu_torture_stall, NULL, stall_task); } -/* Clean up after the CPU-stall kthread, if one was spawned. */ -static void rcu_torture_stall_cleanup(void) -{ - if (stall_task == NULL) - return; - VERBOSE_TOROUT_STRING("Stopping rcu_torture_stall_task."); - kthread_stop(stall_task); - stall_task = NULL; -} - /* Callback function for RCU barrier testing. */ void rcu_torture_barrier_cbf(struct rcu_head *rcu) { @@ -1230,19 +1218,11 @@ static void rcu_torture_barrier_cleanup(void) { int i; - if (barrier_task != NULL) { - VERBOSE_TOROUT_STRING("Stopping rcu_torture_barrier task"); - kthread_stop(barrier_task); - barrier_task = NULL; - } + torture_stop_kthread(rcu_torture_barrier, barrier_task); if (barrier_cbs_tasks != NULL) { - for (i = 0; i < n_barrier_cbs; i++) { - if (barrier_cbs_tasks[i] != NULL) { - VERBOSE_TOROUT_STRING("Stopping rcu_torture_barrier_cbs task"); - kthread_stop(barrier_cbs_tasks[i]); - barrier_cbs_tasks[i] = NULL; - } - } + for (i = 0; i < n_barrier_cbs; i++) + torture_stop_kthread(rcu_torture_barrier_cbs, + barrier_cbs_tasks[i]); kfree(barrier_cbs_tasks); barrier_cbs_tasks = NULL; } @@ -1288,53 +1268,29 @@ rcu_torture_cleanup(void) } rcu_torture_barrier_cleanup(); - rcu_torture_stall_cleanup(); + torture_stop_kthread(rcu_torture_stall, stall_task); torture_stutter_cleanup(); - - if (writer_task) { - VERBOSE_TOROUT_STRING("Stopping rcu_torture_writer task"); - kthread_stop(writer_task); - } - writer_task = NULL; + torture_stop_kthread(rcu_torture_writer, writer_task); if (reader_tasks) { - for (i = 0; i < nrealreaders; i++) { - if (reader_tasks[i]) { - VERBOSE_TOROUT_STRING( - "Stopping rcu_torture_reader task"); - kthread_stop(reader_tasks[i]); - } - reader_tasks[i] = NULL; - } + for (i = 0; i < nrealreaders; i++) + torture_stop_kthread(rcu_torture_reader, + reader_tasks[i]); kfree(reader_tasks); - reader_tasks = NULL; } rcu_torture_current = NULL; if (fakewriter_tasks) { for (i = 0; i < nfakewriters; i++) { - if (fakewriter_tasks[i]) { - VERBOSE_TOROUT_STRING( - "Stopping rcu_torture_fakewriter task"); - kthread_stop(fakewriter_tasks[i]); - } - fakewriter_tasks[i] = NULL; + torture_stop_kthread(rcu_torture_fakewriter, + fakewriter_tasks[i]); } kfree(fakewriter_tasks); fakewriter_tasks = NULL; } - if (stats_task) { - VERBOSE_TOROUT_STRING("Stopping rcu_torture_stats task"); - kthread_stop(stats_task); - } - stats_task = NULL; - - if (fqs_task) { - VERBOSE_TOROUT_STRING("Stopping rcu_torture_fqs task"); - kthread_stop(fqs_task); - } - fqs_task = NULL; + torture_stop_kthread(rcu_torture_stats, stats_task); + torture_stop_kthread(rcu_torture_fqs, fqs_task); if ((test_boost == 1 && cur_ops->can_boost) || test_boost == 2) { unregister_cpu_notifier(&rcutorture_cpu_nb); diff --git a/kernel/torture.c b/kernel/torture.c index 4394518..871f636 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -700,3 +700,16 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m, return ret; } EXPORT_SYMBOL_GPL(_torture_create_kthread); + +/* + * Stop a generic kthread, emitting a message. + */ +void _torture_stop_kthread(char *m, struct task_struct **tp) +{ + if (*tp == NULL) + return; + VERBOSE_TOROUT_STRING(m); + kthread_stop(*tp); + *tp = NULL; +} +EXPORT_SYMBOL_GPL(_torture_stop_kthread); -- cgit v0.10.2 From bfefc73aa1d1bad317bccef8a15da39263d3d962 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Feb 2014 12:35:27 -0800 Subject: rcutorture: Stop generic kthreads in torture_cleanup() The specific torture modules (like rcutorture) need to call torture_cleanup() in any case, so this commit makes torture_cleanup() deal with torture_shutdown_cleanup() and torture_stutter_cleanup() so that the specific modules don't have to deal with these details. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/torture.h b/include/linux/torture.h index 7ccfb0a..b2e2b46 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -75,12 +75,10 @@ int torture_shuffle_init(long shuffint); /* Test auto-shutdown handling. */ void torture_shutdown_absorb(const char *title); int torture_shutdown_init(int ssecs, void (*cleanup)(void)); -void torture_shutdown_cleanup(void); /* Task stuttering, which forces load/no-load transitions. */ void stutter_wait(const char *title); int torture_stutter_init(int s); -void torture_stutter_cleanup(void); /* Initialization and cleanup. */ void torture_init_begin(char *ttype, bool v, int *runnable); diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 37bd4be..40792e7 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -53,11 +53,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney and Josh Triplett "); -MODULE_ALIAS("rcutorture"); -#ifdef MODULE_PARAM_PREFIX -#undef MODULE_PARAM_PREFIX -#endif -#define MODULE_PARAM_PREFIX "rcutorture." torture_param(int, fqs_duration, 0, "Duration of fqs bursts (us), 0 to disable"); @@ -1269,7 +1264,6 @@ rcu_torture_cleanup(void) rcu_torture_barrier_cleanup(); torture_stop_kthread(rcu_torture_stall, stall_task); - torture_stutter_cleanup(); torture_stop_kthread(rcu_torture_writer, writer_task); if (reader_tasks) { @@ -1297,7 +1291,6 @@ rcu_torture_cleanup(void) for_each_possible_cpu(i) rcutorture_booster_cleanup(i); } - torture_shutdown_cleanup(); /* Wait for all RCU callbacks to fire. */ diff --git a/kernel/torture.c b/kernel/torture.c index 871f636..b26c7b4 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -478,20 +478,6 @@ int torture_shutdown_init(int ssecs, void (*cleanup)(void)) EXPORT_SYMBOL_GPL(torture_shutdown_init); /* - * Shut down the shutdown task. Say what??? Heh! This can happen if - * the torture module gets an rmmod before the shutdown time arrives. ;-) - */ -void torture_shutdown_cleanup(void) -{ - if (shutdown_task != NULL) { - VERBOSE_TOROUT_STRING("Stopping torture_shutdown task"); - kthread_stop(shutdown_task); - } - shutdown_task = NULL; -} -EXPORT_SYMBOL_GPL(torture_shutdown_cleanup); - -/* * Detect and respond to a system shutdown. */ static int torture_shutdown_notify(struct notifier_block *unused1, @@ -513,6 +499,20 @@ static struct notifier_block torture_shutdown_nb = { }; /* + * Shut down the shutdown task. Say what??? Heh! This can happen if + * the torture module gets an rmmod before the shutdown time arrives. ;-) + */ +static void torture_shutdown_cleanup(void) +{ + unregister_reboot_notifier(&torture_shutdown_nb); + if (shutdown_task != NULL) { + VERBOSE_TOROUT_STRING("Stopping torture_shutdown task"); + kthread_stop(shutdown_task); + } + shutdown_task = NULL; +} + +/* * Variables for stuttering, which means to periodically pause and * restart testing in order to catch bugs that appear when load is * suddenly applied to or removed from the system. @@ -575,7 +575,7 @@ EXPORT_SYMBOL_GPL(torture_stutter_init); /* * Cleanup after the torture_stutter kthread. */ -void torture_stutter_cleanup(void) +static void torture_stutter_cleanup(void) { if (!stutter_task) return; @@ -583,7 +583,6 @@ void torture_stutter_cleanup(void) kthread_stop(stutter_task); stutter_task = NULL; } -EXPORT_SYMBOL_GPL(torture_stutter_cleanup); /* * Initialize torture module. Please note that this is -not- invoked via @@ -619,7 +618,8 @@ EXPORT_SYMBOL_GPL(torture_init_end); * Clean up torture module. Please note that this is -not- invoked via * the usual module_exit() mechanism, but rather by an explicit call from * the client torture module. Returns true if a race with system shutdown - * is detected. + * is detected, otherwise, all kthreads started by functions in this file + * will be shut down. * * This must be called before the caller starts shutting down its own * kthreads. @@ -635,8 +635,9 @@ bool torture_cleanup(void) } ACCESS_ONCE(fullstop) = FULLSTOP_RMMOD; mutex_unlock(&fullstop_mutex); - unregister_reboot_notifier(&torture_shutdown_nb); + torture_shutdown_cleanup(); torture_shuffle_cleanup(); + torture_stutter_cleanup(); torture_onoff_cleanup(); return false; } -- cgit v0.10.2 From 2193e1604eac422df05f77b53667237fcf130bf5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Feb 2014 15:16:25 -0800 Subject: rcutorture: Abstract kvm-recheck.sh This commit creates a plug-in to allow kvm-recheck.sh to process non-rcutorture console output. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh new file mode 100755 index 0000000..d75b1dc5 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# +# Analyze a given results directory for rcutorture progress. +# +# Usage: sh kvm-recheck-rcu.sh resdir +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2014 +# +# Authors: Paul E. McKenney + +i="$1" +if test -d $i +then + : +else + echo Unreadable results directory: $i + exit 1 +fi + +configfile=`echo $i | sed -e 's/^.*\///'` +ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` +if test -z "$ngps" +then + echo $configfile +else + title="$configfile ------- $ngps grace periods" + dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` + if test -z "$dur" + then + : + else + ngpsps=`awk -v ngps=$ngps -v dur=$dur ' + BEGIN { print ngps / dur }' < /dev/null` + title="$title ($ngpsps per second)" + fi + echo $title +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 89b5dba..31c8706 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -37,25 +37,7 @@ do resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` head -1 $resdir/log fi - configfile=`echo $i | sed -e 's/^.*\///'` - ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` - if test -z "$ngps" - then - echo $configfile - else - title="$configfile ------- $ngps grace periods" - dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` - if test -z "$dur" - then - : - else - ngpsps=$((ngps / dur)) - ngpsps=`awk -v ngps=$ngps -v dur=$dur ' - BEGIN { print ngps / dur }' < /dev/null` - title="$title ($ngpsps per second)" - fi - echo $title - fi + kvm-recheck-rcu.sh $i configcheck.sh $i/.config $i/ConfigFragment parse-build.sh $i/Make.out $configfile parse-rcutorture.sh $i/console.log $configfile -- cgit v0.10.2 From 0af3fe1efa534a43385fe2694c42ffec7a310e46 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 4 Feb 2014 15:51:41 -0800 Subject: locktorture: Add a lock-torture kernel module This commit adds the locking counterpart to rcutorture. Signed-off-by: Paul E. McKenney [ paulmck: Make n_lock_torture_errors and torture_spinlock static as suggested by Fengguang Wu. ] Reviewed-by: Josh Triplett diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index baab8e5..a28ea6d 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o +obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c new file mode 100644 index 0000000..d69d20d --- /dev/null +++ b/kernel/locking/locktorture.c @@ -0,0 +1,421 @@ +/* + * Module-based torture test facility for locking + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (C) IBM Corporation, 2014 + * + * Author: Paul E. McKenney + * Based on kernel/rcu/torture.c. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Paul E. McKenney "); + +torture_param(int, nwriters_stress, -1, + "Number of write-locking stress-test threads"); +torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); +torture_param(int, onoff_interval, 0, + "Time between CPU hotplugs (s), 0=disable"); +torture_param(int, shuffle_interval, 3, + "Number of jiffies between shuffles, 0=disable"); +torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable."); +torture_param(int, stat_interval, 60, + "Number of seconds between stats printk()s"); +torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable"); +torture_param(bool, verbose, true, + "Enable verbose debugging printk()s"); + +static char *torture_type = "spin_lock"; +module_param(torture_type, charp, 0444); +MODULE_PARM_DESC(torture_type, + "Type of lock to torture (spin_lock, spin_lock_irq, ...)"); + +static atomic_t n_lock_torture_errors; + +static struct task_struct *stats_task; +static struct task_struct **writer_tasks; + +static int nrealwriters_stress; +static bool lock_is_write_held; + +struct lock_writer_stress_stats { + long n_write_lock_fail; + long n_write_lock_acquired; +}; +static struct lock_writer_stress_stats *lwsa; + +#if defined(MODULE) || defined(CONFIG_LOCK_TORTURE_TEST_RUNNABLE) +#define LOCKTORTURE_RUNNABLE_INIT 1 +#else +#define LOCKTORTURE_RUNNABLE_INIT 0 +#endif +int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; +module_param(locktorture_runnable, int, 0444); +MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at boot"); + +/* Forward reference. */ +static void lock_torture_cleanup(void); + +/* + * Operations vector for selecting different types of tests. + */ +struct lock_torture_ops { + void (*init)(void); + int (*writelock)(void); + void (*write_delay)(struct torture_random_state *trsp); + void (*writeunlock)(void); + unsigned long flags; + const char *name; +}; + +static struct lock_torture_ops *cur_ops; + +/* + * Definitions for lock torture testing. + */ + +static DEFINE_SPINLOCK(torture_spinlock); + +static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock) +{ + spin_lock(&torture_spinlock); + return 0; +} + +static void torture_spin_lock_write_delay(struct torture_random_state *trsp) +{ + const unsigned long shortdelay_us = 2; + const unsigned long longdelay_us = 100; + + /* We want a short delay mostly to emulate likely code, and + * we want a long delay occasionally to force massive contention. + */ + if (!(torture_random(trsp) % + (nrealwriters_stress * 2000 * longdelay_us))) + mdelay(longdelay_us); + if (!(torture_random(trsp) % + (nrealwriters_stress * 2 * shortdelay_us))) + udelay(shortdelay_us); +#ifdef CONFIG_PREEMPT + if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) + preempt_schedule(); /* Allow test to be preempted. */ +#endif +} + +static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock) +{ + spin_unlock(&torture_spinlock); +} + +static struct lock_torture_ops spin_lock_ops = { + .writelock = torture_spin_lock_write_lock, + .write_delay = torture_spin_lock_write_delay, + .writeunlock = torture_spin_lock_write_unlock, + .name = "spin_lock" +}; + +static int torture_spin_lock_write_lock_irq(void) +__acquires(torture_spinlock_irq) +{ + unsigned long flags; + + spin_lock_irqsave(&torture_spinlock, flags); + cur_ops->flags = flags; + return 0; +} + +static void torture_lock_spin_write_unlock_irq(void) +__releases(torture_spinlock) +{ + spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags); +} + +static struct lock_torture_ops spin_lock_irq_ops = { + .writelock = torture_spin_lock_write_lock_irq, + .write_delay = torture_spin_lock_write_delay, + .writeunlock = torture_lock_spin_write_unlock_irq, + .name = "spin_lock_irq" +}; + +/* + * Lock torture writer kthread. Repeatedly acquires and releases + * the lock, checking for duplicate acquisitions. + */ +static int lock_torture_writer(void *arg) +{ + struct lock_writer_stress_stats *lwsp = arg; + static DEFINE_TORTURE_RANDOM(rand); + + VERBOSE_TOROUT_STRING("lock_torture_writer task started"); + set_user_nice(current, 19); + + do { + schedule_timeout_uninterruptible(1); + cur_ops->writelock(); + if (WARN_ON_ONCE(lock_is_write_held)) + lwsp->n_write_lock_fail++; + lock_is_write_held = 1; + lwsp->n_write_lock_acquired++; + cur_ops->write_delay(&rand); + lock_is_write_held = 0; + cur_ops->writeunlock(); + stutter_wait("lock_torture_writer"); + } while (!torture_must_stop()); + torture_kthread_stopping("lock_torture_writer"); + return 0; +} + +/* + * Create an lock-torture-statistics message in the specified buffer. + */ +static void lock_torture_printk(char *page) +{ + bool fail = 0; + int i; + long max = 0; + long min = lwsa[0].n_write_lock_acquired; + long long sum = 0; + + for (i = 0; i < nrealwriters_stress; i++) { + if (lwsa[i].n_write_lock_fail) + fail = true; + sum += lwsa[i].n_write_lock_acquired; + if (max < lwsa[i].n_write_lock_fail) + max = lwsa[i].n_write_lock_fail; + if (min > lwsa[i].n_write_lock_fail) + min = lwsa[i].n_write_lock_fail; + } + page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); + page += sprintf(page, + "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", + sum, max, min, max / 2 > min ? "???" : "", + fail, fail ? "!!!" : ""); + if (fail) + atomic_inc(&n_lock_torture_errors); +} + +/* + * Print torture statistics. Caller must ensure that there is only one + * call to this function at a given time!!! This is normally accomplished + * by relying on the module system to only have one copy of the module + * loaded, and then by giving the lock_torture_stats kthread full control + * (or the init/cleanup functions when lock_torture_stats thread is not + * running). + */ +static void lock_torture_stats_print(void) +{ + int size = nrealwriters_stress * 200 + 8192; + char *buf; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) { + pr_err("lock_torture_stats_print: Out of memory, need: %d", + size); + return; + } + lock_torture_printk(buf); + pr_alert("%s", buf); + kfree(buf); +} + +/* + * Periodically prints torture statistics, if periodic statistics printing + * was specified via the stat_interval module parameter. + * + * No need to worry about fullstop here, since this one doesn't reference + * volatile state or register callbacks. + */ +static int lock_torture_stats(void *arg) +{ + VERBOSE_TOROUT_STRING("lock_torture_stats task started"); + do { + schedule_timeout_interruptible(stat_interval * HZ); + lock_torture_stats_print(); + torture_shutdown_absorb("lock_torture_stats"); + } while (!torture_must_stop()); + torture_kthread_stopping("lock_torture_stats"); + return 0; +} + +static inline void +lock_torture_print_module_parms(struct lock_torture_ops *cur_ops, + const char *tag) +{ + pr_alert("%s" TORTURE_FLAG + "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", + torture_type, tag, nrealwriters_stress, stat_interval, verbose, + shuffle_interval, stutter, shutdown_secs, + onoff_interval, onoff_holdoff); +} + +static void lock_torture_cleanup(void) +{ + int i; + + if (torture_cleanup()) + return; + + if (writer_tasks) { + for (i = 0; i < nrealwriters_stress; i++) + torture_stop_kthread(lock_torture_writer, + writer_tasks[i]); + kfree(writer_tasks); + writer_tasks = NULL; + } + + torture_stop_kthread(lock_torture_stats, stats_task); + lock_torture_stats_print(); /* -After- the stats thread is stopped! */ + + if (atomic_read(&n_lock_torture_errors)) + lock_torture_print_module_parms(cur_ops, + "End of test: FAILURE"); + else if (torture_onoff_failures()) + lock_torture_print_module_parms(cur_ops, + "End of test: LOCK_HOTPLUG"); + else + lock_torture_print_module_parms(cur_ops, + "End of test: SUCCESS"); +} + +static int __init lock_torture_init(void) +{ + int i; + int firsterr = 0; + static struct lock_torture_ops *torture_ops[] = { + &spin_lock_ops, &spin_lock_irq_ops, + }; + + torture_init_begin(torture_type, verbose, &locktorture_runnable); + + /* Process args and tell the world that the torturer is on the job. */ + for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { + cur_ops = torture_ops[i]; + if (strcmp(torture_type, cur_ops->name) == 0) + break; + } + if (i == ARRAY_SIZE(torture_ops)) { + pr_alert("lock-torture: invalid torture type: \"%s\"\n", + torture_type); + pr_alert("lock-torture types:"); + for (i = 0; i < ARRAY_SIZE(torture_ops); i++) + pr_alert(" %s", torture_ops[i]->name); + pr_alert("\n"); + torture_init_end(); + return -EINVAL; + } + if (cur_ops->init) + cur_ops->init(); /* no "goto unwind" prior to this point!!! */ + + if (nwriters_stress >= 0) + nrealwriters_stress = nwriters_stress; + else + nrealwriters_stress = 2 * num_online_cpus(); + lock_torture_print_module_parms(cur_ops, "Start of test"); + + /* Initialize the statistics so that each run gets its own numbers. */ + + lock_is_write_held = 0; + lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL); + if (lwsa == NULL) { + VERBOSE_TOROUT_STRING("lwsa: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + for (i = 0; i < nrealwriters_stress; i++) { + lwsa[i].n_write_lock_fail = 0; + lwsa[i].n_write_lock_acquired = 0; + } + + /* Start up the kthreads. */ + + if (onoff_interval > 0) { + firsterr = torture_onoff_init(onoff_holdoff * HZ, + onoff_interval * HZ); + if (firsterr) + goto unwind; + } + if (shuffle_interval > 0) { + firsterr = torture_shuffle_init(shuffle_interval); + if (firsterr) + goto unwind; + } + if (shutdown_secs > 0) { + firsterr = torture_shutdown_init(shutdown_secs, + lock_torture_cleanup); + if (firsterr) + goto unwind; + } + if (stutter > 0) { + firsterr = torture_stutter_init(stutter); + if (firsterr) + goto unwind; + } + + writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]), + GFP_KERNEL); + if (writer_tasks == NULL) { + VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + for (i = 0; i < nrealwriters_stress; i++) { + firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], + writer_tasks[i]); + if (firsterr) + goto unwind; + } + if (stat_interval > 0) { + firsterr = torture_create_kthread(lock_torture_stats, NULL, + stats_task); + if (firsterr) + goto unwind; + } + torture_init_end(); + return 0; + +unwind: + torture_init_end(); + lock_torture_cleanup(); + return firsterr; +} + +module_init(lock_torture_init); +module_exit(lock_torture_cleanup); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2bfb4e5..dd7f885 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -980,6 +980,21 @@ config DEBUG_LOCKING_API_SELFTESTS The following locking APIs are covered: spinlocks, rwlocks, mutexes and rwsems. +config LOCK_TORTURE_TEST + tristate "torture tests for locking" + depends on DEBUG_KERNEL + select TORTURE_TEST + default n + help + This option provides a kernel module that runs torture tests + on kernel locking primitives. The kernel module may be built + after the fact on the running kernel to be tested, if desired. + + Say Y here if you want kernel locking-primitive torture tests + to be built into the kernel. + Say M if you want these torture tests to build as a module. + Say N if you are unsure. + endmenu # lock debugging config TRACE_IRQFLAGS -- cgit v0.10.2 From ff20e251c409da81f2b850c81964908fb4c6fe66 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Feb 2014 08:45:56 -0800 Subject: rcutorture: Add an rcu_busted to test the test This commit adds a deliberately buggy RCU implementation into rcutorture to allow easy checking that rcutorture correctly flags buggy RCU implementations. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 40792e7..da6c38d 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -372,6 +372,48 @@ static struct rcu_torture_ops rcu_bh_ops = { }; /* + * Don't even think about trying any of these in real life!!! + * The names includes "busted", and they really means it! + * The only purpose of these functions is to provide a buggy RCU + * implementation to make sure that rcutorture correctly emits + * buggy-RCU error messages. + */ +static void rcu_busted_torture_deferred_free(struct rcu_torture *p) +{ + /* This is a deliberate bug for testing purposes only! */ + rcu_torture_cb(&p->rtort_rcu); +} + +static void synchronize_rcu_busted(void) +{ + /* This is a deliberate bug for testing purposes only! */ +} + +static void +call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + /* This is a deliberate bug for testing purposes only! */ + func(head); +} + +static struct rcu_torture_ops rcu_busted_ops = { + .init = rcu_sync_torture_init, + .readlock = rcu_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = rcu_torture_read_unlock, + .completed = rcu_no_completed, + .deferred_free = rcu_busted_torture_deferred_free, + .sync = synchronize_rcu_busted, + .exp_sync = synchronize_rcu_busted, + .call = call_rcu_busted, + .cb_barrier = NULL, + .fqs = NULL, + .stats = NULL, + .irq_capable = 1, + .name = "rcu_busted" +}; + +/* * Definitions for srcu torture testing. */ @@ -1371,7 +1413,7 @@ rcu_torture_init(void) int cpu; int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { - &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, + &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, }; torture_init_begin(torture_type, verbose, &rcutorture_runnable); -- cgit v0.10.2 From a1be00d9533f75bb46b9b64526d5254c6be0f122 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Feb 2014 09:02:51 -0800 Subject: rcutorture: Create config files for scripted test-the-test testing This commit adds a pair of files in the configs directory to allow test-the-test runs of rcutorture via a "--configs BUSTED" argument to the kvm.sh script. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/configs/BUSTED b/tools/testing/selftests/rcutorture/configs/BUSTED new file mode 100644 index 0000000..46fb6cf --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/BUSTED @@ -0,0 +1,8 @@ +CONFIG_RCU_TRACE=n +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/BUSTED.boot new file mode 100644 index 0000000..6804f9d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/BUSTED.boot @@ -0,0 +1 @@ +rcutorture.torture_type=rcu_busted -- cgit v0.10.2 From d2ebf7eea0d3fd450ca5b1cb2243fe1d69d399ce Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Feb 2014 11:54:32 -0800 Subject: rcutorture: Create CFcommon file for common Kconfig parameters Currently, CONFIG_RCU_TORTURE_TEST=y is hardcoded into the kvm-test-1-rcu.sh script and CONFIG_PRINTK_TIME=y is mentioned in each and every configs file. This commit creates a CFcommon file for these two Kconfig parameters, and modifies kvm-test-1-rcu.sh to copy this new file into the .config file during the build. This change will allow these scripts to operate on torture types other than just rcutorture. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh index 41ea781..30cfbc8 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh @@ -47,6 +47,7 @@ trap 'rm -rf $T' 0 . $KVPATH/ver_functions.sh config_template=${1} +config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'` title=`echo $config_template | sed -e 's/^.*\///'` builddir=${2} if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir" @@ -63,9 +64,10 @@ fi cp $config_template $resdir/ConfigFragment echo ' ---' `date`: Starting build echo ' ---' Kconfig fragment at: $config_template >> $resdir/log -cat << '___EOF___' >> $T -CONFIG_RCU_TORTURE_TEST=y -___EOF___ +if test -r "$config_dir/CFcommon" +then + cat < $config_dir/CFcommon >> $T +fi # Optimizations below this point # CONFIG_USB=n # CONFIG_SECURITY=n diff --git a/tools/testing/selftests/rcutorture/configs/BUSTED b/tools/testing/selftests/rcutorture/configs/BUSTED index 46fb6cf..48d8a24 100644 --- a/tools/testing/selftests/rcutorture/configs/BUSTED +++ b/tools/testing/selftests/rcutorture/configs/BUSTED @@ -5,4 +5,3 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/CFcommon b/tools/testing/selftests/rcutorture/configs/CFcommon new file mode 100644 index 0000000..d2d2a86 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/CFcommon @@ -0,0 +1,2 @@ +CONFIG_RCU_TORTURE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/SRCU-N b/tools/testing/selftests/rcutorture/configs/SRCU-N index b4c6340..9fbb41b 100644 --- a/tools/testing/selftests/rcutorture/configs/SRCU-N +++ b/tools/testing/selftests/rcutorture/configs/SRCU-N @@ -5,4 +5,3 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/SRCU-P b/tools/testing/selftests/rcutorture/configs/SRCU-P index 6650e00..4b6f272 100644 --- a/tools/testing/selftests/rcutorture/configs/SRCU-P +++ b/tools/testing/selftests/rcutorture/configs/SRCU-P @@ -5,4 +5,3 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TINY01 b/tools/testing/selftests/rcutorture/configs/TINY01 index 0c2823f..0a63e07 100644 --- a/tools/testing/selftests/rcutorture/configs/TINY01 +++ b/tools/testing/selftests/rcutorture/configs/TINY01 @@ -10,4 +10,3 @@ CONFIG_RCU_TRACE=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_PREEMPT_COUNT=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TINY02 b/tools/testing/selftests/rcutorture/configs/TINY02 index e5072d7..f4feaee 100644 --- a/tools/testing/selftests/rcutorture/configs/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/TINY02 @@ -10,4 +10,3 @@ CONFIG_RCU_TRACE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_PREEMPT_COUNT=y -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE01 b/tools/testing/selftests/rcutorture/configs/TREE01 index 141119a..9c827ec 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/TREE01 @@ -20,4 +20,3 @@ CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_CPU_STALL_VERBOSE=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE02 b/tools/testing/selftests/rcutorture/configs/TREE02 index 2d4d096..bca03f6 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/TREE02 @@ -23,4 +23,3 @@ CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_CPU_STALL_VERBOSE=y CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE03 b/tools/testing/selftests/rcutorture/configs/TREE03 index a47de5b..c1f111c 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE03 +++ b/tools/testing/selftests/rcutorture/configs/TREE03 @@ -20,4 +20,3 @@ CONFIG_RCU_CPU_STALL_VERBOSE=n CONFIG_RCU_BOOST=y CONFIG_RCU_BOOST_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE04 b/tools/testing/selftests/rcutorture/configs/TREE04 index 8d839b8..7dbd27c 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/TREE04 @@ -22,4 +22,3 @@ CONFIG_PROVE_RCU_DELAY=n CONFIG_RCU_CPU_STALL_INFO=y CONFIG_RCU_CPU_STALL_VERBOSE=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE05 b/tools/testing/selftests/rcutorture/configs/TREE05 index b5ba72e..d0f32e5 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/TREE05 @@ -22,4 +22,3 @@ CONFIG_PROVE_RCU_DELAY=y CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_CPU_STALL_VERBOSE=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE06 b/tools/testing/selftests/rcutorture/configs/TREE06 index 7c95ab4..2e477df 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/TREE06 @@ -23,4 +23,3 @@ CONFIG_PROVE_RCU_DELAY=n CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_CPU_STALL_VERBOSE=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE07 b/tools/testing/selftests/rcutorture/configs/TREE07 index 1467404..042f86e 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/TREE07 @@ -21,4 +21,3 @@ CONFIG_PROVE_RCU_DELAY=n CONFIG_RCU_CPU_STALL_INFO=y CONFIG_RCU_CPU_STALL_VERBOSE=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE08 b/tools/testing/selftests/rcutorture/configs/TREE08 index 7d097a6..3438cee 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/TREE08 @@ -23,4 +23,3 @@ CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_CPU_STALL_VERBOSE=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE08-T b/tools/testing/selftests/rcutorture/configs/TREE08-T index 442c4e4..bf4523d 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE08-T +++ b/tools/testing/selftests/rcutorture/configs/TREE08-T @@ -23,4 +23,3 @@ CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_CPU_STALL_VERBOSE=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE09 b/tools/testing/selftests/rcutorture/configs/TREE09 index 0d1ec0d..81e4f7c 100644 --- a/tools/testing/selftests/rcutorture/configs/TREE09 +++ b/tools/testing/selftests/rcutorture/configs/TREE09 @@ -18,4 +18,3 @@ CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_CPU_STALL_VERBOSE=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PRINTK_TIME=y -- cgit v0.10.2 From a5afdeb13c86564bdbd27e8d154933fb65ea2426 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Feb 2014 17:50:32 -0800 Subject: rcutorture: Remove RCU dependencies from ver_functions.sh API The current set of functions in ver_functions.sh have APIs that are specific to RCU. This commit therefore makes an RCU-independent function that outputs version-specific boot arguments. This has the benefit that a test-type-independent call in kvm-test-1-rcu.sh can now handle any type of test, given a test-type-specific set of files in a configs directory. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh index 30cfbc8..7986b3e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh @@ -148,12 +148,8 @@ qemu_append="`identify_qemu_append "$QEMU"`" # Pull in Kconfig-fragment boot parameters boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" -# Generate CPU-hotplug boot parameters -boot_args="`rcutorture_param_onoff "$boot_args" $builddir/.config`" -# Generate rcu_barrier() boot parameter -boot_args="`rcutorture_param_n_barrier_cbs "$boot_args"`" -# Pull in standard rcutorture boot arguments -boot_args="$boot_args rcutorture.stat_interval=15 rcutorture.shutdown_secs=$seconds rcutorture.rcutorture_runnable=1 rcutorture.test_no_idle_hz=1 rcutorture.verbose=1" +# Generate kernel-version-specific boot parameters +boot_args="`per_version_boot_params "$boot_args" $builddir/.config $seconds`" echo $QEMU $qemu_args -m 512 -kernel $builddir/arch/x86/boot/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd if test -n "$RCU_BUILDONLY" diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/v0.0/ver_functions.sh index e805253..5ace37a 100644 --- a/tools/testing/selftests/rcutorture/configs/v0.0/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/v0.0/ver_functions.sh @@ -20,16 +20,14 @@ # # Authors: Paul E. McKenney -# rcutorture_param_n_barrier_cbs bootparam-string -# -# Adds n_barrier_cbs rcutorture module parameter to kernels having it. -rcutorture_param_n_barrier_cbs () { - echo $1 -} - -# rcutorture_param_onoff bootparam-string config-file -# -# Adds onoff rcutorture module parameters to kernels having it. -rcutorture_param_onoff () { - echo $1 +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +# Which old kernels do not. +per_version_boot_params () { + echo rcutorture.stat_interval=15 \ + rcutorture.shutdown_secs=$3 \ + rcutorture.rcutorture_runnable=1 \ + rcutorture.test_no_idle_hz=1 \ + rcutorture.verbose=1 } diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/v3.3/ver_functions.sh index c37432f..bae5569 100644 --- a/tools/testing/selftests/rcutorture/configs/v3.3/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/v3.3/ver_functions.sh @@ -20,22 +20,25 @@ # # Authors: Paul E. McKenney -# rcutorture_param_n_barrier_cbs bootparam-string -# -# Adds n_barrier_cbs rcutorture module parameter to kernels having it. -rcutorture_param_n_barrier_cbs () { - echo $1 -} - # rcutorture_param_onoff bootparam-string config-file # # Adds onoff rcutorture module parameters to kernels having it. rcutorture_param_onoff () { if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" then - echo CPU-hotplug kernel, adding rcutorture onoff. - echo $1 rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 - else - echo $1 + echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 + echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 fi } + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `rcutorture_param_onoff "$1" "$2"` \ + rcutorture.stat_interval=15 \ + rcutorture.shutdown_secs=$3 \ + rcutorture.rcutorture_runnable=1 \ + rcutorture.test_no_idle_hz=1 \ + rcutorture.verbose=1 +} diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/v3.5/ver_functions.sh index 6a5f13a..8977d8d 100644 --- a/tools/testing/selftests/rcutorture/configs/v3.5/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/v3.5/ver_functions.sh @@ -26,9 +26,9 @@ rcutorture_param_n_barrier_cbs () { if echo $1 | grep -q "rcutorture\.n_barrier_cbs" then - echo $1 + : else - echo $1 rcutorture.n_barrier_cbs=4 + echo rcutorture.n_barrier_cbs=4 fi } @@ -38,9 +38,20 @@ rcutorture_param_n_barrier_cbs () { rcutorture_param_onoff () { if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" then - echo CPU-hotplug kernel, adding rcutorture onoff. - echo $1 rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 - else - echo $1 + echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 + echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 fi } + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `rcutorture_param_onoff "$1" "$2"` \ + `rcutorture_param_n_barrier_cbs "$1"` \ + rcutorture.stat_interval=15 \ + rcutorture.shutdown_secs=$3 \ + rcutorture.rcutorture_runnable=1 \ + rcutorture.test_no_idle_hz=1 \ + rcutorture.verbose=1 +} diff --git a/tools/testing/selftests/rcutorture/configs/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/ver_functions.sh index 5e40ead..8977d8d 100644 --- a/tools/testing/selftests/rcutorture/configs/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/ver_functions.sh @@ -26,9 +26,9 @@ rcutorture_param_n_barrier_cbs () { if echo $1 | grep -q "rcutorture\.n_barrier_cbs" then - echo $1 + : else - echo $1 rcutorture.n_barrier_cbs=4 + echo rcutorture.n_barrier_cbs=4 fi } @@ -39,8 +39,19 @@ rcutorture_param_onoff () { if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" then echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 - echo $1 rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 - else - echo $1 + echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 fi } + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `rcutorture_param_onoff "$1" "$2"` \ + `rcutorture_param_n_barrier_cbs "$1"` \ + rcutorture.stat_interval=15 \ + rcutorture.shutdown_secs=$3 \ + rcutorture.rcutorture_runnable=1 \ + rcutorture.test_no_idle_hz=1 \ + rcutorture.verbose=1 +} -- cgit v0.10.2 From d3b1548aa247d7b436834dbe518189bb9c1ebd4b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 6 Feb 2014 20:35:41 -0800 Subject: rcutorture: Rename kvm-test-1-rcu.sh The kvm-test-1-rcu.sh is not specific to RCU, so this commit renames it to kvm-test-1-run.sh. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh deleted file mode 100755 index 7986b3e..0000000 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-rcu.sh +++ /dev/null @@ -1,203 +0,0 @@ -#!/bin/bash -# -# Run a kvm-based test of the specified tree on the specified configs. -# Fully automated run and error checking, no graphics console. -# -# Execute this in the source tree. Do not run it as a background task -# because qemu does not seem to like that much. -# -# Usage: sh kvm-test-1-rcu.sh config builddir resdir minutes qemu-args boot_args -# -# qemu-args defaults to "" -- you will want "-nographic" if running headless. -# boot_args defaults to "root=/dev/sda noapic selinux=0 console=ttyS0" -# "initcall_debug debug rcutorture.stat_interval=15" -# "rcutorture.shutdown_secs=$((minutes * 60))" -# "rcutorture.rcutorture_runnable=1" -# -# Anything you specify for either qemu-args or boot_args is appended to -# the default values. The "-smp" value is deduced from the contents of -# the config fragment. -# -# More sophisticated argument parsing is clearly needed. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2011 -# -# Authors: Paul E. McKenney - -grace=120 - -T=/tmp/kvm-test-1-rcu.sh.$$ -trap 'rm -rf $T' 0 - -. $KVM/bin/functions.sh -. $KVPATH/ver_functions.sh - -config_template=${1} -config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'` -title=`echo $config_template | sed -e 's/^.*\///'` -builddir=${2} -if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir" -then - echo "kvm-test-1-rcu.sh :$builddir: Not a writable directory, cannot build into it" - exit 1 -fi -resdir=${3} -if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir" -then - echo "kvm-test-1-rcu.sh :$resdir: Not a writable directory, cannot store results into it" - exit 1 -fi -cp $config_template $resdir/ConfigFragment -echo ' ---' `date`: Starting build -echo ' ---' Kconfig fragment at: $config_template >> $resdir/log -if test -r "$config_dir/CFcommon" -then - cat < $config_dir/CFcommon >> $T -fi -# Optimizations below this point -# CONFIG_USB=n -# CONFIG_SECURITY=n -# CONFIG_NFS_FS=n -# CONFIG_SOUND=n -# CONFIG_INPUT_JOYSTICK=n -# CONFIG_INPUT_TABLET=n -# CONFIG_INPUT_TOUCHSCREEN=n -# CONFIG_INPUT_MISC=n -# CONFIG_INPUT_MOUSE=n -# # CONFIG_NET=n # disables console access, so accept the slower build. -# CONFIG_SCSI=n -# CONFIG_ATA=n -# CONFIG_FAT_FS=n -# CONFIG_MSDOS_FS=n -# CONFIG_VFAT_FS=n -# CONFIG_ISO9660_FS=n -# CONFIG_QUOTA=n -# CONFIG_HID=n -# CONFIG_CRYPTO=n -# CONFIG_PCCARD=n -# CONFIG_PCMCIA=n -# CONFIG_CARDBUS=n -# CONFIG_YENTA=n -if kvm-build.sh $config_template $builddir $T -then - cp $builddir/Make*.out $resdir - cp $builddir/.config $resdir - cp $builddir/arch/x86/boot/bzImage $resdir - parse-build.sh $resdir/Make.out $title - if test -f $builddir.wait - then - mv $builddir.wait $builddir.ready - fi -else - cp $builddir/Make*.out $resdir - echo Build failed, not running KVM, see $resdir. - if test -f $builddir.wait - then - mv $builddir.wait $builddir.ready - fi - exit 1 -fi -while test -f $builddir.ready -do - sleep 1 -done -minutes=$4 -seconds=$(($minutes * 60)) -qemu_args=$5 -boot_args=$6 - -cd $KVM -kstarttime=`awk 'BEGIN { print systime() }' < /dev/null` -echo ' ---' `date`: Starting kernel - -# Determine the appropriate flavor of qemu command. -QEMU="`identify_qemu $builddir/vmlinux`" - -# Generate -smp qemu argument. -qemu_args="-nographic $qemu_args" -cpu_count=`configNR_CPUS.sh $config_template` -vcpus=`identify_qemu_vcpus` -if test $cpu_count -gt $vcpus -then - echo CPU count limited from $cpu_count to $vcpus - touch $resdir/Warnings - echo CPU count limited from $cpu_count to $vcpus >> $resdir/Warnings - cpu_count=$vcpus -fi -qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`" - -# Generate architecture-specific and interaction-specific qemu arguments -qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$builddir/console.log"`" - -# Generate qemu -append arguments -qemu_append="`identify_qemu_append "$QEMU"`" - -# Pull in Kconfig-fragment boot parameters -boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" -# Generate kernel-version-specific boot parameters -boot_args="`per_version_boot_params "$boot_args" $builddir/.config $seconds`" - -echo $QEMU $qemu_args -m 512 -kernel $builddir/arch/x86/boot/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd -if test -n "$RCU_BUILDONLY" -then - echo Build-only run specified, boot/test omitted. - exit 0 -fi -$QEMU $qemu_args -m 512 -kernel $builddir/arch/x86/boot/bzImage -append "$qemu_append $boot_args" & -qemu_pid=$! -commandcompleted=0 -echo Monitoring qemu job at pid $qemu_pid -for ((i=0;i<$seconds;i++)) -do - if kill -0 $qemu_pid > /dev/null 2>&1 - then - sleep 1 - else - commandcompleted=1 - kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` - if test $kruntime -lt $seconds - then - echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 - else - echo ' ---' `date`: Kernel done - fi - break - fi -done -if test $commandcompleted -eq 0 -then - echo Grace period for qemu job at pid $qemu_pid - for ((i=0;i<=$grace;i++)) - do - if kill -0 $qemu_pid > /dev/null 2>&1 - then - sleep 1 - else - break - fi - if test $i -eq $grace - then - kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }'` - echo "!!! Hang at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 - kill -KILL $qemu_pid - fi - done -fi - -cp $builddir/console.log $resdir -parse-rcutorture.sh $resdir/console.log $title -parse-console.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh new file mode 100755 index 0000000..015d35b --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -0,0 +1,203 @@ +#!/bin/bash +# +# Run a kvm-based test of the specified tree on the specified configs. +# Fully automated run and error checking, no graphics console. +# +# Execute this in the source tree. Do not run it as a background task +# because qemu does not seem to like that much. +# +# Usage: sh kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args +# +# qemu-args defaults to "-nographic", along with arguments specifying the +# number of CPUs and other options generated from +# the underlying CPU architecture. +# boot_args defaults to value returned by the per_version_boot_params +# shell function. +# +# Anything you specify for either qemu-args or boot_args is appended to +# the default values. The "-smp" value is deduced from the contents of +# the config fragment. +# +# More sophisticated argument parsing is clearly needed. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2011 +# +# Authors: Paul E. McKenney + +grace=120 + +T=/tmp/kvm-test-1-run.sh.$$ +trap 'rm -rf $T' 0 + +. $KVM/bin/functions.sh +. $KVPATH/ver_functions.sh + +config_template=${1} +config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'` +title=`echo $config_template | sed -e 's/^.*\///'` +builddir=${2} +if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir" +then + echo "kvm-test-1-run.sh :$builddir: Not a writable directory, cannot build into it" + exit 1 +fi +resdir=${3} +if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir" +then + echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it" + exit 1 +fi +cp $config_template $resdir/ConfigFragment +echo ' ---' `date`: Starting build +echo ' ---' Kconfig fragment at: $config_template >> $resdir/log +if test -r "$config_dir/CFcommon" +then + cat < $config_dir/CFcommon >> $T +fi +# Optimizations below this point +# CONFIG_USB=n +# CONFIG_SECURITY=n +# CONFIG_NFS_FS=n +# CONFIG_SOUND=n +# CONFIG_INPUT_JOYSTICK=n +# CONFIG_INPUT_TABLET=n +# CONFIG_INPUT_TOUCHSCREEN=n +# CONFIG_INPUT_MISC=n +# CONFIG_INPUT_MOUSE=n +# # CONFIG_NET=n # disables console access, so accept the slower build. +# CONFIG_SCSI=n +# CONFIG_ATA=n +# CONFIG_FAT_FS=n +# CONFIG_MSDOS_FS=n +# CONFIG_VFAT_FS=n +# CONFIG_ISO9660_FS=n +# CONFIG_QUOTA=n +# CONFIG_HID=n +# CONFIG_CRYPTO=n +# CONFIG_PCCARD=n +# CONFIG_PCMCIA=n +# CONFIG_CARDBUS=n +# CONFIG_YENTA=n +if kvm-build.sh $config_template $builddir $T +then + cp $builddir/Make*.out $resdir + cp $builddir/.config $resdir + cp $builddir/arch/x86/boot/bzImage $resdir + parse-build.sh $resdir/Make.out $title + if test -f $builddir.wait + then + mv $builddir.wait $builddir.ready + fi +else + cp $builddir/Make*.out $resdir + echo Build failed, not running KVM, see $resdir. + if test -f $builddir.wait + then + mv $builddir.wait $builddir.ready + fi + exit 1 +fi +while test -f $builddir.ready +do + sleep 1 +done +minutes=$4 +seconds=$(($minutes * 60)) +qemu_args=$5 +boot_args=$6 + +cd $KVM +kstarttime=`awk 'BEGIN { print systime() }' < /dev/null` +echo ' ---' `date`: Starting kernel + +# Determine the appropriate flavor of qemu command. +QEMU="`identify_qemu $builddir/vmlinux`" + +# Generate -smp qemu argument. +qemu_args="-nographic $qemu_args" +cpu_count=`configNR_CPUS.sh $config_template` +vcpus=`identify_qemu_vcpus` +if test $cpu_count -gt $vcpus +then + echo CPU count limited from $cpu_count to $vcpus + touch $resdir/Warnings + echo CPU count limited from $cpu_count to $vcpus >> $resdir/Warnings + cpu_count=$vcpus +fi +qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`" + +# Generate architecture-specific and interaction-specific qemu arguments +qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$builddir/console.log"`" + +# Generate qemu -append arguments +qemu_append="`identify_qemu_append "$QEMU"`" + +# Pull in Kconfig-fragment boot parameters +boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" +# Generate kernel-version-specific boot parameters +boot_args="`per_version_boot_params "$boot_args" $builddir/.config $seconds`" + +echo $QEMU $qemu_args -m 512 -kernel $builddir/arch/x86/boot/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd +if test -n "$RCU_BUILDONLY" +then + echo Build-only run specified, boot/test omitted. + exit 0 +fi +$QEMU $qemu_args -m 512 -kernel $builddir/arch/x86/boot/bzImage -append "$qemu_append $boot_args" & +qemu_pid=$! +commandcompleted=0 +echo Monitoring qemu job at pid $qemu_pid +for ((i=0;i<$seconds;i++)) +do + if kill -0 $qemu_pid > /dev/null 2>&1 + then + sleep 1 + else + commandcompleted=1 + kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` + if test $kruntime -lt $seconds + then + echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1 + else + echo ' ---' `date`: Kernel done + fi + break + fi +done +if test $commandcompleted -eq 0 +then + echo Grace period for qemu job at pid $qemu_pid + for ((i=0;i<=$grace;i++)) + do + if kill -0 $qemu_pid > /dev/null 2>&1 + then + sleep 1 + else + break + fi + if test $i -eq $grace + then + kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }'` + echo "!!! Hang at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 + kill -KILL $qemu_pid + fi + done +fi + +cp $builddir/console.log $resdir +parse-rcutorture.sh $resdir/console.log $title +parse-console.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 242b52d..767097e 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -307,7 +307,7 @@ function dump(first, pastlast) print "touch " builddir ".wait"; print "mkdir " builddir " > /dev/null 2>&1 || :"; print "mkdir " rd cfr[jn] " || :"; - print "kvm-test-1-rcu.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " builddir ".out 2>&1 &" + print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " builddir ".out 2>&1 &" print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date`" print "while test -f " builddir ".wait" print "do" -- cgit v0.10.2 From 61010e74d76ca51dd452290d6c340c681df498ce Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Feb 2014 10:29:49 -0800 Subject: rcutorture: Introduce "rcu" directory level underneath configs This commit uses the standard software ploy of introducing another level of indirection below the configs directory. This allows each torture-test suite to have its own set of Kconfig files, boot parameters, and version-specific scripts. Initially, we have only rcu, but lock will follow soonish. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 015d35b..94b28bb 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -199,5 +199,5 @@ then fi cp $builddir/console.log $resdir -parse-rcutorture.sh $resdir/console.log $title +parse-${TORTURE_SUITE}torture.sh $resdir/console.log $title parse-console.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 767097e..c24092c 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -41,6 +41,7 @@ PATH=${KVM}/bin:$PATH; export PATH builddir="${KVM}/b1" RCU_INITRD="$KVM/initrd"; export RCU_INITRD RCU_KMAKE_ARG=""; export RCU_KMAKE_ARG +TORTURE_SUITE=rcu resdir="" configs="" cpus=0 @@ -66,8 +67,9 @@ usage () { echo " --no-initrd" echo " --qemu-args qemu-system-..." echo " --qemu-cmd qemu-system-..." - echo " --results absolute-pathname" echo " --relbuilddir relative-pathname" + echo " --results absolute-pathname" + echo " --torture rcu" exit 1 } @@ -156,6 +158,11 @@ do resdir=$2 shift ;; + --torture) + checkarg --torture "(suite name)" "$#" "$2" '^rcu$' '^--' + TORTURE_SUITE=$2 + shift + ;; *) echo Unknown argument $1 usage @@ -164,7 +171,7 @@ do shift done -CONFIGFRAG=${KVM}/configs; export CONFIGFRAG +CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG KVPATH=${CONFIGFRAG}/$kversion; export KVPATH if test -z "$configs" @@ -191,6 +198,7 @@ then touch $resdir/$ds/log echo $scriptname $args >> $resdir/$ds/log + echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE pwd > $resdir/$ds/testid.txt if test -d .git @@ -265,6 +273,9 @@ END { }' # Generate a script to execute the tests in appropriate batches. +cat << ___EOF___ > $T/script +TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE +___EOF___ awk < $T/cfgcpu.pack \ -v CONFIGDIR="$CONFIGFRAG/$kversion/" \ -v KVM="$KVM" \ @@ -353,7 +364,7 @@ END { # Dump the last batch. if (ncpus != 0) dump(first, i); -}' > $T/script +}' >> $T/script if test "$dryrun" = script then diff --git a/tools/testing/selftests/rcutorture/configs/BUSTED b/tools/testing/selftests/rcutorture/configs/BUSTED deleted file mode 100644 index 48d8a24..0000000 --- a/tools/testing/selftests/rcutorture/configs/BUSTED +++ /dev/null @@ -1,7 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_SMP=y -CONFIG_NR_CPUS=4 -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/BUSTED.boot deleted file mode 100644 index 6804f9d..0000000 --- a/tools/testing/selftests/rcutorture/configs/BUSTED.boot +++ /dev/null @@ -1 +0,0 @@ -rcutorture.torture_type=rcu_busted diff --git a/tools/testing/selftests/rcutorture/configs/CFLIST b/tools/testing/selftests/rcutorture/configs/CFLIST deleted file mode 100644 index cd3d29c..0000000 --- a/tools/testing/selftests/rcutorture/configs/CFLIST +++ /dev/null @@ -1,13 +0,0 @@ -TREE01 -TREE02 -TREE03 -TREE04 -TREE05 -TREE06 -TREE07 -TREE08 -TREE09 -SRCU-N -SRCU-P -TINY01 -TINY02 diff --git a/tools/testing/selftests/rcutorture/configs/CFcommon b/tools/testing/selftests/rcutorture/configs/CFcommon deleted file mode 100644 index d2d2a86..0000000 --- a/tools/testing/selftests/rcutorture/configs/CFcommon +++ /dev/null @@ -1,2 +0,0 @@ -CONFIG_RCU_TORTURE_TEST=y -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/SRCU-N b/tools/testing/selftests/rcutorture/configs/SRCU-N deleted file mode 100644 index 9fbb41b..0000000 --- a/tools/testing/selftests/rcutorture/configs/SRCU-N +++ /dev/null @@ -1,7 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_SMP=y -CONFIG_NR_CPUS=4 -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n diff --git a/tools/testing/selftests/rcutorture/configs/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/SRCU-N.boot deleted file mode 100644 index 238bfe3..0000000 --- a/tools/testing/selftests/rcutorture/configs/SRCU-N.boot +++ /dev/null @@ -1 +0,0 @@ -rcutorture.torture_type=srcu diff --git a/tools/testing/selftests/rcutorture/configs/SRCU-P b/tools/testing/selftests/rcutorture/configs/SRCU-P deleted file mode 100644 index 4b6f272..0000000 --- a/tools/testing/selftests/rcutorture/configs/SRCU-P +++ /dev/null @@ -1,7 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/SRCU-P.boot deleted file mode 100644 index 238bfe3..0000000 --- a/tools/testing/selftests/rcutorture/configs/SRCU-P.boot +++ /dev/null @@ -1 +0,0 @@ -rcutorture.torture_type=srcu diff --git a/tools/testing/selftests/rcutorture/configs/TINY01 b/tools/testing/selftests/rcutorture/configs/TINY01 deleted file mode 100644 index 0a63e07..0000000 --- a/tools/testing/selftests/rcutorture/configs/TINY01 +++ /dev/null @@ -1,12 +0,0 @@ -CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TINY_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_TRACE=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PREEMPT_COUNT=n diff --git a/tools/testing/selftests/rcutorture/configs/TINY02 b/tools/testing/selftests/rcutorture/configs/TINY02 deleted file mode 100644 index f4feaee..0000000 --- a/tools/testing/selftests/rcutorture/configs/TINY02 +++ /dev/null @@ -1,12 +0,0 @@ -CONFIG_SMP=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TINY_RCU=y -CONFIG_HZ_PERIODIC=y -CONFIG_NO_HZ_IDLE=n -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_TRACE=y -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -CONFIG_PREEMPT_COUNT=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE01 b/tools/testing/selftests/rcutorture/configs/TREE01 deleted file mode 100644 index 9c827ec..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE01 +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_FANOUT=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ZERO=y -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=n -CONFIG_RCU_CPU_STALL_VERBOSE=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/TREE01.boot b/tools/testing/selftests/rcutorture/configs/TREE01.boot deleted file mode 100644 index 0fc8a34..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE01.boot +++ /dev/null @@ -1 +0,0 @@ -rcutorture.torture_type=rcu_bh diff --git a/tools/testing/selftests/rcutorture/configs/TREE02 b/tools/testing/selftests/rcutorture/configs/TREE02 deleted file mode 100644 index bca03f6..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE02 +++ /dev/null @@ -1,25 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_LOCKING=n -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=n -CONFIG_RCU_CPU_STALL_VERBOSE=y -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/TREE03 b/tools/testing/selftests/rcutorture/configs/TREE03 deleted file mode 100644 index c1f111c..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE03 +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=y -CONFIG_NO_HZ_IDLE=n -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_FANOUT=4 -CONFIG_RCU_FANOUT_LEAF=4 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=n -CONFIG_RCU_CPU_STALL_VERBOSE=n -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/TREE04 b/tools/testing/selftests/rcutorture/configs/TREE04 deleted file mode 100644 index 7dbd27c..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE04 +++ /dev/null @@ -1,24 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=n -CONFIG_NO_HZ_FULL=y -CONFIG_NO_HZ_FULL_ALL=y -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=2 -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_RCU_CPU_STALL_VERBOSE=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/TREE04.boot b/tools/testing/selftests/rcutorture/configs/TREE04.boot deleted file mode 100644 index 0fc8a34..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE04.boot +++ /dev/null @@ -1 +0,0 @@ -rcutorture.torture_type=rcu_bh diff --git a/tools/testing/selftests/rcutorture/configs/TREE05 b/tools/testing/selftests/rcutorture/configs/TREE05 deleted file mode 100644 index d0f32e5..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE05 +++ /dev/null @@ -1,24 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_FANOUT=6 -CONFIG_RCU_FANOUT_LEAF=6 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_NONE=y -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_PROVE_RCU_DELAY=y -CONFIG_RCU_CPU_STALL_INFO=n -CONFIG_RCU_CPU_STALL_VERBOSE=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/TREE05.boot b/tools/testing/selftests/rcutorture/configs/TREE05.boot deleted file mode 100644 index 3b42b8b..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE05.boot +++ /dev/null @@ -1 +0,0 @@ -rcutorture.torture_type=sched diff --git a/tools/testing/selftests/rcutorture/configs/TREE06 b/tools/testing/selftests/rcutorture/configs/TREE06 deleted file mode 100644 index 2e477df..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE06 +++ /dev/null @@ -1,25 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=8 -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=6 -CONFIG_RCU_FANOUT_LEAF=6 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=n -CONFIG_RCU_CPU_STALL_VERBOSE=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y diff --git a/tools/testing/selftests/rcutorture/configs/TREE07 b/tools/testing/selftests/rcutorture/configs/TREE07 deleted file mode 100644 index 042f86e..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE07 +++ /dev/null @@ -1,23 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=16 -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=n -CONFIG_NO_HZ_FULL=y -CONFIG_NO_HZ_FULL_ALL=y -CONFIG_NO_HZ_FULL_SYSIDLE=y -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_FANOUT=2 -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_RCU_CPU_STALL_VERBOSE=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/TREE08 b/tools/testing/selftests/rcutorture/configs/TREE08 deleted file mode 100644 index 3438cee..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE08 +++ /dev/null @@ -1,25 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=16 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ALL=y -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=n -CONFIG_RCU_CPU_STALL_VERBOSE=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/TREE08-T b/tools/testing/selftests/rcutorture/configs/TREE08-T deleted file mode 100644 index bf4523d..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE08-T +++ /dev/null @@ -1,25 +0,0 @@ -CONFIG_SMP=y -CONFIG_NR_CPUS=16 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_FAST_NO_HZ=n -CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_ALL=y -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=n -CONFIG_RCU_CPU_STALL_VERBOSE=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/TREE09 b/tools/testing/selftests/rcutorture/configs/TREE09 deleted file mode 100644 index 81e4f7c..0000000 --- a/tools/testing/selftests/rcutorture/configs/TREE09 +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_SMP=n -CONFIG_NR_CPUS=1 -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_HZ_PERIODIC=n -CONFIG_NO_HZ_IDLE=y -CONFIG_NO_HZ_FULL=n -CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_RCU_NOCB_CPU=n -CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_PROVE_RCU_DELAY=n -CONFIG_RCU_CPU_STALL_INFO=n -CONFIG_RCU_CPU_STALL_VERBOSE=n -CONFIG_RCU_BOOST=n -CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED new file mode 100644 index 0000000..48d8a24 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED @@ -0,0 +1,7 @@ +CONFIG_RCU_TRACE=n +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot new file mode 100644 index 0000000..6804f9d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot @@ -0,0 +1 @@ +rcutorture.torture_type=rcu_busted diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST new file mode 100644 index 0000000..cd3d29c --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -0,0 +1,13 @@ +TREE01 +TREE02 +TREE03 +TREE04 +TREE05 +TREE06 +TREE07 +TREE08 +TREE09 +SRCU-N +SRCU-P +TINY01 +TINY02 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon new file mode 100644 index 0000000..d2d2a86 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon @@ -0,0 +1,2 @@ +CONFIG_RCU_TORTURE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N new file mode 100644 index 0000000..9fbb41b --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N @@ -0,0 +1,7 @@ +CONFIG_RCU_TRACE=n +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot new file mode 100644 index 0000000..238bfe3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot @@ -0,0 +1 @@ +rcutorture.torture_type=srcu diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P new file mode 100644 index 0000000..4b6f272 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P @@ -0,0 +1,7 @@ +CONFIG_RCU_TRACE=n +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot new file mode 100644 index 0000000..238bfe3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -0,0 +1 @@ +rcutorture.torture_type=srcu diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 new file mode 100644 index 0000000..0a63e07 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01 @@ -0,0 +1,12 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_TRACE=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_PREEMPT_COUNT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 new file mode 100644 index 0000000..f4feaee --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -0,0 +1,12 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TINY_RCU=y +CONFIG_HZ_PERIODIC=y +CONFIG_NO_HZ_IDLE=n +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_TRACE=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_PREEMPT_COUNT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 new file mode 100644 index 0000000..9c827ec --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -0,0 +1,22 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_FANOUT=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_ZERO=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=n +CONFIG_RCU_CPU_STALL_VERBOSE=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot new file mode 100644 index 0000000..0fc8a34 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -0,0 +1 @@ +rcutorture.torture_type=rcu_bh diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 new file mode 100644 index 0000000..1a777b5 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -0,0 +1,25 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_LEAF=3 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=n +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=n +CONFIG_RCU_CPU_STALL_VERBOSE=y +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 new file mode 100644 index 0000000..c1f111c --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 @@ -0,0 +1,22 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=y +CONFIG_NO_HZ_IDLE=n +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_FANOUT=4 +CONFIG_RCU_FANOUT_LEAF=4 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=n +CONFIG_RCU_CPU_STALL_VERBOSE=n +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 new file mode 100644 index 0000000..7dbd27c --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -0,0 +1,24 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=n +CONFIG_NO_HZ_FULL=y +CONFIG_NO_HZ_FULL_ALL=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_FANOUT=2 +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_RCU_CPU_STALL_VERBOSE=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot new file mode 100644 index 0000000..0fc8a34 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot @@ -0,0 +1 @@ +rcutorture.torture_type=rcu_bh diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 new file mode 100644 index 0000000..d0f32e5 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -0,0 +1,24 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=n +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_FANOUT=6 +CONFIG_RCU_FANOUT_LEAF=6 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_NONE=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_PROVE_RCU_DELAY=y +CONFIG_RCU_CPU_STALL_INFO=n +CONFIG_RCU_CPU_STALL_VERBOSE=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot new file mode 100644 index 0000000..3b42b8b --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot @@ -0,0 +1 @@ +rcutorture.torture_type=sched diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 new file mode 100644 index 0000000..2e477df --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -0,0 +1,25 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_FANOUT=6 +CONFIG_RCU_FANOUT_LEAF=6 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=n +CONFIG_RCU_CPU_STALL_VERBOSE=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 new file mode 100644 index 0000000..042f86e --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -0,0 +1,23 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=n +CONFIG_NO_HZ_FULL=y +CONFIG_NO_HZ_FULL_ALL=y +CONFIG_NO_HZ_FULL_SYSIDLE=y +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_FANOUT=2 +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_RCU_CPU_STALL_VERBOSE=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 new file mode 100644 index 0000000..3438cee --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -0,0 +1,25 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_ALL=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=n +CONFIG_RCU_CPU_STALL_VERBOSE=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T new file mode 100644 index 0000000..bf4523d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T @@ -0,0 +1,25 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_FAST_NO_HZ=n +CONFIG_RCU_TRACE=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_FANOUT=3 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_RCU_FANOUT_LEAF=2 +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_ALL=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=n +CONFIG_RCU_CPU_STALL_VERBOSE=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 new file mode 100644 index 0000000..81e4f7c --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 @@ -0,0 +1,20 @@ +CONFIG_SMP=n +CONFIG_NR_CPUS=1 +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_RCU_TRACE=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_RCU_DELAY=n +CONFIG_RCU_CPU_STALL_INFO=n +CONFIG_RCU_CPU_STALL_VERBOSE=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/CFLIST new file mode 100644 index 0000000..1822394 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/CFLIST @@ -0,0 +1,14 @@ +P1-S-T-NH-SD-SMP-HP +P2-2-t-nh-sd-SMP-hp +P3-3-T-nh-SD-SMP-hp +P4-A-t-NH-sd-SMP-HP +P5-U-T-NH-sd-SMP-hp +N1-S-T-NH-SD-SMP-HP +N2-2-t-nh-sd-SMP-hp +N3-3-T-nh-SD-SMP-hp +N4-A-t-NH-sd-SMP-HP +N5-U-T-NH-sd-SMP-hp +PT1-nh +PT2-NH +NT1-nh +NT3-NH diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N1-S-T-NH-SD-SMP-HP new file mode 100644 index 0000000..d3ef873 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N1-S-T-NH-SD-SMP-HP @@ -0,0 +1,18 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=8 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N2-2-t-nh-sd-SMP-hp new file mode 100644 index 0000000..02e4185 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N2-2-t-nh-sd-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=4 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N3-3-T-nh-SD-SMP-hp new file mode 100644 index 0000000..b3100f6 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N3-3-T-nh-SD-SMP-hp @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N4-A-t-NH-sd-SMP-HP new file mode 100644 index 0000000..c56b445 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N4-A-t-NH-sd-SMP-HP @@ -0,0 +1,18 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N5-U-T-NH-sd-SMP-hp new file mode 100644 index 0000000..90d924f --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/N5-U-T-NH-sd-SMP-hp @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_DEBUG_KERNEL=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/NT1-nh b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/NT1-nh new file mode 100644 index 0000000..023f312 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/NT1-nh @@ -0,0 +1,23 @@ +#CHECK#CONFIG_TINY_RCU=y +CONFIG_RCU_TRACE=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=n +# +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/NT3-NH b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/NT3-NH new file mode 100644 index 0000000..6fd0235 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/NT3-NH @@ -0,0 +1,20 @@ +#CHECK#CONFIG_TINY_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=y +# +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P1-S-T-NH-SD-SMP-HP new file mode 100644 index 0000000..f72402d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P1-S-T-NH-SD-SMP-HP @@ -0,0 +1,19 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=8 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P2-2-t-nh-sd-SMP-hp new file mode 100644 index 0000000..0f3b667 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P2-2-t-nh-sd-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=4 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P3-3-T-nh-SD-SMP-hp new file mode 100644 index 0000000..b035e14 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P3-3-T-nh-SD-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P4-A-t-NH-sd-SMP-HP new file mode 100644 index 0000000..3ccf6a9 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P4-A-t-NH-sd-SMP-HP @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_RT_MUTEXES=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P5-U-T-NH-sd-SMP-hp new file mode 100644 index 0000000..ef624ce --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/P5-U-T-NH-sd-SMP-hp @@ -0,0 +1,28 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_DEBUG_KERNEL=y +CONFIG_PROVE_RCU_DELAY=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_RT_MUTEXES=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/PT1-nh b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/PT1-nh new file mode 100644 index 0000000..e3361c3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/PT1-nh @@ -0,0 +1,23 @@ +CONFIG_TINY_PREEMPT_RCU=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_RCU_TRACE=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=n +# +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/PT2-NH b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/PT2-NH new file mode 100644 index 0000000..64abfc3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/PT2-NH @@ -0,0 +1,22 @@ +CONFIG_TINY_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=y +# +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v0.0/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/ver_functions.sh new file mode 100644 index 0000000..5ace37a --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v0.0/ver_functions.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# +# Kernel-version-dependent shell functions for the rest of the scripts. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2013 +# +# Authors: Paul E. McKenney + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +# Which old kernels do not. +per_version_boot_params () { + echo rcutorture.stat_interval=15 \ + rcutorture.shutdown_secs=$3 \ + rcutorture.rcutorture_runnable=1 \ + rcutorture.test_no_idle_hz=1 \ + rcutorture.verbose=1 +} diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/CFLIST new file mode 100644 index 0000000..da4cbc66 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/CFLIST @@ -0,0 +1,17 @@ +sysidleY.2013.06.19a +sysidleN.2013.06.19a +P1-S-T-NH-SD-SMP-HP +P2-2-t-nh-sd-SMP-hp +P3-3-T-nh-SD-SMP-hp +P4-A-t-NH-sd-SMP-HP +P5-U-T-NH-sd-SMP-hp +P6---t-nh-SD-smp-hp +N1-S-T-NH-SD-SMP-HP +N2-2-t-nh-sd-SMP-hp +N3-3-T-nh-SD-SMP-hp +N4-A-t-NH-sd-SMP-HP +N5-U-T-NH-sd-SMP-hp +PT1-nh +PT2-NH +NT1-nh +NT3-NH diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N1-S-T-NH-SD-SMP-HP new file mode 100644 index 0000000..d81e11d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N1-S-T-NH-SD-SMP-HP @@ -0,0 +1,19 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=8 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N2-2-t-nh-sd-SMP-hp new file mode 100644 index 0000000..02e4185 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N2-2-t-nh-sd-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=4 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N3-3-T-nh-SD-SMP-hp new file mode 100644 index 0000000..b3100f6 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N3-3-T-nh-SD-SMP-hp @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N4-A-t-NH-sd-SMP-HP new file mode 100644 index 0000000..c56b445 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N4-A-t-NH-sd-SMP-HP @@ -0,0 +1,18 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N5-U-T-NH-sd-SMP-hp new file mode 100644 index 0000000..90d924f --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N5-U-T-NH-sd-SMP-hp @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_DEBUG_KERNEL=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N6---t-nh-SD-smp-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N6---t-nh-SD-smp-hp new file mode 100644 index 0000000..0ccc36d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N6---t-nh-SD-smp-hp @@ -0,0 +1,19 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_NR_CPUS=1 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N7-4-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N7-4-T-NH-SD-SMP-HP new file mode 100644 index 0000000..3f640cf --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N7-4-T-NH-SD-SMP-HP @@ -0,0 +1,26 @@ +CONFIG_RCU_TRACE=y +CONFIG_DEBUG_KERNEL=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=16 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_NONE=y +CONFIG_RCU_NOCB_CPU_ZERO=n +CONFIG_RCU_NOCB_CPU_ALL=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N8-2-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N8-2-T-NH-SD-SMP-HP new file mode 100644 index 0000000..285da2d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/N8-2-T-NH-SD-SMP-HP @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_DEBUG_KERNEL=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=14 +CONFIG_NR_CPUS=16 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/NT1-nh b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/NT1-nh new file mode 100644 index 0000000..023f312 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/NT1-nh @@ -0,0 +1,23 @@ +#CHECK#CONFIG_TINY_RCU=y +CONFIG_RCU_TRACE=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=n +# +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/NT3-NH b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/NT3-NH new file mode 100644 index 0000000..6fd0235 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/NT3-NH @@ -0,0 +1,20 @@ +#CHECK#CONFIG_TINY_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=y +# +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P1-S-T-NH-SD-SMP-HP new file mode 100644 index 0000000..9647c44 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P1-S-T-NH-SD-SMP-HP @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=8 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P2-2-t-nh-sd-SMP-hp new file mode 100644 index 0000000..0f3b667 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P2-2-t-nh-sd-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=4 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P3-3-T-nh-SD-SMP-hp new file mode 100644 index 0000000..b035e14 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P3-3-T-nh-SD-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P4-A-t-NH-sd-SMP-HP new file mode 100644 index 0000000..3ccf6a9 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P4-A-t-NH-sd-SMP-HP @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_RT_MUTEXES=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P5-U-T-NH-sd-SMP-hp new file mode 100644 index 0000000..ef624ce --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P5-U-T-NH-sd-SMP-hp @@ -0,0 +1,28 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_DEBUG_KERNEL=y +CONFIG_PROVE_RCU_DELAY=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_RT_MUTEXES=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P6---t-nh-SD-smp-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P6---t-nh-SD-smp-hp new file mode 100644 index 0000000..f4c9175 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P6---t-nh-SD-smp-hp @@ -0,0 +1,18 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=n +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP new file mode 100644 index 0000000..77a8c5b --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP @@ -0,0 +1,30 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=16 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_NONE=n +CONFIG_RCU_NOCB_CPU_ZERO=n +CONFIG_RCU_NOCB_CPU_ALL=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_SLUB=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP-all b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP-all new file mode 100644 index 0000000..0eecebc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP-all @@ -0,0 +1,30 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=16 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_NONE=y +CONFIG_RCU_NOCB_CPU_ZERO=n +CONFIG_RCU_NOCB_CPU_ALL=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_SLUB=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP-none b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP-none new file mode 100644 index 0000000..0eecebc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP-none @@ -0,0 +1,30 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=16 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_NONE=y +CONFIG_RCU_NOCB_CPU_ZERO=n +CONFIG_RCU_NOCB_CPU_ALL=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_SLUB=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-hp new file mode 100644 index 0000000..588bc70 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-hp @@ -0,0 +1,30 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=16 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_RCU_NOCB_CPU=y +CONFIG_RCU_NOCB_CPU_NONE=n +CONFIG_RCU_NOCB_CPU_ZERO=y +CONFIG_RCU_NOCB_CPU_ALL=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_SLUB=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/PT1-nh b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/PT1-nh new file mode 100644 index 0000000..e3361c3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/PT1-nh @@ -0,0 +1,23 @@ +CONFIG_TINY_PREEMPT_RCU=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_RCU_TRACE=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=n +# +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.12/PT2-NH b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/PT2-NH new file mode 100644 index 0000000..64abfc3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.12/PT2-NH @@ -0,0 +1,22 @@ +CONFIG_TINY_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=y +# +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/CFLIST new file mode 100644 index 0000000..1822394 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/CFLIST @@ -0,0 +1,14 @@ +P1-S-T-NH-SD-SMP-HP +P2-2-t-nh-sd-SMP-hp +P3-3-T-nh-SD-SMP-hp +P4-A-t-NH-sd-SMP-HP +P5-U-T-NH-sd-SMP-hp +N1-S-T-NH-SD-SMP-HP +N2-2-t-nh-sd-SMP-hp +N3-3-T-nh-SD-SMP-hp +N4-A-t-NH-sd-SMP-HP +N5-U-T-NH-sd-SMP-hp +PT1-nh +PT2-NH +NT1-nh +NT3-NH diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N1-S-T-NH-SD-SMP-HP new file mode 100644 index 0000000..d81e11d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N1-S-T-NH-SD-SMP-HP @@ -0,0 +1,19 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=8 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N2-2-t-nh-sd-SMP-hp new file mode 100644 index 0000000..02e4185 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N2-2-t-nh-sd-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=4 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N3-3-T-nh-SD-SMP-hp new file mode 100644 index 0000000..b3100f6 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N3-3-T-nh-SD-SMP-hp @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N4-A-t-NH-sd-SMP-HP new file mode 100644 index 0000000..c56b445 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N4-A-t-NH-sd-SMP-HP @@ -0,0 +1,18 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N5-U-T-NH-sd-SMP-hp new file mode 100644 index 0000000..90d924f --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/N5-U-T-NH-sd-SMP-hp @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_DEBUG_KERNEL=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/NT1-nh b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/NT1-nh new file mode 100644 index 0000000..023f312 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/NT1-nh @@ -0,0 +1,23 @@ +#CHECK#CONFIG_TINY_RCU=y +CONFIG_RCU_TRACE=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=n +# +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/NT3-NH b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/NT3-NH new file mode 100644 index 0000000..6fd0235 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/NT3-NH @@ -0,0 +1,20 @@ +#CHECK#CONFIG_TINY_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=y +# +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P1-S-T-NH-SD-SMP-HP new file mode 100644 index 0000000..9647c44 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P1-S-T-NH-SD-SMP-HP @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=8 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P2-2-t-nh-sd-SMP-hp new file mode 100644 index 0000000..0f3b667 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P2-2-t-nh-sd-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=4 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P3-3-T-nh-SD-SMP-hp new file mode 100644 index 0000000..b035e14 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P3-3-T-nh-SD-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P4-A-t-NH-sd-SMP-HP new file mode 100644 index 0000000..3ccf6a9 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P4-A-t-NH-sd-SMP-HP @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_RT_MUTEXES=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P5-U-T-NH-sd-SMP-hp new file mode 100644 index 0000000..ef624ce --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/P5-U-T-NH-sd-SMP-hp @@ -0,0 +1,28 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_DEBUG_KERNEL=y +CONFIG_PROVE_RCU_DELAY=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_RT_MUTEXES=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/PT1-nh b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/PT1-nh new file mode 100644 index 0000000..e3361c3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/PT1-nh @@ -0,0 +1,23 @@ +CONFIG_TINY_PREEMPT_RCU=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_RCU_TRACE=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=n +# +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/PT2-NH b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/PT2-NH new file mode 100644 index 0000000..64abfc3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/PT2-NH @@ -0,0 +1,22 @@ +CONFIG_TINY_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=y +# +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.3/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/ver_functions.sh new file mode 100644 index 0000000..bae5569 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.3/ver_functions.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# +# Kernel-version-dependent shell functions for the rest of the scripts. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2013 +# +# Authors: Paul E. McKenney + +# rcutorture_param_onoff bootparam-string config-file +# +# Adds onoff rcutorture module parameters to kernels having it. +rcutorture_param_onoff () { + if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" + then + echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 + echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 + fi +} + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `rcutorture_param_onoff "$1" "$2"` \ + rcutorture.stat_interval=15 \ + rcutorture.shutdown_secs=$3 \ + rcutorture.rcutorture_runnable=1 \ + rcutorture.test_no_idle_hz=1 \ + rcutorture.verbose=1 +} diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/CFLIST new file mode 100644 index 0000000..1822394 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/CFLIST @@ -0,0 +1,14 @@ +P1-S-T-NH-SD-SMP-HP +P2-2-t-nh-sd-SMP-hp +P3-3-T-nh-SD-SMP-hp +P4-A-t-NH-sd-SMP-HP +P5-U-T-NH-sd-SMP-hp +N1-S-T-NH-SD-SMP-HP +N2-2-t-nh-sd-SMP-hp +N3-3-T-nh-SD-SMP-hp +N4-A-t-NH-sd-SMP-HP +N5-U-T-NH-sd-SMP-hp +PT1-nh +PT2-NH +NT1-nh +NT3-NH diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N1-S-T-NH-SD-SMP-HP new file mode 100644 index 0000000..d81e11d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N1-S-T-NH-SD-SMP-HP @@ -0,0 +1,19 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=8 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N2-2-t-nh-sd-SMP-hp new file mode 100644 index 0000000..02e4185 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N2-2-t-nh-sd-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=4 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N3-3-T-nh-SD-SMP-hp new file mode 100644 index 0000000..b3100f6 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N3-3-T-nh-SD-SMP-hp @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N4-A-t-NH-sd-SMP-HP new file mode 100644 index 0000000..c56b445 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N4-A-t-NH-sd-SMP-HP @@ -0,0 +1,18 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N5-U-T-NH-sd-SMP-hp new file mode 100644 index 0000000..90d924f --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/N5-U-T-NH-sd-SMP-hp @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=y +CONFIG_DEBUG_KERNEL=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_TREE_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/NT1-nh b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/NT1-nh new file mode 100644 index 0000000..023f312 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/NT1-nh @@ -0,0 +1,23 @@ +#CHECK#CONFIG_TINY_RCU=y +CONFIG_RCU_TRACE=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=n +# +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/NT3-NH b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/NT3-NH new file mode 100644 index 0000000..6fd0235 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/NT3-NH @@ -0,0 +1,20 @@ +#CHECK#CONFIG_TINY_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=y +# +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P1-S-T-NH-SD-SMP-HP new file mode 100644 index 0000000..9647c44 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P1-S-T-NH-SD-SMP-HP @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_RCU_FAST_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=8 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P2-2-t-nh-sd-SMP-hp new file mode 100644 index 0000000..0f3b667 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P2-2-t-nh-sd-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=4 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P3-3-T-nh-SD-SMP-hp new file mode 100644 index 0000000..b035e14 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P3-3-T-nh-SD-SMP-hp @@ -0,0 +1,20 @@ +CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=n +CONFIG_SMP=y +CONFIG_RCU_FANOUT=2 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P4-A-t-NH-sd-SMP-HP new file mode 100644 index 0000000..3ccf6a9 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P4-A-t-NH-sd-SMP-HP @@ -0,0 +1,22 @@ +CONFIG_RCU_TRACE=n +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=n +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_RT_MUTEXES=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P5-U-T-NH-sd-SMP-hp new file mode 100644 index 0000000..ef624ce --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/P5-U-T-NH-sd-SMP-hp @@ -0,0 +1,28 @@ +CONFIG_RCU_TRACE=y +CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_NO_HZ=y +CONFIG_SMP=y +CONFIG_RCU_FANOUT=6 +CONFIG_NR_CPUS=8 +CONFIG_RCU_FANOUT_EXACT=y +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +#CHECK#CONFIG_TREE_PREEMPT_RCU=y +CONFIG_DEBUG_KERNEL=y +CONFIG_PROVE_RCU_DELAY=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_RT_MUTEXES=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/PT1-nh b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/PT1-nh new file mode 100644 index 0000000..e3361c3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/PT1-nh @@ -0,0 +1,23 @@ +CONFIG_TINY_PREEMPT_RCU=y +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_PRIO=2 +CONFIG_RCU_TRACE=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=n +# +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/PT2-NH b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/PT2-NH new file mode 100644 index 0000000..64abfc3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/PT2-NH @@ -0,0 +1,22 @@ +CONFIG_TINY_PREEMPT_RCU=y +CONFIG_RCU_TORTURE_TEST=m +CONFIG_MODULE_UNLOAD=y +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +# +CONFIG_SMP=n +# +CONFIG_HOTPLUG_CPU=n +# +CONFIG_NO_HZ=y +# +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_PROVE_LOCKING=y +CONFIG_PROVE_RCU=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_PRINTK_TIME=y + diff --git a/tools/testing/selftests/rcutorture/configs/rcu/v3.5/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/ver_functions.sh new file mode 100644 index 0000000..8977d8d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/v3.5/ver_functions.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# +# Kernel-version-dependent shell functions for the rest of the scripts. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2013 +# +# Authors: Paul E. McKenney + +# rcutorture_param_n_barrier_cbs bootparam-string +# +# Adds n_barrier_cbs rcutorture module parameter to kernels having it. +rcutorture_param_n_barrier_cbs () { + if echo $1 | grep -q "rcutorture\.n_barrier_cbs" + then + : + else + echo rcutorture.n_barrier_cbs=4 + fi +} + +# rcutorture_param_onoff bootparam-string config-file +# +# Adds onoff rcutorture module parameters to kernels having it. +rcutorture_param_onoff () { + if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" + then + echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 + echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 + fi +} + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `rcutorture_param_onoff "$1" "$2"` \ + `rcutorture_param_n_barrier_cbs "$1"` \ + rcutorture.stat_interval=15 \ + rcutorture.shutdown_secs=$3 \ + rcutorture.rcutorture_runnable=1 \ + rcutorture.test_no_idle_hz=1 \ + rcutorture.verbose=1 +} diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh new file mode 100644 index 0000000..8977d8d --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# +# Kernel-version-dependent shell functions for the rest of the scripts. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2013 +# +# Authors: Paul E. McKenney + +# rcutorture_param_n_barrier_cbs bootparam-string +# +# Adds n_barrier_cbs rcutorture module parameter to kernels having it. +rcutorture_param_n_barrier_cbs () { + if echo $1 | grep -q "rcutorture\.n_barrier_cbs" + then + : + else + echo rcutorture.n_barrier_cbs=4 + fi +} + +# rcutorture_param_onoff bootparam-string config-file +# +# Adds onoff rcutorture module parameters to kernels having it. +rcutorture_param_onoff () { + if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" + then + echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 + echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 + fi +} + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `rcutorture_param_onoff "$1" "$2"` \ + `rcutorture_param_n_barrier_cbs "$1"` \ + rcutorture.stat_interval=15 \ + rcutorture.shutdown_secs=$3 \ + rcutorture.rcutorture_runnable=1 \ + rcutorture.test_no_idle_hz=1 \ + rcutorture.verbose=1 +} diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/CFLIST b/tools/testing/selftests/rcutorture/configs/v0.0/CFLIST deleted file mode 100644 index 1822394..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/CFLIST +++ /dev/null @@ -1,14 +0,0 @@ -P1-S-T-NH-SD-SMP-HP -P2-2-t-nh-sd-SMP-hp -P3-3-T-nh-SD-SMP-hp -P4-A-t-NH-sd-SMP-HP -P5-U-T-NH-sd-SMP-hp -N1-S-T-NH-SD-SMP-HP -N2-2-t-nh-sd-SMP-hp -N3-3-T-nh-SD-SMP-hp -N4-A-t-NH-sd-SMP-HP -N5-U-T-NH-sd-SMP-hp -PT1-nh -PT2-NH -NT1-nh -NT3-NH diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/N1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v0.0/N1-S-T-NH-SD-SMP-HP deleted file mode 100644 index d3ef873..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/N1-S-T-NH-SD-SMP-HP +++ /dev/null @@ -1,18 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=8 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/N2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v0.0/N2-2-t-nh-sd-SMP-hp deleted file mode 100644 index 02e4185..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/N2-2-t-nh-sd-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=4 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/N3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v0.0/N3-3-T-nh-SD-SMP-hp deleted file mode 100644 index b3100f6..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/N3-3-T-nh-SD-SMP-hp +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/N4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/v0.0/N4-A-t-NH-sd-SMP-HP deleted file mode 100644 index c56b445..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/N4-A-t-NH-sd-SMP-HP +++ /dev/null @@ -1,18 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/N5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v0.0/N5-U-T-NH-sd-SMP-hp deleted file mode 100644 index 90d924f..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/N5-U-T-NH-sd-SMP-hp +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_DEBUG_KERNEL=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/NT1-nh b/tools/testing/selftests/rcutorture/configs/v0.0/NT1-nh deleted file mode 100644 index 023f312..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/NT1-nh +++ /dev/null @@ -1,23 +0,0 @@ -#CHECK#CONFIG_TINY_RCU=y -CONFIG_RCU_TRACE=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=n -# -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/NT3-NH b/tools/testing/selftests/rcutorture/configs/v0.0/NT3-NH deleted file mode 100644 index 6fd0235..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/NT3-NH +++ /dev/null @@ -1,20 +0,0 @@ -#CHECK#CONFIG_TINY_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=y -# -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/P1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v0.0/P1-S-T-NH-SD-SMP-HP deleted file mode 100644 index f72402d..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/P1-S-T-NH-SD-SMP-HP +++ /dev/null @@ -1,19 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=8 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/P2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v0.0/P2-2-t-nh-sd-SMP-hp deleted file mode 100644 index 0f3b667..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/P2-2-t-nh-sd-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=4 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/P3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v0.0/P3-3-T-nh-SD-SMP-hp deleted file mode 100644 index b035e14..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/P3-3-T-nh-SD-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/P4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/v0.0/P4-A-t-NH-sd-SMP-HP deleted file mode 100644 index 3ccf6a9..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/P4-A-t-NH-sd-SMP-HP +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_RT_MUTEXES=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/P5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v0.0/P5-U-T-NH-sd-SMP-hp deleted file mode 100644 index ef624ce..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/P5-U-T-NH-sd-SMP-hp +++ /dev/null @@ -1,28 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_DEBUG_KERNEL=y -CONFIG_PROVE_RCU_DELAY=y -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_RT_MUTEXES=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/PT1-nh b/tools/testing/selftests/rcutorture/configs/v0.0/PT1-nh deleted file mode 100644 index e3361c3..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/PT1-nh +++ /dev/null @@ -1,23 +0,0 @@ -CONFIG_TINY_PREEMPT_RCU=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_RCU_TRACE=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=n -# -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/PT2-NH b/tools/testing/selftests/rcutorture/configs/v0.0/PT2-NH deleted file mode 100644 index 64abfc3..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/PT2-NH +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_TINY_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=y -# -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v0.0/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/v0.0/ver_functions.sh deleted file mode 100644 index 5ace37a..0000000 --- a/tools/testing/selftests/rcutorture/configs/v0.0/ver_functions.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -# -# Kernel-version-dependent shell functions for the rest of the scripts. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2013 -# -# Authors: Paul E. McKenney - -# per_version_boot_params bootparam-string config-file seconds -# -# Adds per-version torture-module parameters to kernels supporting them. -# Which old kernels do not. -per_version_boot_params () { - echo rcutorture.stat_interval=15 \ - rcutorture.shutdown_secs=$3 \ - rcutorture.rcutorture_runnable=1 \ - rcutorture.test_no_idle_hz=1 \ - rcutorture.verbose=1 -} diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/CFLIST b/tools/testing/selftests/rcutorture/configs/v3.12/CFLIST deleted file mode 100644 index da4cbc66..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/CFLIST +++ /dev/null @@ -1,17 +0,0 @@ -sysidleY.2013.06.19a -sysidleN.2013.06.19a -P1-S-T-NH-SD-SMP-HP -P2-2-t-nh-sd-SMP-hp -P3-3-T-nh-SD-SMP-hp -P4-A-t-NH-sd-SMP-HP -P5-U-T-NH-sd-SMP-hp -P6---t-nh-SD-smp-hp -N1-S-T-NH-SD-SMP-HP -N2-2-t-nh-sd-SMP-hp -N3-3-T-nh-SD-SMP-hp -N4-A-t-NH-sd-SMP-HP -N5-U-T-NH-sd-SMP-hp -PT1-nh -PT2-NH -NT1-nh -NT3-NH diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/N1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.12/N1-S-T-NH-SD-SMP-HP deleted file mode 100644 index d81e11d..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/N1-S-T-NH-SD-SMP-HP +++ /dev/null @@ -1,19 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=8 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/N2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.12/N2-2-t-nh-sd-SMP-hp deleted file mode 100644 index 02e4185..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/N2-2-t-nh-sd-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=4 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/N3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.12/N3-3-T-nh-SD-SMP-hp deleted file mode 100644 index b3100f6..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/N3-3-T-nh-SD-SMP-hp +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/N4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.12/N4-A-t-NH-sd-SMP-HP deleted file mode 100644 index c56b445..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/N4-A-t-NH-sd-SMP-HP +++ /dev/null @@ -1,18 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/N5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.12/N5-U-T-NH-sd-SMP-hp deleted file mode 100644 index 90d924f..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/N5-U-T-NH-sd-SMP-hp +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_DEBUG_KERNEL=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/N6---t-nh-SD-smp-hp b/tools/testing/selftests/rcutorture/configs/v3.12/N6---t-nh-SD-smp-hp deleted file mode 100644 index 0ccc36d..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/N6---t-nh-SD-smp-hp +++ /dev/null @@ -1,19 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_NR_CPUS=1 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/N7-4-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.12/N7-4-T-NH-SD-SMP-HP deleted file mode 100644 index 3f640cf..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/N7-4-T-NH-SD-SMP-HP +++ /dev/null @@ -1,26 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_DEBUG_KERNEL=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=16 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_NONE=y -CONFIG_RCU_NOCB_CPU_ZERO=n -CONFIG_RCU_NOCB_CPU_ALL=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/N8-2-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.12/N8-2-T-NH-SD-SMP-HP deleted file mode 100644 index 285da2d..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/N8-2-T-NH-SD-SMP-HP +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_DEBUG_KERNEL=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=14 -CONFIG_NR_CPUS=16 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/NT1-nh b/tools/testing/selftests/rcutorture/configs/v3.12/NT1-nh deleted file mode 100644 index 023f312..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/NT1-nh +++ /dev/null @@ -1,23 +0,0 @@ -#CHECK#CONFIG_TINY_RCU=y -CONFIG_RCU_TRACE=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=n -# -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/NT3-NH b/tools/testing/selftests/rcutorture/configs/v3.12/NT3-NH deleted file mode 100644 index 6fd0235..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/NT3-NH +++ /dev/null @@ -1,20 +0,0 @@ -#CHECK#CONFIG_TINY_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=y -# -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.12/P1-S-T-NH-SD-SMP-HP deleted file mode 100644 index 9647c44..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P1-S-T-NH-SD-SMP-HP +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=8 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.12/P2-2-t-nh-sd-SMP-hp deleted file mode 100644 index 0f3b667..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P2-2-t-nh-sd-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=4 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.12/P3-3-T-nh-SD-SMP-hp deleted file mode 100644 index b035e14..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P3-3-T-nh-SD-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.12/P4-A-t-NH-sd-SMP-HP deleted file mode 100644 index 3ccf6a9..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P4-A-t-NH-sd-SMP-HP +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_RT_MUTEXES=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.12/P5-U-T-NH-sd-SMP-hp deleted file mode 100644 index ef624ce..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P5-U-T-NH-sd-SMP-hp +++ /dev/null @@ -1,28 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_DEBUG_KERNEL=y -CONFIG_PROVE_RCU_DELAY=y -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_RT_MUTEXES=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P6---t-nh-SD-smp-hp b/tools/testing/selftests/rcutorture/configs/v3.12/P6---t-nh-SD-smp-hp deleted file mode 100644 index f4c9175..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P6---t-nh-SD-smp-hp +++ /dev/null @@ -1,18 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=n -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP deleted file mode 100644 index 77a8c5b..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP +++ /dev/null @@ -1,30 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=16 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_NONE=n -CONFIG_RCU_NOCB_CPU_ZERO=n -CONFIG_RCU_NOCB_CPU_ALL=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_SLUB=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP-all b/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP-all deleted file mode 100644 index 0eecebc..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP-all +++ /dev/null @@ -1,30 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=16 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_NONE=y -CONFIG_RCU_NOCB_CPU_ZERO=n -CONFIG_RCU_NOCB_CPU_ALL=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_SLUB=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP-none b/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP-none deleted file mode 100644 index 0eecebc..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-HP-none +++ /dev/null @@ -1,30 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=16 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_NONE=y -CONFIG_RCU_NOCB_CPU_ZERO=n -CONFIG_RCU_NOCB_CPU_ALL=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_SLUB=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-hp deleted file mode 100644 index 588bc70..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/P7-4-T-NH-SD-SMP-hp +++ /dev/null @@ -1,30 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=16 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_RCU_NOCB_CPU=y -CONFIG_RCU_NOCB_CPU_NONE=n -CONFIG_RCU_NOCB_CPU_ZERO=y -CONFIG_RCU_NOCB_CPU_ALL=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_SLUB=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/PT1-nh b/tools/testing/selftests/rcutorture/configs/v3.12/PT1-nh deleted file mode 100644 index e3361c3..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/PT1-nh +++ /dev/null @@ -1,23 +0,0 @@ -CONFIG_TINY_PREEMPT_RCU=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_RCU_TRACE=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=n -# -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.12/PT2-NH b/tools/testing/selftests/rcutorture/configs/v3.12/PT2-NH deleted file mode 100644 index 64abfc3..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.12/PT2-NH +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_TINY_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=y -# -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/CFLIST b/tools/testing/selftests/rcutorture/configs/v3.3/CFLIST deleted file mode 100644 index 1822394..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/CFLIST +++ /dev/null @@ -1,14 +0,0 @@ -P1-S-T-NH-SD-SMP-HP -P2-2-t-nh-sd-SMP-hp -P3-3-T-nh-SD-SMP-hp -P4-A-t-NH-sd-SMP-HP -P5-U-T-NH-sd-SMP-hp -N1-S-T-NH-SD-SMP-HP -N2-2-t-nh-sd-SMP-hp -N3-3-T-nh-SD-SMP-hp -N4-A-t-NH-sd-SMP-HP -N5-U-T-NH-sd-SMP-hp -PT1-nh -PT2-NH -NT1-nh -NT3-NH diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/N1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.3/N1-S-T-NH-SD-SMP-HP deleted file mode 100644 index d81e11d..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/N1-S-T-NH-SD-SMP-HP +++ /dev/null @@ -1,19 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=8 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/N2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.3/N2-2-t-nh-sd-SMP-hp deleted file mode 100644 index 02e4185..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/N2-2-t-nh-sd-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=4 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/N3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.3/N3-3-T-nh-SD-SMP-hp deleted file mode 100644 index b3100f6..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/N3-3-T-nh-SD-SMP-hp +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/N4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.3/N4-A-t-NH-sd-SMP-HP deleted file mode 100644 index c56b445..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/N4-A-t-NH-sd-SMP-HP +++ /dev/null @@ -1,18 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/N5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.3/N5-U-T-NH-sd-SMP-hp deleted file mode 100644 index 90d924f..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/N5-U-T-NH-sd-SMP-hp +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_DEBUG_KERNEL=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/NT1-nh b/tools/testing/selftests/rcutorture/configs/v3.3/NT1-nh deleted file mode 100644 index 023f312..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/NT1-nh +++ /dev/null @@ -1,23 +0,0 @@ -#CHECK#CONFIG_TINY_RCU=y -CONFIG_RCU_TRACE=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=n -# -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/NT3-NH b/tools/testing/selftests/rcutorture/configs/v3.3/NT3-NH deleted file mode 100644 index 6fd0235..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/NT3-NH +++ /dev/null @@ -1,20 +0,0 @@ -#CHECK#CONFIG_TINY_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=y -# -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/P1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.3/P1-S-T-NH-SD-SMP-HP deleted file mode 100644 index 9647c44..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/P1-S-T-NH-SD-SMP-HP +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=8 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/P2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.3/P2-2-t-nh-sd-SMP-hp deleted file mode 100644 index 0f3b667..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/P2-2-t-nh-sd-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=4 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/P3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.3/P3-3-T-nh-SD-SMP-hp deleted file mode 100644 index b035e14..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/P3-3-T-nh-SD-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/P4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.3/P4-A-t-NH-sd-SMP-HP deleted file mode 100644 index 3ccf6a9..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/P4-A-t-NH-sd-SMP-HP +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_RT_MUTEXES=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/P5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.3/P5-U-T-NH-sd-SMP-hp deleted file mode 100644 index ef624ce..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/P5-U-T-NH-sd-SMP-hp +++ /dev/null @@ -1,28 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_DEBUG_KERNEL=y -CONFIG_PROVE_RCU_DELAY=y -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_RT_MUTEXES=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/PT1-nh b/tools/testing/selftests/rcutorture/configs/v3.3/PT1-nh deleted file mode 100644 index e3361c3..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/PT1-nh +++ /dev/null @@ -1,23 +0,0 @@ -CONFIG_TINY_PREEMPT_RCU=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_RCU_TRACE=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=n -# -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/PT2-NH b/tools/testing/selftests/rcutorture/configs/v3.3/PT2-NH deleted file mode 100644 index 64abfc3..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/PT2-NH +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_TINY_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=y -# -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.3/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/v3.3/ver_functions.sh deleted file mode 100644 index bae5569..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.3/ver_functions.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# -# Kernel-version-dependent shell functions for the rest of the scripts. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2013 -# -# Authors: Paul E. McKenney - -# rcutorture_param_onoff bootparam-string config-file -# -# Adds onoff rcutorture module parameters to kernels having it. -rcutorture_param_onoff () { - if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" - then - echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 - echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 - fi -} - -# per_version_boot_params bootparam-string config-file seconds -# -# Adds per-version torture-module parameters to kernels supporting them. -per_version_boot_params () { - echo $1 `rcutorture_param_onoff "$1" "$2"` \ - rcutorture.stat_interval=15 \ - rcutorture.shutdown_secs=$3 \ - rcutorture.rcutorture_runnable=1 \ - rcutorture.test_no_idle_hz=1 \ - rcutorture.verbose=1 -} diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/CFLIST b/tools/testing/selftests/rcutorture/configs/v3.5/CFLIST deleted file mode 100644 index 1822394..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/CFLIST +++ /dev/null @@ -1,14 +0,0 @@ -P1-S-T-NH-SD-SMP-HP -P2-2-t-nh-sd-SMP-hp -P3-3-T-nh-SD-SMP-hp -P4-A-t-NH-sd-SMP-HP -P5-U-T-NH-sd-SMP-hp -N1-S-T-NH-SD-SMP-HP -N2-2-t-nh-sd-SMP-hp -N3-3-T-nh-SD-SMP-hp -N4-A-t-NH-sd-SMP-HP -N5-U-T-NH-sd-SMP-hp -PT1-nh -PT2-NH -NT1-nh -NT3-NH diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/N1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.5/N1-S-T-NH-SD-SMP-HP deleted file mode 100644 index d81e11d..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/N1-S-T-NH-SD-SMP-HP +++ /dev/null @@ -1,19 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=8 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/N2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.5/N2-2-t-nh-sd-SMP-hp deleted file mode 100644 index 02e4185..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/N2-2-t-nh-sd-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=4 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/N3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.5/N3-3-T-nh-SD-SMP-hp deleted file mode 100644 index b3100f6..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/N3-3-T-nh-SD-SMP-hp +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/N4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.5/N4-A-t-NH-sd-SMP-HP deleted file mode 100644 index c56b445..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/N4-A-t-NH-sd-SMP-HP +++ /dev/null @@ -1,18 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/N5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.5/N5-U-T-NH-sd-SMP-hp deleted file mode 100644 index 90d924f..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/N5-U-T-NH-sd-SMP-hp +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_DEBUG_KERNEL=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_TREE_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/NT1-nh b/tools/testing/selftests/rcutorture/configs/v3.5/NT1-nh deleted file mode 100644 index 023f312..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/NT1-nh +++ /dev/null @@ -1,23 +0,0 @@ -#CHECK#CONFIG_TINY_RCU=y -CONFIG_RCU_TRACE=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=n -# -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/NT3-NH b/tools/testing/selftests/rcutorture/configs/v3.5/NT3-NH deleted file mode 100644 index 6fd0235..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/NT3-NH +++ /dev/null @@ -1,20 +0,0 @@ -#CHECK#CONFIG_TINY_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=y -# -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/P1-S-T-NH-SD-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.5/P1-S-T-NH-SD-SMP-HP deleted file mode 100644 index 9647c44..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/P1-S-T-NH-SD-SMP-HP +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_RCU_FAST_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=8 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/P2-2-t-nh-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.5/P2-2-t-nh-sd-SMP-hp deleted file mode 100644 index 0f3b667..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/P2-2-t-nh-sd-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=4 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/P3-3-T-nh-SD-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.5/P3-3-T-nh-SD-SMP-hp deleted file mode 100644 index b035e14..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/P3-3-T-nh-SD-SMP-hp +++ /dev/null @@ -1,20 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_NO_HZ=n -CONFIG_SMP=y -CONFIG_RCU_FANOUT=2 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/P4-A-t-NH-sd-SMP-HP b/tools/testing/selftests/rcutorture/configs/v3.5/P4-A-t-NH-sd-SMP-HP deleted file mode 100644 index 3ccf6a9..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/P4-A-t-NH-sd-SMP-HP +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=n -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_RT_MUTEXES=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/P5-U-T-NH-sd-SMP-hp b/tools/testing/selftests/rcutorture/configs/v3.5/P5-U-T-NH-sd-SMP-hp deleted file mode 100644 index ef624ce..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/P5-U-T-NH-sd-SMP-hp +++ /dev/null @@ -1,28 +0,0 @@ -CONFIG_RCU_TRACE=y -CONFIG_RCU_CPU_STALL_INFO=y -CONFIG_NO_HZ=y -CONFIG_SMP=y -CONFIG_RCU_FANOUT=6 -CONFIG_NR_CPUS=8 -CONFIG_RCU_FANOUT_EXACT=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -#CHECK#CONFIG_TREE_PREEMPT_RCU=y -CONFIG_DEBUG_KERNEL=y -CONFIG_PROVE_RCU_DELAY=y -CONFIG_DEBUG_OBJECTS=y -CONFIG_DEBUG_OBJECTS_RCU_HEAD=y -CONFIG_RT_MUTEXES=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/PT1-nh b/tools/testing/selftests/rcutorture/configs/v3.5/PT1-nh deleted file mode 100644 index e3361c3..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/PT1-nh +++ /dev/null @@ -1,23 +0,0 @@ -CONFIG_TINY_PREEMPT_RCU=y -CONFIG_RCU_BOOST=y -CONFIG_RCU_BOOST_PRIO=2 -CONFIG_RCU_TRACE=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=n -# -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/PT2-NH b/tools/testing/selftests/rcutorture/configs/v3.5/PT2-NH deleted file mode 100644 index 64abfc3..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/PT2-NH +++ /dev/null @@ -1,22 +0,0 @@ -CONFIG_TINY_PREEMPT_RCU=y -CONFIG_RCU_TORTURE_TEST=m -CONFIG_MODULE_UNLOAD=y -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n -# -CONFIG_SMP=n -# -CONFIG_HOTPLUG_CPU=n -# -CONFIG_NO_HZ=y -# -CONFIG_PREEMPT_NONE=n -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=y -CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y -CONFIG_SYSFS_DEPRECATED_V2=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_PRINTK_TIME=y - diff --git a/tools/testing/selftests/rcutorture/configs/v3.5/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/v3.5/ver_functions.sh deleted file mode 100644 index 8977d8d..0000000 --- a/tools/testing/selftests/rcutorture/configs/v3.5/ver_functions.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# -# Kernel-version-dependent shell functions for the rest of the scripts. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2013 -# -# Authors: Paul E. McKenney - -# rcutorture_param_n_barrier_cbs bootparam-string -# -# Adds n_barrier_cbs rcutorture module parameter to kernels having it. -rcutorture_param_n_barrier_cbs () { - if echo $1 | grep -q "rcutorture\.n_barrier_cbs" - then - : - else - echo rcutorture.n_barrier_cbs=4 - fi -} - -# rcutorture_param_onoff bootparam-string config-file -# -# Adds onoff rcutorture module parameters to kernels having it. -rcutorture_param_onoff () { - if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" - then - echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 - echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 - fi -} - -# per_version_boot_params bootparam-string config-file seconds -# -# Adds per-version torture-module parameters to kernels supporting them. -per_version_boot_params () { - echo $1 `rcutorture_param_onoff "$1" "$2"` \ - `rcutorture_param_n_barrier_cbs "$1"` \ - rcutorture.stat_interval=15 \ - rcutorture.shutdown_secs=$3 \ - rcutorture.rcutorture_runnable=1 \ - rcutorture.test_no_idle_hz=1 \ - rcutorture.verbose=1 -} diff --git a/tools/testing/selftests/rcutorture/configs/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/ver_functions.sh deleted file mode 100644 index 8977d8d..0000000 --- a/tools/testing/selftests/rcutorture/configs/ver_functions.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# -# Kernel-version-dependent shell functions for the rest of the scripts. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2013 -# -# Authors: Paul E. McKenney - -# rcutorture_param_n_barrier_cbs bootparam-string -# -# Adds n_barrier_cbs rcutorture module parameter to kernels having it. -rcutorture_param_n_barrier_cbs () { - if echo $1 | grep -q "rcutorture\.n_barrier_cbs" - then - : - else - echo rcutorture.n_barrier_cbs=4 - fi -} - -# rcutorture_param_onoff bootparam-string config-file -# -# Adds onoff rcutorture module parameters to kernels having it. -rcutorture_param_onoff () { - if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" - then - echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2 - echo rcutorture.onoff_interval=3 rcutorture.onoff_holdoff=30 - fi -} - -# per_version_boot_params bootparam-string config-file seconds -# -# Adds per-version torture-module parameters to kernels supporting them. -per_version_boot_params () { - echo $1 `rcutorture_param_onoff "$1" "$2"` \ - `rcutorture_param_n_barrier_cbs "$1"` \ - rcutorture.stat_interval=15 \ - rcutorture.shutdown_secs=$3 \ - rcutorture.rcutorture_runnable=1 \ - rcutorture.test_no_idle_hz=1 \ - rcutorture.verbose=1 -} -- cgit v0.10.2 From 9dfa5b35c190e3ce9325a356f54282a4b8dc0336 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Feb 2014 13:56:37 -0800 Subject: locktorture: Add vestigial locktorture configuration This commit adds a trivial set of configuration files for lock torturing. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index c24092c..8d3a6e9 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -159,7 +159,7 @@ do shift ;; --torture) - checkarg --torture "(suite name)" "$#" "$2" '^rcu$' '^--' + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\)$' '^--' TORTURE_SUITE=$2 shift ;; diff --git a/tools/testing/selftests/rcutorture/configs/lock/BUSTED b/tools/testing/selftests/rcutorture/configs/lock/BUSTED new file mode 100644 index 0000000..1d1da14 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/BUSTED @@ -0,0 +1,6 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot new file mode 100644 index 0000000..6386c15 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot @@ -0,0 +1 @@ +locktorture.torture_type=lock_busted diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST new file mode 100644 index 0000000..a061b22 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST @@ -0,0 +1 @@ +LOCK01 diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFcommon b/tools/testing/selftests/rcutorture/configs/lock/CFcommon new file mode 100644 index 0000000..e372dc2 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/CFcommon @@ -0,0 +1,2 @@ +CONFIG_LOCK_TORTURE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK01 b/tools/testing/selftests/rcutorture/configs/lock/LOCK01 new file mode 100644 index 0000000..a9625e3 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK01 @@ -0,0 +1,6 @@ +CONFIG_SMP=y +CONFIG_NR_CPUS=8 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh new file mode 100644 index 0000000..9746ea1 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# +# Kernel-version-dependent shell functions for the rest of the scripts. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2014 +# +# Authors: Paul E. McKenney + +# locktorture_param_onoff bootparam-string config-file +# +# Adds onoff locktorture module parameters to kernels having it. +locktorture_param_onoff () { + if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" + then + echo CPU-hotplug kernel, adding locktorture onoff. 1>&2 + echo locktorture.onoff_interval=3 locktorture.onoff_holdoff=30 + fi +} + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `locktorture_param_onoff "$1" "$2"` \ + locktorture.stat_interval=15 \ + locktorture.shutdown_secs=$3 \ + locktorture.locktorture_runnable=1 \ + locktorture.verbose=1 +} -- cgit v0.10.2 From f881825a73543e9664c7fe7166e06f5f4d569834 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Feb 2014 14:42:51 -0800 Subject: rcutorture: Gracefully handle NULL cleanup hooks Although most torture tests will have some cleanup hook, it is possible that one might not. This commit therefore enables graceful handling of a NULL cleanup hook during torture-test shutdown. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/torture.c b/kernel/torture.c index b26c7b4..acc9afc 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -454,7 +454,10 @@ static int torture_shutdown(void *arg) VERBOSE_TOROUT_STRING("torture_shutdown task shutting down system"); shutdown_task = NULL; /* Avoid self-kill deadlock. */ - torture_shutdown_hook();/* Shut down the enclosing torture test. */ + if (torture_shutdown_hook) + torture_shutdown_hook(); + else + VERBOSE_TOROUT_STRING("No torture_shutdown_hook(), skipping."); kernel_power_off(); /* Shut down the system. */ return 0; } -- cgit v0.10.2 From 02736b81dd74ea141ac5d55e909ac58ddc5f610d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Feb 2014 15:47:12 -0800 Subject: locktorture: Add kvm-recheck.sh plug-in for locktorture This commit adds the kvm-recheck-lock.sh plug-in for locktorture to print out lock-specific progress statistics. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh new file mode 100755 index 0000000..829186e --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# +# Analyze a given results directory for locktorture progress. +# +# Usage: sh kvm-recheck-lock.sh resdir +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2014 +# +# Authors: Paul E. McKenney + +i="$1" +if test -d $i +then + : +else + echo Unreadable results directory: $i + exit 1 +fi + +configfile=`echo $i | sed -e 's/^.*\///'` +ncs=`grep "Writes: Total:" $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* Total: //' -e 's/ .*$//'` +if test -z "$ncs" +then + echo $configfile +else + title="$configfile ------- $ncs acquisitions/releases" + dur=`sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` + if test -z "$dur" + then + : + else + ncsps=`awk -v ncs=$ncs -v dur=$dur ' + BEGIN { print ncs / dur }' < /dev/null` + title="$title ($ncsps per second)" + fi + echo $title +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 31c8706..a44daaa 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Given the results directories for previous KVM runs of rcutorture, +# Given the results directories for previous KVM-based torture runs, # check the build and console output for errors. Given a directory # containing results directories, this recursively checks them all. # @@ -37,7 +37,8 @@ do resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'` head -1 $resdir/log fi - kvm-recheck-rcu.sh $i + TORTURE_SUITE="`cat $i/../TORTURE_SUITE`" + kvm-recheck-${TORTURE_SUITE}.sh $i configcheck.sh $i/.config $i/ConfigFragment parse-build.sh $i/Make.out $configfile parse-rcutorture.sh $i/console.log $configfile -- cgit v0.10.2 From 48a21d5cf05912ed3b2237f417a959a90fd6907d Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sat, 8 Feb 2014 18:06:57 -0800 Subject: rcutorture: Rename TREE_RCU-Kconfig.txt It used to be that: git ls-files "*Kconfig*" would find all Kconfig files and would only find Kconfig files. This commit renames TREE_RCU-Kconfig.txt to TREE_RCU-kconfig.txt so that this is once again true. Signed-off-by: Paul Bolle Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-Kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-Kconfig.txt deleted file mode 100644 index adbb76c..0000000 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-Kconfig.txt +++ /dev/null @@ -1,95 +0,0 @@ -This document gives a brief rationale for the TREE_RCU-related test -cases, a group that includes TREE_PREEMPT_RCU. - - -Kconfig Parameters: - -CONFIG_DEBUG_LOCK_ALLOC -- Do three, covering CONFIG_PROVE_LOCKING & not. -CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one. -CONFIG_HOTPLUG_CPU -- Do half. (Every second.) -CONFIG_HZ_PERIODIC -- Do one. -CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) -CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. -CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. -CONFIG_PREEMPT -- Do half. (First three and #8.) -CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not. -CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING. -CONFIG_PROVE_RCU_DELAY -- Do one. -CONFIG_RCU_BOOST -- one of TREE_PREEMPT_RCU. -CONFIG_RCU_BOOST_PRIO -- set to 2 for _BOOST testing. -CONFIG_RCU_CPU_STALL_INFO -- do one with and without _VERBOSE. -CONFIG_RCU_CPU_STALL_VERBOSE -- do one with and without _INFO. -CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others. -CONFIG_RCU_FANOUT_EXACT -- Do one. -CONFIG_RCU_FANOUT_LEAF -- Do one non-default. -CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. -CONFIG_RCU_NOCB_CPU -- Do three, see below. -CONFIG_RCU_NOCB_CPU_ALL -- Do one. -CONFIG_RCU_NOCB_CPU_NONE -- Do one. -CONFIG_RCU_NOCB_CPU_ZERO -- Do one. -CONFIG_RCU_TRACE -- Do half. -CONFIG_SMP -- Need one !SMP for TREE_PREEMPT_RCU. -RCU-bh: Do one with PREEMPT and one with !PREEMPT. -RCU-sched: Do one with PREEMPT but not BOOST. - - -Hierarchy: - -TREE01. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n. -TREE02. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=3. -TREE03. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=4. -TREE04. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=2. -TREE05. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n - CONFIG_RCU_FANOUT_LEAF=6. -TREE06. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y - CONFIG_RCU_FANOUT_LEAF=6. -TREE07. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=2. -TREE08. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y, - CONFIG_RCU_FANOUT_LEAF=2. -TREE09. CONFIG_NR_CPUS=1. - - -Kconfig Parameters Ignored: - -CONFIG_64BIT - - Used only to check CONFIG_RCU_FANOUT value, inspection suffices. - -CONFIG_NO_HZ_FULL_SYSIDLE_SMALL - - Defer until Frederic uses this. - -CONFIG_PREEMPT_COUNT -CONFIG_PREEMPT_RCU - - Redundant with CONFIG_PREEMPT, ignore. - -CONFIG_RCU_BOOST_DELAY - - Inspection suffices, ignore. - -CONFIG_RCU_CPU_STALL_TIMEOUT - - Inspection suffices, ignore. - -CONFIG_RCU_STALL_COMMON - - Implied by TREE_RCU and TREE_PREEMPT_RCU. - -CONFIG_RCU_TORTURE_TEST -CONFIG_RCU_TORTURE_TEST_RUNNABLE - - Always used in KVM testing. - -CONFIG_RCU_USER_QS - - Redundant with CONFIG_NO_HZ_FULL. - -CONFIG_TREE_PREEMPT_RCU -CONFIG_TREE_RCU - - These are controlled by CONFIG_PREEMPT. diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt new file mode 100644 index 0000000..adbb76c --- /dev/null +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -0,0 +1,95 @@ +This document gives a brief rationale for the TREE_RCU-related test +cases, a group that includes TREE_PREEMPT_RCU. + + +Kconfig Parameters: + +CONFIG_DEBUG_LOCK_ALLOC -- Do three, covering CONFIG_PROVE_LOCKING & not. +CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one. +CONFIG_HOTPLUG_CPU -- Do half. (Every second.) +CONFIG_HZ_PERIODIC -- Do one. +CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) +CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. +CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. +CONFIG_PREEMPT -- Do half. (First three and #8.) +CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not. +CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING. +CONFIG_PROVE_RCU_DELAY -- Do one. +CONFIG_RCU_BOOST -- one of TREE_PREEMPT_RCU. +CONFIG_RCU_BOOST_PRIO -- set to 2 for _BOOST testing. +CONFIG_RCU_CPU_STALL_INFO -- do one with and without _VERBOSE. +CONFIG_RCU_CPU_STALL_VERBOSE -- do one with and without _INFO. +CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others. +CONFIG_RCU_FANOUT_EXACT -- Do one. +CONFIG_RCU_FANOUT_LEAF -- Do one non-default. +CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. +CONFIG_RCU_NOCB_CPU -- Do three, see below. +CONFIG_RCU_NOCB_CPU_ALL -- Do one. +CONFIG_RCU_NOCB_CPU_NONE -- Do one. +CONFIG_RCU_NOCB_CPU_ZERO -- Do one. +CONFIG_RCU_TRACE -- Do half. +CONFIG_SMP -- Need one !SMP for TREE_PREEMPT_RCU. +RCU-bh: Do one with PREEMPT and one with !PREEMPT. +RCU-sched: Do one with PREEMPT but not BOOST. + + +Hierarchy: + +TREE01. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n. +TREE02. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n, + CONFIG_RCU_FANOUT_LEAF=3. +TREE03. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n, + CONFIG_RCU_FANOUT_LEAF=4. +TREE04. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, + CONFIG_RCU_FANOUT_LEAF=2. +TREE05. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n + CONFIG_RCU_FANOUT_LEAF=6. +TREE06. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y + CONFIG_RCU_FANOUT_LEAF=6. +TREE07. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, + CONFIG_RCU_FANOUT_LEAF=2. +TREE08. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y, + CONFIG_RCU_FANOUT_LEAF=2. +TREE09. CONFIG_NR_CPUS=1. + + +Kconfig Parameters Ignored: + +CONFIG_64BIT + + Used only to check CONFIG_RCU_FANOUT value, inspection suffices. + +CONFIG_NO_HZ_FULL_SYSIDLE_SMALL + + Defer until Frederic uses this. + +CONFIG_PREEMPT_COUNT +CONFIG_PREEMPT_RCU + + Redundant with CONFIG_PREEMPT, ignore. + +CONFIG_RCU_BOOST_DELAY + + Inspection suffices, ignore. + +CONFIG_RCU_CPU_STALL_TIMEOUT + + Inspection suffices, ignore. + +CONFIG_RCU_STALL_COMMON + + Implied by TREE_RCU and TREE_PREEMPT_RCU. + +CONFIG_RCU_TORTURE_TEST +CONFIG_RCU_TORTURE_TEST_RUNNABLE + + Always used in KVM testing. + +CONFIG_RCU_USER_QS + + Redundant with CONFIG_NO_HZ_FULL. + +CONFIG_TREE_PREEMPT_RCU +CONFIG_TREE_RCU + + These are controlled by CONFIG_PREEMPT. -- cgit v0.10.2 From b46f358ae5e3558a9072f0354128d5749a584d01 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 10 Feb 2014 19:59:05 -0800 Subject: rcutorture: Place kvm-test-1-run.sh output into res directory The output of each kvm-test-1-run.sh script is placed into a file whose name parallels that of the build directory. This means that the kvm-test-1-run.sh output is overwritten by later run. This commit therefore places the kvm-test-1-run.sh output into the per-test-case directory in the "res" hierarchy. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 8d3a6e9..48b27e9 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -318,7 +318,7 @@ function dump(first, pastlast) print "touch " builddir ".wait"; print "mkdir " builddir " > /dev/null 2>&1 || :"; print "mkdir " rd cfr[jn] " || :"; - print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " builddir ".out 2>&1 &" + print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date`" print "while test -f " builddir ".wait" print "do" @@ -337,7 +337,7 @@ function dump(first, pastlast) for (j = 1; j < jn; j++) { builddir=KVM "/b" j print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results:" - print "cat " builddir ".out" + print "cat " rd cfr[j] "/kvm-test-1-run.sh.out" } } -- cgit v0.10.2 From e086481baf9d0436bdd6e9b739bfa4a83fb89ef5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 11 Feb 2014 08:05:07 -0800 Subject: rcutorture: Add a lock_busted to test the test This commit adds a maximally broken locking primitive in which lock acquisition and release are both no-ops. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index d69d20d..f26b1a1 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -112,6 +112,37 @@ static struct lock_torture_ops *cur_ops; * Definitions for lock torture testing. */ +static int torture_lock_busted_write_lock(void) +{ + return 0; /* BUGGY, do not use in real life!!! */ +} + +static void torture_lock_busted_write_delay(struct torture_random_state *trsp) +{ + const unsigned long longdelay_us = 100; + + /* We want a long delay occasionally to force massive contention. */ + if (!(torture_random(trsp) % + (nrealwriters_stress * 2000 * longdelay_us))) + mdelay(longdelay_us); +#ifdef CONFIG_PREEMPT + if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) + preempt_schedule(); /* Allow test to be preempted. */ +#endif +} + +static void torture_lock_busted_write_unlock(void) +{ + /* BUGGY, do not use in real life!!! */ +} + +static struct lock_torture_ops lock_busted_ops = { + .writelock = torture_lock_busted_write_lock, + .write_delay = torture_lock_busted_write_delay, + .writeunlock = torture_lock_busted_write_unlock, + .name = "lock_busted" +}; + static DEFINE_SPINLOCK(torture_spinlock); static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock) @@ -320,7 +351,7 @@ static int __init lock_torture_init(void) int i; int firsterr = 0; static struct lock_torture_ops *torture_ops[] = { - &spin_lock_ops, &spin_lock_irq_ops, + &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, }; torture_init_begin(torture_type, verbose, &locktorture_runnable); -- cgit v0.10.2 From 73fa867e2c705cf5cf0a38df8618fa20eee3d75a Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 11 Feb 2014 12:16:14 -0800 Subject: rcutorture: Save kvm.sh output to log This commit logs the progress text that kvm.sh outputs, improving after-the-fact troubleshooting. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 48b27e9..5a78cbf 100644 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -297,7 +297,8 @@ awk < $T/cfgcpu.pack \ # Dump out the scripting required to run one test batch. function dump(first, pastlast) { - print "echo ----Start batch: `date`" + print "echo ----Start batch: `date`"; + print "echo ----Start batch: `date` >> " rd "/log"; jn=1 for (j = first; j < pastlast; j++) { builddir=KVM "/b" jn @@ -314,30 +315,37 @@ function dump(first, pastlast) else ovf = ""; print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` >> " rd "/log"; print "rm -f " builddir ".*"; print "touch " builddir ".wait"; print "mkdir " builddir " > /dev/null 2>&1 || :"; print "mkdir " rd cfr[jn] " || :"; print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" RCU_QEMU_ARG "\" \"" RCU_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &" - print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date`" + print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date`"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` >> " rd "/log"; print "while test -f " builddir ".wait" print "do" print "\tsleep 1" print "done" - print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date`" + print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date`"; + print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` >> " rd "/log"; jn++; } for (j = 1; j < jn; j++) { builddir=KVM "/b" j print "rm -f " builddir ".ready" - print "echo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date`" + print "echo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date`"; + print "echo ----", cfr[j], cpusr[j] ovf ": Starting kernel. `date` >> " rd "/log"; } print "wait" - print "echo ---- All kernel runs complete. `date`" + print "echo ---- All kernel runs complete. `date`"; + print "echo ---- All kernel runs complete. `date` >> " rd "/log"; for (j = 1; j < jn; j++) { builddir=KVM "/b" j - print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results:" - print "cat " rd cfr[j] "/kvm-test-1-run.sh.out" + print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results:"; + print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: >> " rd "/log"; + print "cat " rd cfr[j] "/kvm-test-1-run.sh.out"; + print "cat " rd cfr[j] "/kvm-test-1-run.sh.out >> " rd "/log"; } } -- cgit v0.10.2 From 44a589ca2de4d7980f8b335481a09bf818065dbe Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Thu, 6 Feb 2014 11:28:31 -0800 Subject: NFC: NCI: Fix NULL pointer dereference The check should be for setup function pointer. This patch fixes NULL pointer dereference issue for NCI based NFC driver which doesn't define setup handler. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Samuel Ortiz diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 46bda01..56db888 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -301,7 +301,7 @@ static int nci_open_device(struct nci_dev *ndev) rc = __nci_request(ndev, nci_reset_req, 0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); - if (ndev->ops->setup(ndev)) + if (ndev->ops->setup) ndev->ops->setup(ndev); if (!rc) { -- cgit v0.10.2 From eae39cb4934d3daab3ec828c5201c955b2e56af9 Mon Sep 17 00:00:00 2001 From: Pekon Gupta Date: Mon, 17 Feb 2014 13:11:23 +0530 Subject: mtd: nand: omap: fix ecclayout to be in sync with u-boot NAND driver Fixes: commit a919e51161b58ed7e6e663daba99ab7d558808f3 mtd: nand: omap2: clean-up BCHx_HW and BCHx_SW ECC configurations in device_probe Fixes ecclayout mismatch introduced in above commit for following ecc-schemes: - OMAP_ECC_BCH4_CODE_HW_DETECTION_SW - OMAP_ECC_BCH8_CODE_HW_DETECTION_SW However, this patch also touches other ecc-schemes as the fix required refactoring common code, into ecc-scheme specific code. This patch aligns ecc-layout for below ecc-schemes as per reference [1],[2],[3] +---+------------+-------------++-------------+-------------+ |OOB|BCH8_CODE_HW|BCH8_CODE_HW_||HAM1_CODE_HW |HAM1_CODE_HW | |pos| | DETECTION_SW||(x8 device) |(x16 device) | +---+------------+-------------++-------------+-------------+ | 0 |BADBLK_MARK | BADBLK_MARK || BADBLK_MARK | BADBLK_MARK | | 1 |BADBLK_MARK | BADBLK_MARK || eccpos[0] | BADBLK_MARK | | 2 | eccpos[0] | eccpos[0] || eccpos[1] | eccpos[0] | | 3 | eccpos[1] | eccpos[1] || eccpos[2] | eccpos[1] | | 4 | eccpos[2] | eccpos[2] || eccpos[3] | eccpos[2] | | 5 | eccpos[3] | eccpos[3] || eccpos[4] | eccpos[3] | | 6 | eccpos[4] | eccpos[4] || eccpos[5] | eccpos[4] | | 7 | eccpos[5] | eccpos[5] || eccpos[6] | eccpos[5] | | 8 | eccpos[6] | eccpos[6] || eccpos[7] | eccpos[6] | | 9 | eccpos[7] | eccpos[7] || eccpos[8] | eccpos[7] | |10 | eccpos[8] | eccpos[8] || eccpos[9] | eccpos[8] | |11 | eccpos[9] | eccpos[9] || eccpos[10] | eccpos[9] | |12 | eccpos[10] | eccpos[10] || eccpos[11] | eccpos[10] | |13 | eccpos[11] | eccpos[11] || oobfree[0] | eccpos[11] | |14 | eccpos[12] | eccpos[12] || oobfree[1] | oobfree[0] | |15 | eccpos[13] | || oobfree[2] | oobfree[1] | +---+------------+-------------++-------------+-------------+ |16 | eccpos[14] | eccpos[13] || oobfree[3] | oobfree[2] | |...| [...] | [...] || [...] | [...] | |56 | eccpos[54] | eccpos[51] || oobfree[43] | oobfree[42] | |57 | eccpos[55] | || oobfree[44] | oobfree[43] | +===+============+=============+==============+=============+ |58 | oobfree[0] | oobfree[0] || oobfree[45] | oobfree[44] | |59 | oobfree[1] | oobfree[1] || oobfree[46] | oobfree[45] | |60 | oobfree[2] | oobfree[2] || oobfree[47] | oobfree[46] | |61 | oobfree[3] | oobfree[3] || oobfree[48] | oobfree[47] | |62 | oobfree[4] | oobfree[4] || oobfree[49] | oobfree[48] | |63 | oobfree[5] | oobfree[5] || oobfree[50] | oobfree[49] | +---+------------+-------------+--------------+-------------+ [1] ecc-layout expected by ROM code, as specified in SoC TRM under: Chapter="Initialization" Section="Device Initialization by ROM code" Sub-Section="Memory Booting" Heading="NAND" Figure="ECC Locations in NAND Spare Areas" [2] ecc-layout updates in u-boot http://lists.denx.de/pipermail/u-boot/2013-November/167551.html [3] u-boot configurations to match above ecc-layout are documented at https://processors.wiki.ti.com/index.php/Linux_Core_NAND_User%27s_Guide CC: # 3.13.x+ Reported-by: Enric Balletbo Serra Tested-by: Enric Balletbo i Serra Tested-by: Stefan Roese Signed-off-by: Pekon Gupta Signed-off-by: Brian Norris diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index ef4190a..34ef941 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1633,6 +1633,7 @@ static int omap_nand_probe(struct platform_device *pdev) int i; dma_cap_mask_t mask; unsigned sig; + unsigned oob_index; struct resource *res; struct mtd_part_parser_data ppdata = {}; @@ -1826,9 +1827,11 @@ static int omap_nand_probe(struct platform_device *pdev) (mtd->writesize / nand_chip->ecc.size); if (nand_chip->options & NAND_BUSWIDTH_16) - ecclayout->eccpos[0] = BADBLOCK_MARKER_LENGTH; + oob_index = BADBLOCK_MARKER_LENGTH; else - ecclayout->eccpos[0] = 1; + oob_index = 1; + for (i = 0; i < ecclayout->eccbytes; i++, oob_index++) + ecclayout->eccpos[i] = oob_index; ecclayout->oobfree->offset = ecclayout->eccpos[0] + ecclayout->eccbytes; break; @@ -1847,7 +1850,12 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->eccbytes = nand_chip->ecc.bytes * (mtd->writesize / nand_chip->ecc.size); - ecclayout->eccpos[0] = BADBLOCK_MARKER_LENGTH; + oob_index = BADBLOCK_MARKER_LENGTH; + for (i = 0; i < ecclayout->eccbytes; i++, oob_index++) { + ecclayout->eccpos[i] = oob_index; + if (((i + 1) % nand_chip->ecc.bytes) == 0) + oob_index++; + } ecclayout->oobfree->offset = ecclayout->eccpos[0] + ecclayout->eccbytes; /* software bch library is used for locating errors */ @@ -1883,7 +1891,9 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->eccbytes = nand_chip->ecc.bytes * (mtd->writesize / nand_chip->ecc.size); - ecclayout->eccpos[0] = BADBLOCK_MARKER_LENGTH; + oob_index = BADBLOCK_MARKER_LENGTH; + for (i = 0; i < ecclayout->eccbytes; i++, oob_index++) + ecclayout->eccpos[i] = oob_index; ecclayout->oobfree->offset = ecclayout->eccpos[0] + ecclayout->eccbytes; /* This ECC scheme requires ELM H/W block */ @@ -1913,7 +1923,12 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->eccbytes = nand_chip->ecc.bytes * (mtd->writesize / nand_chip->ecc.size); - ecclayout->eccpos[0] = BADBLOCK_MARKER_LENGTH; + oob_index = BADBLOCK_MARKER_LENGTH; + for (i = 0; i < ecclayout->eccbytes; i++, oob_index++) { + ecclayout->eccpos[i] = oob_index; + if (((i + 1) % nand_chip->ecc.bytes) == 0) + oob_index++; + } ecclayout->oobfree->offset = ecclayout->eccpos[0] + ecclayout->eccbytes; /* software bch library is used for locating errors */ @@ -1956,7 +1971,9 @@ static int omap_nand_probe(struct platform_device *pdev) ecclayout->eccbytes = nand_chip->ecc.bytes * (mtd->writesize / nand_chip->ecc.size); - ecclayout->eccpos[0] = BADBLOCK_MARKER_LENGTH; + oob_index = BADBLOCK_MARKER_LENGTH; + for (i = 0; i < ecclayout->eccbytes; i++, oob_index++) + ecclayout->eccpos[i] = oob_index; ecclayout->oobfree->offset = ecclayout->eccpos[0] + ecclayout->eccbytes; break; @@ -1975,8 +1992,6 @@ static int omap_nand_probe(struct platform_device *pdev) /* populate remaining ECC layout data */ ecclayout->oobfree->length = mtd->oobsize - (BADBLOCK_MARKER_LENGTH + ecclayout->eccbytes); - for (i = 1; i < ecclayout->eccbytes; i++) - ecclayout->eccpos[i] = ecclayout->eccpos[0] + i; /* check if NAND device's OOB is enough to store ECC signatures */ if (mtd->oobsize < (ecclayout->eccbytes + BADBLOCK_MARKER_LENGTH)) { pr_err("not enough OOB bytes required = %d, available=%d\n", -- cgit v0.10.2 From aa6092f9835893290e77c3e24649def49dac1583 Mon Sep 17 00:00:00 2001 From: Pekon Gupta Date: Mon, 17 Feb 2014 13:11:24 +0530 Subject: mtd: nand: omap: fix ecclayout->oobfree->offset 1) In current implementation, ecclayout->oobfree->offset is calculated with respect to ecclayout->eccpos[0] which is incorrect because ECC bytes may not be stored contiguously in OOB. So, this patch calculates ecclayout->oobfree->offset with respect to last ECC byte-position 'eccpos[ecclayout->eccbytes-1]'. 2) ECC layout of some ecc-schemes expects reserved-markers at specific eccpos[] which should not be over-written by any file-system metadata. So this patch aligns oobfree->offset taking into account of such markers. CC: # 3.13.x+ Tested-by: Enric Balletbo i Serra Tested-by: Stefan Roese Signed-off-by: Pekon Gupta Signed-off-by: Brian Norris diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 34ef941..58685ab 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1832,8 +1832,9 @@ static int omap_nand_probe(struct platform_device *pdev) oob_index = 1; for (i = 0; i < ecclayout->eccbytes; i++, oob_index++) ecclayout->eccpos[i] = oob_index; - ecclayout->oobfree->offset = ecclayout->eccpos[0] + - ecclayout->eccbytes; + /* no reserved-marker in ecclayout for this ecc-scheme */ + ecclayout->oobfree->offset = + ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; break; case OMAP_ECC_BCH4_CODE_HW_DETECTION_SW: @@ -1856,8 +1857,9 @@ static int omap_nand_probe(struct platform_device *pdev) if (((i + 1) % nand_chip->ecc.bytes) == 0) oob_index++; } - ecclayout->oobfree->offset = ecclayout->eccpos[0] + - ecclayout->eccbytes; + /* include reserved-marker in ecclayout->oobfree calculation */ + ecclayout->oobfree->offset = 1 + + ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; /* software bch library is used for locating errors */ nand_chip->ecc.priv = nand_bch_init(mtd, nand_chip->ecc.size, @@ -1894,8 +1896,9 @@ static int omap_nand_probe(struct platform_device *pdev) oob_index = BADBLOCK_MARKER_LENGTH; for (i = 0; i < ecclayout->eccbytes; i++, oob_index++) ecclayout->eccpos[i] = oob_index; - ecclayout->oobfree->offset = ecclayout->eccpos[0] + - ecclayout->eccbytes; + /* reserved marker already included in ecclayout->eccbytes */ + ecclayout->oobfree->offset = + ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; /* This ECC scheme requires ELM H/W block */ if (is_elm_present(info, pdata->elm_of_node, BCH4_ECC) < 0) { pr_err("nand: error: could not initialize ELM\n"); @@ -1929,8 +1932,9 @@ static int omap_nand_probe(struct platform_device *pdev) if (((i + 1) % nand_chip->ecc.bytes) == 0) oob_index++; } - ecclayout->oobfree->offset = ecclayout->eccpos[0] + - ecclayout->eccbytes; + /* include reserved-marker in ecclayout->oobfree calculation */ + ecclayout->oobfree->offset = 1 + + ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; /* software bch library is used for locating errors */ nand_chip->ecc.priv = nand_bch_init(mtd, nand_chip->ecc.size, @@ -1974,8 +1978,9 @@ static int omap_nand_probe(struct platform_device *pdev) oob_index = BADBLOCK_MARKER_LENGTH; for (i = 0; i < ecclayout->eccbytes; i++, oob_index++) ecclayout->eccpos[i] = oob_index; - ecclayout->oobfree->offset = ecclayout->eccpos[0] + - ecclayout->eccbytes; + /* reserved marker already included in ecclayout->eccbytes */ + ecclayout->oobfree->offset = + ecclayout->eccpos[ecclayout->eccbytes - 1] + 1; break; #else pr_err("nand: error: CONFIG_MTD_NAND_OMAP_BCH not enabled\n"); -- cgit v0.10.2 From bb38eefb6858ce16b34716145b9597a5680aa54c Mon Sep 17 00:00:00 2001 From: Pekon Gupta Date: Mon, 17 Feb 2014 13:11:25 +0530 Subject: mtd: nand: omap: fix ecclayout->oobfree->length This patch excludes reserved-marker byte-position from oobfree->length calculation. Thus all bytes from oobfree->offset till end of OOB are free. CC: # 3.13.x+ Signed-off-by: Pekon Gupta Signed-off-by: Brian Norris diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 58685ab..bf642ce 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -1994,9 +1994,8 @@ static int omap_nand_probe(struct platform_device *pdev) goto return_error; } - /* populate remaining ECC layout data */ - ecclayout->oobfree->length = mtd->oobsize - (BADBLOCK_MARKER_LENGTH + - ecclayout->eccbytes); + /* all OOB bytes from oobfree->offset till end off OOB are free */ + ecclayout->oobfree->length = mtd->oobsize - ecclayout->oobfree->offset; /* check if NAND device's OOB is enough to store ECC signatures */ if (mtd->oobsize < (ecclayout->eccbytes + BADBLOCK_MARKER_LENGTH)) { pr_err("not enough OOB bytes required = %d, available=%d\n", -- cgit v0.10.2 From 10762e80489e73e2d52bf78e0869f253f42821bd Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 23 Feb 2014 12:52:44 +0000 Subject: target/sbc: Fix sbc_dif_copy_prot addr offset bug This patch fixes a bug in sbc_dif_copy_prot() where the updated addr offset did not take into account the case where the associated scatterlist had not been incremented. This addresses the case where incoming protection scatterlists may contain a length smaller than PAGE_SIZE across multiple entires, when the target protection scatterlists are always being explicitly filled up to PAGE_SIZE before adding another entry. Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Or Gerlitz Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index a448944..ef1a58a 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1074,7 +1074,7 @@ sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, struct scatterlist *psg; void *paddr, *addr; unsigned int i, len, left; - unsigned int offset = 0; + unsigned int offset = sg_off; left = sectors * dev->prot_length; @@ -1084,11 +1084,10 @@ sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, if (offset >= sg->length) { sg = sg_next(sg); offset = 0; - sg_off = sg->offset; } paddr = kmap_atomic(sg_page(psg)) + psg->offset; - addr = kmap_atomic(sg_page(sg)) + sg_off; + addr = kmap_atomic(sg_page(sg)) + sg->offset + offset; if (read) memcpy(paddr, addr, len); -- cgit v0.10.2 From 94387aa7cec8c5386b405b852cf3e5be38456fc7 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 23 Feb 2014 14:04:09 +0000 Subject: target: Add DIF sense codes in transport_generic_request_failure This patch adds the three missing DIF related sense codes within transport_generic_request_failure(), which are required to ensure that the correct ASC/ASQC is generated by the subsequent call to transport_send_check_condition_and_sense(). Cc: Martin K. Petersen Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Or Gerlitz Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 24b4f65d..2956250 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1601,6 +1601,9 @@ void transport_generic_request_failure(struct se_cmd *cmd, case TCM_CHECK_CONDITION_ABORT_CMD: case TCM_CHECK_CONDITION_UNIT_ATTENTION: case TCM_CHECK_CONDITION_NOT_READY: + case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED: + case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED: + case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED: break; case TCM_OUT_OF_RESOURCES: sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; -- cgit v0.10.2 From fc272ec7a2dfe002b823c78f8ef0eb88042e2762 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 23 Feb 2014 14:20:08 +0200 Subject: Target/sbc: Don't use sg as iterator in sbc_verify_read Because then this sg is passed to sbc_copy_prot which will hit a protection fault in cases we have more than a single sg. Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index ef1a58a..42f18fc 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1162,7 +1162,7 @@ sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors, { struct se_device *dev = cmd->se_dev; struct se_dif_v1_tuple *sdt; - struct scatterlist *dsg; + struct scatterlist *dsg, *psg = sg; sector_t sector = start; void *daddr, *paddr; int i, j, offset = sg_off; @@ -1170,14 +1170,14 @@ sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors, for_each_sg(cmd->t_data_sg, dsg, cmd->t_data_nents, i) { daddr = kmap_atomic(sg_page(dsg)) + dsg->offset; - paddr = kmap_atomic(sg_page(sg)) + sg->offset; + paddr = kmap_atomic(sg_page(psg)) + sg->offset; for (j = 0; j < dsg->length; j += dev->dev_attrib.block_size) { - if (offset >= sg->length) { + if (offset >= psg->length) { kunmap_atomic(paddr); - sg = sg_next(sg); - paddr = kmap_atomic(sg_page(sg)) + sg->offset; + psg = sg_next(psg); + paddr = kmap_atomic(sg_page(psg)) + psg->offset; offset = 0; } -- cgit v0.10.2 From 6b1168e1617d9d4db73ef5276490627abf5adec4 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 14 Feb 2014 13:31:03 +0400 Subject: CIFS: Fix wrong pos argument of cifs_find_lock_conflict and use generic_file_aio_write rather than __generic_file_aio_write in cifs_writev. Signed-off-by: Pavel Shilovsky Reported-by: Al Viro Signed-off-by: Steve French diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 53c1507..834fce7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2579,31 +2579,19 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; ssize_t rc = -EACCES; + loff_t lock_pos = pos; - BUG_ON(iocb->ki_pos != pos); - + if (file->f_flags & O_APPEND) + lock_pos = i_size_read(inode); /* * We need to hold the sem to be sure nobody modifies lock list * with a brlock that prevents writing. */ down_read(&cinode->lock_sem); - if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) { - mutex_lock(&inode->i_mutex); - rc = __generic_file_aio_write(iocb, iov, nr_segs, - &iocb->ki_pos); - mutex_unlock(&inode->i_mutex); - } - - if (rc > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - rc, rc); - if (err < 0) - rc = err; - } - + CIFS_WRITE_OP)) + rc = generic_file_aio_write(iocb, iov, nr_segs, pos); up_read(&cinode->lock_sem); return rc; } -- cgit v0.10.2 From a26054d184763969a411e3939fe243516715ff59 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Feb 2014 07:21:00 -0500 Subject: cifs: sanity check length of data to send before sending We had a bug discovered recently where an upper layer function (cifs_iovec_write) could pass down a smb_rqst with an invalid amount of data in it. The length of the SMB frame would be correct, but the rqst struct would cause smb_send_rqst to send nearly 4GB of data. This should never be the case. Add some sanity checking to the beginning of smb_send_rqst that ensures that the amount of data we're going to send agrees with the length in the RFC1002 header. If it doesn't, WARN() and return -EIO to the upper layers. Signed-off-by: Jeff Layton Acked-by: Sachin Prabhu Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b375709..18cd565 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -270,6 +270,26 @@ cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, iov->iov_len = rqst->rq_pagesz; } +static unsigned long +rqst_len(struct smb_rqst *rqst) +{ + unsigned int i; + struct kvec *iov = rqst->rq_iov; + unsigned long buflen = 0; + + /* total up iov array first */ + for (i = 0; i < rqst->rq_nvec; i++) + buflen += iov[i].iov_len; + + /* add in the page array if there is one */ + if (rqst->rq_npages) { + buflen += rqst->rq_pagesz * (rqst->rq_npages - 1); + buflen += rqst->rq_tailsz; + } + + return buflen; +} + static int smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) { @@ -277,6 +297,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); + unsigned long send_length; unsigned int i; size_t total_len = 0, sent; struct socket *ssocket = server->ssocket; @@ -285,6 +306,14 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) if (ssocket == NULL) return -ENOTSOCK; + /* sanity check send length */ + send_length = rqst_len(rqst); + if (send_length != smb_buf_length + 4) { + WARN(1, "Send length mismatch(send_length=%lu smb_buf_length=%u)\n", + send_length, smb_buf_length); + return -EIO; + } + cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length); dump_smb(iov[0].iov_base, iov[0].iov_len); -- cgit v0.10.2 From 963a1852fbac4f75a2d938fa2e734ef1e6d4c044 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Feb 2014 20:34:34 +0100 Subject: mac80211: don't validate unchanged AP bandwidth while tracking The MLME code in mac80211 must track whether or not the AP changed bandwidth, but if there's no change while tracking it shouldn't do anything, otherwise regulatory updates can make it impossible to connect to certain APs if the regulatory database doesn't match the information from the AP. See the precise scenario described in the code. This still leaves some possible problems with CSA or if the AP actually changed bandwidth, but those cases are less common and won't completely prevent using it. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=70881 Cc: stable@vger.kernel.org Reported-and-tested-by: Nate Carlson Signed-off-by: Johannes Berg diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 57d5482..c415f00 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -271,6 +271,28 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ret = 0; out: + /* + * When tracking the current AP, don't do any further checks if the + * new chandef is identical to the one we're currently using for the + * connection. This keeps us from playing ping-pong with regulatory, + * without it the following can happen (for example): + * - connect to an AP with 80 MHz, world regdom allows 80 MHz + * - AP advertises regdom US + * - CRDA loads regdom US with 80 MHz prohibited (old database) + * - the code below detects an unsupported channel, downgrades, and + * we disconnect from the AP in the caller + * - disconnect causes CRDA to reload world regdomain and the game + * starts anew. + * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881) + * + * It seems possible that there are still scenarios with CSA or real + * bandwidth changes where a this could happen, but those cases are + * less common and wouldn't completely prevent using the AP. + */ + if (tracking && + cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef)) + return ret; + /* don't print the message below for VHT mismatch if VHT is disabled */ if (ret & IEEE80211_STA_DISABLE_VHT) vht_chandef = *chandef; -- cgit v0.10.2 From e6cfc0295c7d51b008999a8b13a44fb43f8685ea Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 3 Feb 2014 15:33:37 +0000 Subject: asm-generic: add sched_setattr/sched_getattr syscalls Add the sched_setattr and sched_getattr syscalls to the generic syscall list, which is used by the following architectures: arc, arm64, c6x, hexagon, metag, openrisc, score, tile, unicore32. Signed-off-by: James Hogan Acked-by: Arnd Bergmann Acked-by: Catalin Marinas Cc: linux-arch@vger.kernel.org Cc: Vineet Gupta Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Salter Cc: Aurelien Jacquiot Cc: linux-c6x-dev@linux-c6x.org Cc: Richard Kuo Cc: linux-hexagon@vger.kernel.org Cc: linux-metag@vger.kernel.org Cc: Jonas Bonn Cc: linux@lists.openrisc.net Cc: Chen Liqin Cc: Lennox Wu Cc: Chris Metcalf Cc: Guan Xuetao diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index a20a9b4..dde8041 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -692,9 +692,13 @@ __SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \ __SYSCALL(__NR_kcmp, sys_kcmp) #define __NR_finit_module 273 __SYSCALL(__NR_finit_module, sys_finit_module) +#define __NR_sched_setattr 274 +__SYSCALL(__NR_sched_setattr, sys_sched_setattr) +#define __NR_sched_getattr 275 +__SYSCALL(__NR_sched_getattr, sys_sched_getattr) #undef __NR_syscalls -#define __NR_syscalls 274 +#define __NR_syscalls 276 /* * All syscalls below here should go away really, -- cgit v0.10.2 From b29dd35909c4ac074e84e8e7000fe525fe33c1a1 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 7 Jan 2014 13:54:26 +0100 Subject: clk: shmobile: rcar-gen2: Fix clock parent for all non-PLL clocks The lb, qspi, sdh, sd0 and sd1 clocks have the PLL1 (divided by 2) as their parent, not the main clock. Fix it. This bug introduced in v3.14-rc1 breaks various devices on the Lager and Kolesh shmobile boards and should thus be considered as a regression for which a fix during the -rc series is appropriate. Reported-by: Geert Uytterhoeven Signed-off-by: Laurent Pinchart Acked-by: Simon Horman diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/shmobile/clk-rcar-gen2.c index a59ec21..8c7bcbd 100644 --- a/drivers/clk/shmobile/clk-rcar-gen2.c +++ b/drivers/clk/shmobile/clk-rcar-gen2.c @@ -186,7 +186,7 @@ rcar_gen2_cpg_register_clock(struct device_node *np, struct rcar_gen2_cpg *cpg, const char *name) { const struct clk_div_table *table = NULL; - const char *parent_name = "main"; + const char *parent_name; unsigned int shift; unsigned int mult = 1; unsigned int div = 1; @@ -201,23 +201,31 @@ rcar_gen2_cpg_register_clock(struct device_node *np, struct rcar_gen2_cpg *cpg, * the multiplier value. */ u32 value = clk_readl(cpg->reg + CPG_PLL0CR); + parent_name = "main"; mult = ((value >> 24) & ((1 << 7) - 1)) + 1; } else if (!strcmp(name, "pll1")) { + parent_name = "main"; mult = config->pll1_mult / 2; } else if (!strcmp(name, "pll3")) { + parent_name = "main"; mult = config->pll3_mult; } else if (!strcmp(name, "lb")) { + parent_name = "pll1_div2"; div = cpg_mode & BIT(18) ? 36 : 24; } else if (!strcmp(name, "qspi")) { + parent_name = "pll1_div2"; div = (cpg_mode & (BIT(3) | BIT(2) | BIT(1))) == BIT(2) ? 16 : 20; } else if (!strcmp(name, "sdh")) { + parent_name = "pll1_div2"; table = cpg_sdh_div_table; shift = 8; } else if (!strcmp(name, "sd0")) { + parent_name = "pll1_div2"; table = cpg_sd01_div_table; shift = 4; } else if (!strcmp(name, "sd1")) { + parent_name = "pll1_div2"; table = cpg_sd01_div_table; shift = 0; } else if (!strcmp(name, "z")) { -- cgit v0.10.2 From 4a9c3ce1d293fe67123102f0b6734d6067c665fd Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 7 Jan 2014 13:54:26 +0100 Subject: clk: shmobile: rcar-gen2: Fix qspi divisor The qspi clock divisor is incorrectly set to twice the value it should have, possibly because it has been computed based on PLL1 as the clock parent instead of PLL1 / 2 (the datasheets specifies the qspi nominal frequencies, not the divisor values). Fix it. This bug introduced in v3.14-rc1 breaks various devices on the Lager and Kolesh shmobile boards and should thus be considered as a regression for which a fix during the -rc series is appropriate. Reported-by: Geert Uytterhoeven Signed-off-by: Laurent Pinchart Acked-by: Simon Horman diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/shmobile/clk-rcar-gen2.c index 8c7bcbd..dd272a0 100644 --- a/drivers/clk/shmobile/clk-rcar-gen2.c +++ b/drivers/clk/shmobile/clk-rcar-gen2.c @@ -215,7 +215,7 @@ rcar_gen2_cpg_register_clock(struct device_node *np, struct rcar_gen2_cpg *cpg, } else if (!strcmp(name, "qspi")) { parent_name = "pll1_div2"; div = (cpg_mode & (BIT(3) | BIT(2) | BIT(1))) == BIT(2) - ? 16 : 20; + ? 8 : 10; } else if (!strcmp(name, "sdh")) { parent_name = "pll1_div2"; table = cpg_sdh_div_table; -- cgit v0.10.2 From 60480994c70981d0336aadb3f46b052977d3d19f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 19 Feb 2014 18:13:24 +0100 Subject: clk: shmobile: Fix typo in MSTP clock DT bindings The DT bindings document a renesas,indices property, while the code, the DT example and the DT sources all use renesas,clock-indices. Fix the documentation. The shmobile mstp DT bindings have been merged in v3.14-rc1 with a bug in the DT ABI, a fix during the -rc series is appropriate. Reported-by: Geert Uytterhoeven Signed-off-by: Laurent Pinchart Acked-by: Simon Horman diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt index a6a352c..5992dce 100644 --- a/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt +++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt @@ -21,9 +21,9 @@ Required Properties: must appear in the same order as the output clocks. - #clock-cells: Must be 1 - clock-output-names: The name of the clocks as free-form strings - - renesas,indices: Indices of the gate clocks into the group (0 to 31) + - renesas,clock-indices: Indices of the gate clocks into the group (0 to 31) -The clocks, clock-output-names and renesas,indices properties contain one +The clocks, clock-output-names and renesas,clock-indices properties contain one entry per gate clock. The MSTP groups are sparsely populated. Unimplemented gate clocks must not be declared. -- cgit v0.10.2 From 04b01a1db46cc17f6ab2ac2bada27d8e671710e1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 11:21:38 +0100 Subject: perf tests: Fix *.o make tests Enable and fix *.o object make tests. Following tests are now available: $ make -f tests/make make_perf_o_O - make_perf_o_O: cd . && make -f Makefile O=/tmp/tmp.iF5vI5emGy DESTDIR=/tmp/tmp.epDPFVhH0s perf.o $ make -f tests/make make_util_map_o_O - make_util_map_o_O: cd . && make -f Makefile O=/tmp/tmp.BWuMf55ygC DESTDIR=/tmp/tmp.QbGBRF95oP util/map.o Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392805300-14610-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 00544b8..eb3671b 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -167,13 +167,9 @@ test_make_install_info_O := $(test_ok) test_make_install_pdf := $(test_ok) test_make_install_pdf_O := $(test_ok) -# Kbuild tests only -#test_make_python_perf_so_O := test -f $$TMP/tools/perf/python/perf.so -#test_make_perf_o_O := test -f $$TMP/tools/perf/perf.o -#test_make_util_map_o_O := test -f $$TMP/tools/perf/util/map.o - -test_make_perf_o_O := true -test_make_util_map_o_O := true +test_make_python_perf_so_O := test -f $$TMP_O/python/perf.so +test_make_perf_o_O := test -f $$TMP_O/perf.o +test_make_util_map_o_O := test -f $$TMP_O/util/map.o test_default = test -x $(PERF)/perf test = $(if $(test_$1),$(test_$1),$(test_default)) -- cgit v0.10.2 From 2a94f6c455b38c0411d92b0df794bb8137bf1957 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 11:21:39 +0100 Subject: perf tests: Add pmu-bison.o make test Adding pmu-bison.o make test: $ make -f tests/make make_util_pmu_bison_o - make_util_pmu_bison_o: cd . && make -f Makefile DESTDIR=/tmp/tmp.0u99hQn8Ga util/pmu-bison.o $ make -f tests/make make_util_pmu_bison_o_O - make_util_pmu_bison_o_O: cd . && make -f Makefile O=/tmp/tmp.sWKDLGS71O DESTDIR=/tmp/tmp.htQNJAfJ0d util/pmu-bison.o make: *** [make_util_pmu_bison_o_O] Error 1 The 'O=' version of the test is failing at the moment, due to the OUTPUT directory issue fixed in next patch. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392805300-14610-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/make b/tools/perf/tests/make index eb3671b..2d24954 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -35,8 +35,9 @@ make_tags := tags make_cscope := cscope make_help := help make_doc := doc -make_perf_o := perf.o -make_util_map_o := util/map.o +make_perf_o := perf.o +make_util_map_o := util/map.o +make_util_pmu_bison_o := util/pmu-bison.o make_install := install make_install_bin := install-bin make_install_doc := install-doc @@ -73,6 +74,7 @@ run += make_help run += make_doc run += make_perf_o run += make_util_map_o +run += make_util_pmu_bison_o run += make_install run += make_install_bin # FIXME 'install-*' commented out till they're fixed @@ -113,8 +115,9 @@ test_make_doc_O := $(test_ok) test_make_python_perf_so := test -f $(PERF)/python/perf.so -test_make_perf_o := test -f $(PERF)/perf.o -test_make_util_map_o := test -f $(PERF)/util/map.o +test_make_perf_o := test -f $(PERF)/perf.o +test_make_util_map_o := test -f $(PERF)/util/map.o +test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o define test_dest_files for file in $(1); do \ @@ -170,6 +173,7 @@ test_make_install_pdf_O := $(test_ok) test_make_python_perf_so_O := test -f $$TMP_O/python/perf.so test_make_perf_o_O := test -f $$TMP_O/perf.o test_make_util_map_o_O := test -f $$TMP_O/util/map.o +test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o test_default = test -x $(PERF)/perf test = $(if $(test_$1),$(test_$1),$(test_default)) -- cgit v0.10.2 From da237ed07c5144b0330a38b9b68be167231255d0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 11:21:40 +0100 Subject: perf tools: Fix bison OUTPUT directories dependency The bison and flex C objects don't have dependency for creating output directories. This could lead to build failure if the one of those objects is picked up by make to be build as the first one (reported by Arnaldo). Also following make fails: $ rm -rf /tmp/krava; mkdir /tmp/krava; make O=/tmp/krava util/pmu-bison.o BUILD: Doing 'make -j4' parallel build [ SNIP ] BISON /tmp/krava/util/pmu-bison.c FLAGS: * new build flags or prefix bison: /tmp/krava/util/pmu-bison.output: cannot open: No such file or directory make[1]: *** [/tmp/krava/util/pmu-bison.c] Error 1 make[1]: *** Waiting for unfinished jobs.... make: *** [util/pmu-bison.o] Error 2 Adding bison objects dependency for output directories (flex objects depends on bisons'). This fixies the make_util_pmu_bison_o_O make test. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392805300-14610-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 77b153f..5fedd69 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -712,9 +712,15 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) # we depend the various files onto their directories. DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS) DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS))) +# no need to add flex objects, because they depend on bison ones +DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c +DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c + +OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS))) + +$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES) # In the second step, we make a rule to actually create these directories -$(sort $(dir $(DIRECTORY_DEPS))): +$(OUTPUT_DIRECTORIES): $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null $(LIB_FILE): $(LIB_OBJS) -- cgit v0.10.2 From 0695e57b9a6a5eb856a58cf488f715b3bb7366a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 16:52:54 +0100 Subject: perf tools: Factor features display code Currently the we display all detected features/libraries by following rules: - if one of the features is missing - if it's build from clean tree This patch changes changes this behavior in several ways. - We no longer display all detected features, only detected libraries are displayed by default: $ make BUILD: Doing 'make -j4' parallel build Auto-detecting system features: ... dwarf: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libunwind: [ on ] The assumption is, that above libraries are the most interesting part of the detection, while we don't care much about detection of on-exit support. - If all above libraries are detected, the default is not shown on subsequent builds. - If one of the above libraries is missing, the detection output is forced. - The features status is stored in PERF-FEATURES file and the detection output is forced in case the there's difference between the file contents and currently detected features. - If you want to see all detected features, you can use VF=1 make variable, that forces the detected features output. $ make VF=1 BUILD: Doing 'make -j4' parallel build Auto-detecting system features: ... dwarf: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libunwind: [ on ] ... backtrace: [ on ] ... fortify-source: [ on ] ... gtk2-infobar: [ on ] ... libelf-getphdrnum: [ on ] ... libelf-mmap: [ on ] ... libpython-version: [ on ] ... on-exit: [ on ] ... stackprotector-all: [ on ] ... timerfd: [ on ] ... libunwind-debug-frame: [ OFF ] ... bionic: [ OFF ] Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392825179-5228-2-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5fedd69..bde91f8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -7,6 +7,8 @@ include config/utilities.mak # Define V to have a more verbose compile. # +# Define VF to have a more verbose feature check output. +# # Define O to save output files in a separate directory. # # Define ARCH as name of target architecture if you want cross-builds. @@ -897,7 +899,7 @@ config-clean: clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 1686583..39e6e6c 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -149,6 +149,32 @@ CORE_FEATURE_TESTS = \ stackprotector-all \ timerfd +LIB_FEATURE_TESTS = \ + dwarf \ + glibc \ + gtk2 \ + libaudit \ + libbfd \ + libelf \ + libnuma \ + libperl \ + libpython \ + libslang \ + libunwind + +VF_FEATURE_TESTS = \ + backtrace \ + fortify-source \ + gtk2-infobar \ + libelf-getphdrnum \ + libelf-mmap \ + libpython-version \ + on-exit \ + stackprotector-all \ + timerfd \ + libunwind-debug-frame \ + bionic + # Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not # mentioned in this list we need to refactor this ;-). @@ -161,17 +187,6 @@ endef $(foreach feat,$(CORE_FEATURE_TESTS),$(call set_test_all_flags,$(feat))) # -# So here we detect whether test-all was rebuilt, to be able -# to skip the print-out of the long features list if the file -# existed before and after it was built: -# -ifeq ($(wildcard $(OUTPUT)config/feature-checks/test-all.bin),) - test-all-failed := 1 -else - test-all-failed := 0 -endif - -# # Special fast-path for the 'all features are available' case: # $(call feature_check,all,$(MSG)) @@ -180,15 +195,6 @@ $(call feature_check,all,$(MSG)) # Just in case the build freshly failed, make sure we print the # feature matrix: # -ifeq ($(feature-all), 0) - test-all-failed := 1 -endif - -ifeq ($(test-all-failed),1) - $(info ) - $(info Auto-detecting system features:) -endif - ifeq ($(feature-all), 1) # # test-all.c passed - just set all the core feature flags to 1: @@ -199,27 +205,6 @@ else $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) endif -# -# Print the result of the feature test: -# -feature_print = $(eval $(feature_print_code)) $(info $(MSG)) - -define feature_print_code - ifeq ($(feature-$(1)), 1) - MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) - else - MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) - endif -endef - -# -# Only print out our features if we rebuilt the testcases or if a test failed: -# -ifeq ($(test-all-failed), 1) - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_print,$(feat))) - $(info ) -endif - ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all endif @@ -602,3 +587,59 @@ ifdef DESTDIR plugindir=$(libdir)/traceevent/plugins plugindir_SQ= $(subst ','\'',$(plugindir)) endif + +# +# Print the result of the feature test: +# +feature_print = $(eval $(feature_print_code)) $(info $(MSG)) + +define feature_print_code + ifeq ($(feature-$(1)), 1) + MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) + else + MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) + endif +endef + +PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat)))) +PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES) + +# The $(display_lib) controls the default detection message +# output. It's set if: +# - detected features differes from stored features from +# last build (in PERF-FEATURES file) +# - one of the $(LIB_FEATURE_TESTS) is not detected +# - VF is enabled + +ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)") + $(shell echo "$(PERF_FEATURES)" > $(OUTPUT)PERF-FEATURES) + display_lib := 1 +endif + +feature_check = $(eval $(feature_check_code)) +define feature_check_code + ifneq ($(feature-$(1)), 1) + display_lib := 1 + endif +endef + +$(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_check,$(feat))) + +ifeq ($(VF),1) + display_lib := 1 + display_vf := 1 +endif + +ifeq ($(display_lib),1) + $(info ) + $(info Auto-detecting system features:) + $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print,$(feat),)) +endif + +ifeq ($(display_vf),1) + $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print,$(feat),)) +endif + +ifeq ($(display_lib),1) + $(info ) +endif -- cgit v0.10.2 From 8d79076a3c5dbe45109fd15d2489168fbbb28a3d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 16:52:55 +0100 Subject: perf tools: Add variable display for VF make output Adding dump of interesting build directories to the make VF=1 output. $ make VF=1 BUILD: Doing 'make -j4' parallel build Auto-detecting system features: ... dwarf: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libunwind: [ on ] ... backtrace: [ on ] ... fortify-source: [ on ] ... gtk2-infobar: [ on ] ... libelf-getphdrnum: [ on ] ... libelf-mmap: [ on ] ... libpython-version: [ on ] ... on-exit: [ on ] ... stackprotector-all: [ on ] ... timerfd: [ on ] ... libunwind-debug-frame: [ OFF ] ... bionic: [ OFF ] ... prefix: /home/jolsa ... bindir: /home/jolsa/bin ... libdir: /home/jolsa/lib64 ... sysconfdir: /home/jolsa/etc Adding functions to print variable/text in features display - feature_print_var/feature_print_text (feature_print_text is used in next patches). Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392825179-5228-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 39e6e6c..97a64c4 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -591,9 +591,9 @@ endif # # Print the result of the feature test: # -feature_print = $(eval $(feature_print_code)) $(info $(MSG)) +feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG)) -define feature_print_code +define feature_print_status_code ifeq ($(feature-$(1)), 1) MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) else @@ -601,6 +601,16 @@ define feature_print_code endif endef +feature_print_var = $(eval $(feature_print_var_code)) $(info $(MSG)) +define feature_print_var_code + MSG = $(shell printf '...%30s: %s' $(1) $($(1))) +endef + +feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG)) +define feature_print_text_code + MSG = $(shell printf '...%30s: %s' $(1) $(2)) +endef + PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat)))) PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES) @@ -633,11 +643,16 @@ endif ifeq ($(display_lib),1) $(info ) $(info Auto-detecting system features:) - $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print,$(feat),)) + $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print_status,$(feat),)) endif ifeq ($(display_vf),1) - $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print,$(feat),)) + $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print_status,$(feat),)) + $(info ) + $(call feature_print_var,prefix) + $(call feature_print_var,bindir) + $(call feature_print_var,libdir) + $(call feature_print_var,sysconfdir) endif ifeq ($(display_lib),1) -- cgit v0.10.2 From 7a0447d61d2d754988add992be2b46d4587ae86a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 20 Feb 2014 16:08:18 -0300 Subject: perf tools: Warn the user about how to enable libunwind support When one has libunwind installed somewhere the perf tools build process doesn't expects it to be, this happens: [acme@ssdandy linux]$ make O=/tmp/build/perf -C tools/perf/ install-bin make: Entering directory `/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j8' parallel build config/Makefile:312: No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1 Auto-detecting system features: ... libunwind: [ OFF ] Change the message so that it tells how to use a non-standard libunwind install directory: config/Makefile:312: No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR [acme@ssdandy linux]$ make LIBUNWIND_DIR=/opt/libunwind-git/ O=/tmp/build/perf -C tools/perf/ install-bin make: Entering directory `/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j8' parallel build Auto-detecting system features: ... libunwind: [ on ] Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-huoxnou7sw85lm58k3pi1xhw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 97a64c4..9429805 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -309,7 +309,7 @@ endif # NO_LIBELF ifndef NO_LIBUNWIND ifneq ($(feature-libunwind), 1) - msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1); + msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR); NO_LIBUNWIND := 1 else ifeq ($(ARCH),arm) -- cgit v0.10.2 From 45757895c785e0a4c10afd5670cdc26cea2bbc97 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 16:52:56 +0100 Subject: perf tools: Add feature check for libdw dwarf unwind Adding feature check test code for libdw dwarf unwind. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392825179-5228-4-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 9429805..074fa61 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -59,6 +59,18 @@ ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif +ifndef NO_LIBELF + # for linking with debug library, run like: + # make DEBUG=1 LIBDW_DIR=/opt/libdw/ + ifdef LIBDW_DIR + LIBDW_CFLAGS := -I$(LIBDW_DIR)/include + LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib + + FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) + FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw + endif +endif + # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile @@ -267,13 +279,6 @@ else msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); endif else - # for linking with debug library, run like: - # make DEBUG=1 LIBDW_DIR=/opt/libdw/ - ifdef LIBDW_DIR - LIBDW_CFLAGS := -I$(LIBDW_DIR)/include - LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib - endif - ifneq ($(feature-dwarf), 1) msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); NO_DWARF := 1 diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 12e5513..2b492f5 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -26,7 +26,8 @@ FILES= \ test-libunwind-debug-frame.bin \ test-on-exit.bin \ test-stackprotector-all.bin \ - test-timerfd.bin + test-timerfd.bin \ + test-libdw-dwarf-unwind.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config @@ -141,6 +142,9 @@ test-backtrace.bin: test-timerfd.bin: $(BUILD) +test-libdw-dwarf-unwind.bin: + $(BUILD) + -include *.d ############################### diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c index 9b8a544..fc37eb3 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/perf/config/feature-checks/test-all.c @@ -89,6 +89,10 @@ # include "test-stackprotector-all.c" #undef main +#define main main_test_libdw_dwarf_unwind +# include "test-libdw-dwarf-unwind.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -111,6 +115,7 @@ int main(int argc, char *argv[]) main_test_libnuma(); main_test_timerfd(); main_test_stackprotector_all(); + main_test_libdw_dwarf_unwind(); return 0; } diff --git a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c b/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c new file mode 100644 index 0000000..f676a3f --- /dev/null +++ b/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c @@ -0,0 +1,13 @@ + +#include + +int main(void) +{ + /* + * This function is guarded via: __nonnull_attribute__ (1, 2). + * Passing '1' as arguments value. This code is never executed, + * only compiled. + */ + dwfl_thread_getframes((void *) 1, (void *) 1, NULL); + return 0; +} -- cgit v0.10.2 From 5ea8415407a76c4a85ac971ec82d110161cd77f1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 16:52:57 +0100 Subject: perf tools: Add libdw DWARF post unwind support Adding libdw DWARF post unwind support, which is part of elfutils-devel/libdw-dev package from version 0.158. The new code is contained in unwin-libdw.c object, and implements unwind__get_entries unwind interface function. New Makefile variable NO_LIBDW_DWARF_UNWIND was added to control its compilation, and is marked as disabled now. It's factored with the rest of the Makefile unwind build code in the next patch. Arch specific code was added for x86. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392825179-5228-5-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index bde91f8..2dff0b8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -57,6 +57,12 @@ include config/utilities.mak # Define NO_LIBAUDIT if you do not want libaudit support # # Define NO_LIBBIONIC if you do not want bionic support +# +# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support +# for dwarf backtrace post unwind. + +# temporarily disabled +NO_LIBDW_DWARF_UNWIND := 1 ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -478,6 +484,11 @@ ifndef NO_DWARF endif # NO_DWARF endif # NO_LIBELF +ifndef NO_LIBDW_DWARF_UNWIND + LIB_OBJS += $(OUTPUT)util/unwind-libdw.o + LIB_H += util/unwind-libdw.h +endif + ifndef NO_LIBUNWIND LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o endif diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 4fa9be9..37c4652 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -7,6 +7,9 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif +ifndef NO_LIBDW_DWARF_UNWIND +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o +endif LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o LIB_H += arch/$(ARCH)/util/tsc.h diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c new file mode 100644 index 0000000..c4b7217 --- /dev/null +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -0,0 +1,51 @@ +#include +#include "../../util/unwind-libdw.h" +#include "../../util/perf_regs.h" + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) +{ + struct unwind_info *ui = arg; + struct regs_dump *user_regs = &ui->sample->user_regs; + Dwarf_Word dwarf_regs[17]; + unsigned nregs; + +#define REG(r) ({ \ + Dwarf_Word val = 0; \ + perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \ + val; \ +}) + + if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) { + dwarf_regs[0] = REG(AX); + dwarf_regs[1] = REG(CX); + dwarf_regs[2] = REG(DX); + dwarf_regs[3] = REG(BX); + dwarf_regs[4] = REG(SP); + dwarf_regs[5] = REG(BP); + dwarf_regs[6] = REG(SI); + dwarf_regs[7] = REG(DI); + dwarf_regs[8] = REG(IP); + nregs = 9; + } else { + dwarf_regs[0] = REG(AX); + dwarf_regs[1] = REG(DX); + dwarf_regs[2] = REG(CX); + dwarf_regs[3] = REG(BX); + dwarf_regs[4] = REG(SI); + dwarf_regs[5] = REG(DI); + dwarf_regs[6] = REG(BP); + dwarf_regs[7] = REG(SP); + dwarf_regs[8] = REG(R8); + dwarf_regs[9] = REG(R9); + dwarf_regs[10] = REG(R10); + dwarf_regs[11] = REG(R11); + dwarf_regs[12] = REG(R12); + dwarf_regs[13] = REG(R13); + dwarf_regs[14] = REG(R14); + dwarf_regs[15] = REG(R15); + dwarf_regs[16] = REG(IP); + nregs = 17; + } + + return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs); +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 074fa61..c040d86 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -261,6 +261,7 @@ ifdef NO_LIBELF NO_DWARF := 1 NO_DEMANGLE := 1 NO_LIBUNWIND := 1 + NO_LIBDW_DWARF_UNWIND := 1 else ifeq ($(feature-libelf), 0) ifeq ($(feature-glibc), 1) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c new file mode 100644 index 0000000..67db73e --- /dev/null +++ b/tools/perf/util/unwind-libdw.c @@ -0,0 +1,210 @@ +#include +#include +#include +#include +#include +#include "unwind.h" +#include "unwind-libdw.h" +#include "machine.h" +#include "thread.h" +#include "types.h" +#include "event.h" +#include "perf_regs.h" + +static char *debuginfo_path; + +static const Dwfl_Callbacks offline_callbacks = { + .find_debuginfo = dwfl_standard_find_debuginfo, + .debuginfo_path = &debuginfo_path, + .section_address = dwfl_offline_section_address, +}; + +static int __report_module(struct addr_location *al, u64 ip, + struct unwind_info *ui) +{ + Dwfl_Module *mod; + struct dso *dso = NULL; + + thread__find_addr_location(ui->thread, ui->machine, + PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, al); + + if (al->map) + dso = al->map->dso; + + if (!dso) + return 0; + + mod = dwfl_addrmodule(ui->dwfl, ip); + if (!mod) + mod = dwfl_report_elf(ui->dwfl, dso->short_name, + dso->long_name, -1, al->map->start, + false); + + return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; +} + +static int report_module(u64 ip, struct unwind_info *ui) +{ + struct addr_location al; + + return __report_module(&al, ip, ui); +} + +static int entry(u64 ip, struct unwind_info *ui) + +{ + struct unwind_entry e; + struct addr_location al; + + if (__report_module(&al, ip, ui)) + return -1; + + e.ip = ip; + e.map = al.map; + e.sym = al.sym; + + pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", + al.sym ? al.sym->name : "''", + ip, + al.map ? al.map->map_ip(al.map, ip) : (u64) 0); + + return ui->cb(&e, ui->arg); +} + +static pid_t next_thread(Dwfl *dwfl, void *arg, void **thread_argp) +{ + /* We want only single thread to be processed. */ + if (*thread_argp != NULL) + return 0; + + *thread_argp = arg; + return dwfl_pid(dwfl); +} + +static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr, + Dwarf_Word *data) +{ + struct addr_location al; + ssize_t size; + + thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, addr, &al); + if (!al.map) { + pr_debug("unwind: no map for %lx\n", (unsigned long)addr); + return -1; + } + + if (!al.map->dso) + return -1; + + size = dso__data_read_addr(al.map->dso, al.map, ui->machine, + addr, (u8 *) data, sizeof(*data)); + + return !(size == sizeof(*data)); +} + +static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *result, + void *arg) +{ + struct unwind_info *ui = arg; + struct stack_dump *stack = &ui->sample->user_stack; + u64 start, end; + int offset; + int ret; + + ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); + if (ret) + return false; + + end = start + stack->size; + + /* Check overflow. */ + if (addr + sizeof(Dwarf_Word) < addr) + return false; + + if (addr < start || addr + sizeof(Dwarf_Word) > end) { + ret = access_dso_mem(ui, addr, result); + if (ret) { + pr_debug("unwind: access_mem 0x%" PRIx64 " not inside range" + " 0x%" PRIx64 "-0x%" PRIx64 "\n", + addr, start, end); + return false; + } + return true; + } + + offset = addr - start; + *result = *(Dwarf_Word *)&stack->data[offset]; + pr_debug("unwind: access_mem addr 0x%" PRIx64 ", val %lx, offset %d\n", + addr, (unsigned long)*result, offset); + return true; +} + +static const Dwfl_Thread_Callbacks callbacks = { + .next_thread = next_thread, + .memory_read = memory_read, + .set_initial_registers = libdw__arch_set_initial_registers, +}; + +static int +frame_callback(Dwfl_Frame *state, void *arg) +{ + struct unwind_info *ui = arg; + Dwarf_Addr pc; + + if (!dwfl_frame_pc(state, &pc, NULL)) { + pr_err("%s", dwfl_errmsg(-1)); + return DWARF_CB_ABORT; + } + + return entry(pc, ui) || !(--ui->max_stack) ? + DWARF_CB_ABORT : DWARF_CB_OK; +} + +int unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct machine *machine, struct thread *thread, + struct perf_sample *data, + int max_stack) +{ + struct unwind_info ui = { + .sample = data, + .thread = thread, + .machine = machine, + .cb = cb, + .arg = arg, + .max_stack = max_stack, + }; + Dwarf_Word ip; + int err = -EINVAL; + + if (!data->user_regs.regs) + return -EINVAL; + + ui.dwfl = dwfl_begin(&offline_callbacks); + if (!ui.dwfl) + goto out; + + err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP); + if (err) + goto out; + + err = report_module(ip, &ui); + if (err) + goto out; + + if (!dwfl_attach_state(ui.dwfl, EM_NONE, thread->tid, &callbacks, &ui)) + goto out; + + err = dwfl_getthread_frames(ui.dwfl, thread->tid, frame_callback, &ui); + + if (err && !ui.max_stack) + err = 0; + + out: + if (err) + pr_debug("unwind: failed with '%s'\n", dwfl_errmsg(-1)); + + dwfl_end(ui.dwfl); + return 0; +} diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h new file mode 100644 index 0000000..417a142 --- /dev/null +++ b/tools/perf/util/unwind-libdw.h @@ -0,0 +1,21 @@ +#ifndef __PERF_UNWIND_LIBDW_H +#define __PERF_UNWIND_LIBDW_H + +#include +#include "event.h" +#include "thread.h" +#include "unwind.h" + +bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg); + +struct unwind_info { + Dwfl *dwfl; + struct perf_sample *sample; + struct machine *machine; + struct thread *thread; + unwind_entry_cb_t cb; + void *arg; + int max_stack; +}; + +#endif /* __PERF_UNWIND_LIBDW_H */ -- cgit v0.10.2 From 0a4f2b6a3ba5066947a8cbd7cfa26fb8a9280625 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 16:52:58 +0100 Subject: perf tools: Setup default dwarf post unwinder Factor NO_LIBDW_DWARF_UNWIND makefile variable and code that selects default DWARf post unwinder based on detected features (libdw and libunwind support) If both are detected the libunwind is selected as default. Simple 'make' will try to add: - libunwind unwinder if present - libdw unwinder if present - disable dwarf unwind if non of libunwind and libdw libraries are present If one of the DWARF unwind libraries is detected, message is displayed which one (libunwind/libdw) is compiled in. Examples: - compile in libdw unwinder if present: $ make NO_LIBUNWIND=1 - compile in libdw (with libdw installation directory) unwinder if present: $ make LIBDW_DIR=/opt/elfutils/ NO_LIBUNWIND=1 BUILD: Doing 'make -j4' parallel build Auto-detecting system features: ... dwarf: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... DWARF post unwind library: libdw - disable post dwarf unwind completely: $ make NO_LIBUNWIND=1 NO_LIBDW_DWARF_UNWIND=1 BUILD: Doing 'make -j4' parallel build Auto-detecting system features: ... dwarf: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... DWARF post unwind library: libunwind Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392825179-5228-6-git-send-email-jolsa@redhat.com [ Add suggestion about setting LIBDW_DIR when not finding libdw ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 2dff0b8..1f7ec48 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -61,9 +61,6 @@ include config/utilities.mak # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support # for dwarf backtrace post unwind. -# temporarily disabled -NO_LIBDW_DWARF_UNWIND := 1 - ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) srctree := $(patsubst %/,%,$(dir $(srctree))) @@ -412,7 +409,7 @@ endif LIB_OBJS += $(OUTPUT)tests/code-reading.o LIB_OBJS += $(OUTPUT)tests/sample-parsing.o LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o -ifndef NO_LIBUNWIND +ifndef NO_DWARF_UNWIND ifeq ($(ARCH),x86) LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o endif diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 37c4652..1641542 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -4,12 +4,14 @@ LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif ifndef NO_LIBUNWIND LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif ifndef NO_LIBDW_DWARF_UNWIND LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o endif +ifndef NO_DWARF_UNWIND +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o +endif LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o LIB_H += arch/$(ARCH)/util/tsc.h diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index c040d86..f7c81d3 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -159,7 +159,8 @@ CORE_FEATURE_TESTS = \ libunwind \ on-exit \ stackprotector-all \ - timerfd + timerfd \ + libdw-dwarf-unwind LIB_FEATURE_TESTS = \ dwarf \ @@ -172,7 +173,8 @@ LIB_FEATURE_TESTS = \ libperl \ libpython \ libslang \ - libunwind + libunwind \ + libdw-dwarf-unwind VF_FEATURE_TESTS = \ backtrace \ @@ -280,6 +282,12 @@ else msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); endif else + ifndef NO_LIBDW_DWARF_UNWIND + ifneq ($(feature-libdw-dwarf-unwind),1) + NO_LIBDW_DWARF_UNWIND := 1 + msg := $(warning No libdw DWARF unwind found, Please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR); + endif + endif ifneq ($(feature-dwarf), 1) msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev); NO_DWARF := 1 @@ -315,25 +323,51 @@ endif # NO_LIBELF ifndef NO_LIBUNWIND ifneq ($(feature-libunwind), 1) - msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR); + msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR); NO_LIBUNWIND := 1 + endif +endif + +dwarf-post-unwind := 1 +dwarf-post-unwind-text := BUG + +# setup DWARF post unwinder +ifdef NO_LIBUNWIND + ifdef NO_LIBDW_DWARF_UNWIND + msg := $(warning Disabling post unwind, no support found.); + dwarf-post-unwind := 0 else - ifeq ($(ARCH),arm) - $(call feature_check,libunwind-debug-frame) - ifneq ($(feature-libunwind-debug-frame), 1) - msg := $(warning No debug_frame support found in libunwind); - CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME - endif - else - # non-ARM has no dwarf_find_debug_frame() function: + dwarf-post-unwind-text := libdw + endif +else + dwarf-post-unwind-text := libunwind + # Enable libunwind support by default. + ifndef NO_LIBDW_DWARF_UNWIND + NO_LIBDW_DWARF_UNWIND := 1 + endif +endif + +ifeq ($(dwarf-post-unwind),1) + CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT +else + NO_DWARF_UNWIND := 1 +endif + +ifndef NO_LIBUNWIND + ifeq ($(ARCH),arm) + $(call feature_check,libunwind-debug-frame) + ifneq ($(feature-libunwind-debug-frame), 1) + msg := $(warning No debug_frame support found in libunwind); CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME endif - - CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT -DHAVE_LIBUNWIND_SUPPORT - EXTLIBS += $(LIBUNWIND_LIBS) - CFLAGS += $(LIBUNWIND_CFLAGS) - LDFLAGS += $(LIBUNWIND_LDFLAGS) - endif # ifneq ($(feature-libunwind), 1) + else + # non-ARM has no dwarf_find_debug_frame() function: + CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME + endif + CFLAGS += -DHAVE_LIBUNWIND_SUPPORT + EXTLIBS += $(LIBUNWIND_LIBS) + CFLAGS += $(LIBUNWIND_CFLAGS) + LDFLAGS += $(LIBUNWIND_LDFLAGS) endif ifndef NO_LIBAUDIT @@ -620,6 +654,10 @@ endef PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat)))) PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES) +ifeq ($(dwarf-post-unwind),1) + PERF_FEATURES += dwarf-post-unwind($(dwarf-post-unwind-text)) +endif + # The $(display_lib) controls the default detection message # output. It's set if: # - detected features differes from stored features from @@ -650,6 +688,10 @@ ifeq ($(display_lib),1) $(info ) $(info Auto-detecting system features:) $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print_status,$(feat),)) + + ifeq ($(dwarf-post-unwind),1) + $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) + endif endif ifeq ($(display_vf),1) @@ -659,6 +701,8 @@ ifeq ($(display_vf),1) $(call feature_print_var,bindir) $(call feature_print_var,libdir) $(call feature_print_var,sysconfdir) + $(call feature_print_var,LIBUNWIND_DIR) + $(call feature_print_var,LIBDW_DIR) endif ifeq ($(display_lib),1) -- cgit v0.10.2 From 9e8c06eaba76392644825a72d965ffa5f2a5784a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Feb 2014 16:52:59 +0100 Subject: perf tests: Add NO_LIBDW_DWARF_UNWIND make test Adding make test for NO_LIBDW_DWARF_UNWIND option, plus updating minimal build test with it. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392825179-5228-7-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 2d24954..5daeae1 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -27,6 +27,7 @@ make_no_ui := NO_NEWT=1 NO_SLANG=1 NO_GTK2=1 make_no_demangle := NO_DEMANGLE=1 make_no_libelf := NO_LIBELF=1 make_no_libunwind := NO_LIBUNWIND=1 +make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1 make_no_backtrace := NO_BACKTRACE=1 make_no_libnuma := NO_LIBNUMA=1 make_no_libaudit := NO_LIBAUDIT=1 @@ -50,6 +51,7 @@ make_install_pdf := install-pdf make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 +make_minimal += NO_LIBDW_DWARF_UNWIND=1 # $(run) contains all available tests run := make_pure @@ -66,6 +68,7 @@ run += make_no_ui run += make_no_demangle run += make_no_libelf run += make_no_libunwind +run += make_no_libdw_dwarf_unwind run += make_no_backtrace run += make_no_libnuma run += make_no_libaudit -- cgit v0.10.2 From 48c65bda95d692076de7e5eae3188ddae8635dca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Feb 2014 10:32:53 +0900 Subject: perf annotate: Check availability of annotate when processing samples The TUI of perf report and top support annotation, but stdio and GTK don't. So it should be checked before calling hist_entry__inc_addr_ samples() to avoid wasting resources that will never be used. perf annotate need it regardless of UI and sort keys, so the check of whether to allocate resources should be on the tools that have annotate as an option in the TUI, 'report' and 'top', not on the function called by all of them. It caused perf annotate on ppc64 to produce zero output, since the buckets were not being allocated. Reported-by: Anton Blanchard Signed-off-by: Namhyung Kim Cc: Anton Blanchard Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392859976-32760-1-git-send-email-namhyung@kernel.org [ Renamed (report,top)__needs_annotate() to ui__has_annotation() ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3c53ec2..02f985f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -113,14 +113,16 @@ static int report__add_mem_hist_entry(struct perf_tool *tool, struct addr_locati if (!he) return -ENOMEM; - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); - if (err) - goto out; + if (ui__has_annotation()) { + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + if (err) + goto out; - mx = he->mem_info; - err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); - if (err) - goto out; + mx = he->mem_info; + err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); + if (err) + goto out; + } evsel->hists.stats.total_period += cost; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); @@ -164,14 +166,18 @@ static int report__add_branch_hist_entry(struct perf_tool *tool, struct addr_loc he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL, 1, 1, 0); if (he) { - bx = he->branch_info; - err = addr_map_symbol__inc_samples(&bx->from, evsel->idx); - if (err) - goto out; - - err = addr_map_symbol__inc_samples(&bx->to, evsel->idx); - if (err) - goto out; + if (ui__has_annotation()) { + bx = he->branch_info; + err = addr_map_symbol__inc_samples(&bx->from, + evsel->idx); + if (err) + goto out; + + err = addr_map_symbol__inc_samples(&bx->to, + evsel->idx); + if (err) + goto out; + } evsel->hists.stats.total_period += 1; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); @@ -205,7 +211,9 @@ static int report__add_hist_entry(struct perf_tool *tool, struct perf_evsel *evs if (err) goto out; - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + if (ui__has_annotation()) + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); + evsel->hists.stats.total_period += sample->period; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); out: diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 76cd510..5f989a7 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -176,7 +176,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, { struct annotation *notes; struct symbol *sym; - int err; + int err = 0; if (he == NULL || he->ms.sym == NULL || ((top->sym_filter_entry == NULL || @@ -190,7 +190,9 @@ static void perf_top__record_precise_ip(struct perf_top *top, return; ip = he->ms.map->map_ip(he->ms.map, ip); - err = hist_entry__inc_addr_samples(he, counter, ip); + + if (ui__has_annotation()) + err = hist_entry__inc_addr_samples(he, counter, ip); pthread_mutex_unlock(¬es->lock); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 469eb67..3aa555f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -8,6 +8,8 @@ */ #include "util.h" +#include "ui/ui.h" +#include "sort.h" #include "build-id.h" #include "color.h" #include "cache.h" @@ -489,7 +491,7 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, { struct annotation *notes; - if (sym == NULL || use_browser != 1 || !sort__has_sym) + if (sym == NULL) return 0; notes = symbol__annotation(sym); @@ -1399,3 +1401,8 @@ int hist_entry__annotate(struct hist_entry *he, size_t privsize) { return symbol__annotate(he->ms.sym, he->ms.map, privsize); } + +bool ui__has_annotation(void) +{ + return use_browser == 1 && sort__has_sym; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index b2aef59..56ad4f5 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -151,6 +151,8 @@ void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void disasm__purge(struct list_head *head); +bool ui__has_annotation(void); + int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines); -- cgit v0.10.2 From 98e9f03bbf2cb21a60f94b8b700eb5d38470819d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Feb 2014 10:32:54 +0900 Subject: perf symbols: Destroy unused symsrcs Stephane reported that perf report and annotate failed to process data using lots of (> 500) shared libraries. It was because of the limit on number of open files (ulimit -n). Currently when perf loads a DSO, it'll look for normal and dynamic symbol tables. And if it fails to find out both tables, it'll iterate all of possible symtab types. But many of them are useless since they have no additional information and the problem is that it's not closing those files even though they're not used. Fix it. Reported-by: Stephane Eranian Signed-off-by: Namhyung Kim Cc: Cody P Schafer Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392859976-32760-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a9d758a..e89afc0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1336,6 +1336,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (syms_ss && runtime_ss) break; + } else { + symsrc__destroy(ss); } } -- cgit v0.10.2 From 9499934f70deac0cdb96aa2d90f2a0a2de69d80c Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 24 Feb 2014 14:30:00 +0100 Subject: s390/sclp_early: Return correct HSA block count also for zero Currently we return a negative block count if SCLP returns HSA block count zero. The reason is that we subtract one for the header page. So fix this and correctly return zero block count if SCLP returns zero. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 82f2c38..2c6aac6 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -184,9 +184,9 @@ static long __init sclp_hsa_size_init(struct sdias_sccb *sccb) sccb_init_eq_size(sccb); if (sclp_cmd_early(SCLP_CMDW_WRITE_EVENT_DATA, sccb)) return -EIO; - if (sccb->evbuf.blk_cnt != 0) - return (sccb->evbuf.blk_cnt - 1) * PAGE_SIZE; - return 0; + if (sccb->evbuf.blk_cnt == 0) + return 0; + return (sccb->evbuf.blk_cnt - 1) * PAGE_SIZE; } static long __init sclp_hsa_copy_wait(struct sccb_header *sccb) @@ -195,6 +195,8 @@ static long __init sclp_hsa_copy_wait(struct sccb_header *sccb) sccb->length = PAGE_SIZE; if (sclp_cmd_early(SCLP_CMDW_READ_EVENT_DATA, sccb)) return -EIO; + if (((struct sdias_sccb *) sccb)->evbuf.blk_cnt == 0) + return 0; return (((struct sdias_sccb *) sccb)->evbuf.blk_cnt - 1) * PAGE_SIZE; } -- cgit v0.10.2 From 823002023da6d9124ed63bc622267c15ab2a7347 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Feb 2014 16:18:55 +0100 Subject: s390/uaccess: remove copy_from_user_real() There is no user left, so remove it. It was also potentially broken, since the function didn't clear destination memory if copy_from_user() failed. Which would allow for information leaks. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 2710b41..4133b3f 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -344,6 +344,5 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo } int copy_to_user_real(void __user *dest, void *src, unsigned long count); -int copy_from_user_real(void *dest, void __user *src, unsigned long count); #endif /* __S390_UACCESS_H */ diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index efe8ad0..2a2e354 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -152,32 +152,6 @@ out: } /* - * Copy memory from user (virtual) to kernel (real) - */ -int copy_from_user_real(void *dest, void __user *src, unsigned long count) -{ - int offs = 0, size, rc; - char *buf; - - buf = (char *) __get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - rc = -EFAULT; - while (offs < count) { - size = min(PAGE_SIZE, count - offs); - if (copy_from_user(buf, src + offs, size)) - goto out; - if (memcpy_real(dest + offs, buf, size)) - goto out; - offs += size; - } - rc = 0; -out: - free_page((unsigned long) buf); - return rc; -} - -/* * Check if physical address is within prefix or zero page */ static int is_swapped(unsigned long addr) -- cgit v0.10.2 From d72d2bb5a6bc85af77a9f969687c74ea00cdcc99 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Feb 2014 16:33:08 +0100 Subject: s390/checksum: remove memset() within csum_partial_copy_from_user() The memset() within csum_partial_copy_from_user() is rather pointless since copy_from_user() already cleared the rest of the destination buffer if an exception happened. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 4f57a4f..7403648 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -44,22 +44,15 @@ csum_partial(const void *buff, int len, __wsum sum) * here even more important to align src and dst on a 32-bit (or even * better 64-bit) boundary * - * Copy from userspace and compute checksum. If we catch an exception - * then zero the rest of the buffer. + * Copy from userspace and compute checksum. */ static inline __wsum csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { - int missing; - - missing = copy_from_user(dst, src, len); - if (missing) { - memset(dst + len - missing, 0, missing); + if (unlikely(copy_from_user(dst, src, len))) *err_ptr = -EFAULT; - } - return csum_partial(dst, len, sum); } -- cgit v0.10.2 From 8dd853d7b6efcabba631a590dad3ed55bba7f0f2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 23 Feb 2014 08:34:24 -0800 Subject: Documentation/memory-barriers.txt: Clarify release/acquire ordering This commit fixes a couple of typos and clarifies what happens when the CPU chooses to execute a later lock acquisition before a prior lock release, in particular, why deadlock is avoided. Reported-by: Peter Hurley Reported-by: James Bottomley Reported-by: Stefan Richter Signed-off-by: Paul E. McKenney diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 9dde54c..11c1d20 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1674,12 +1674,12 @@ for each construct. These operations all imply certain barriers: Memory operations issued after the ACQUIRE will be completed after the ACQUIRE operation has completed. - Memory operations issued before the ACQUIRE may be completed after the - ACQUIRE operation has completed. An smp_mb__before_spinlock(), combined - with a following ACQUIRE, orders prior loads against subsequent stores and - stores and prior stores against subsequent stores. Note that this is - weaker than smp_mb()! The smp_mb__before_spinlock() primitive is free on - many architectures. + Memory operations issued before the ACQUIRE may be completed after + the ACQUIRE operation has completed. An smp_mb__before_spinlock(), + combined with a following ACQUIRE, orders prior loads against + subsequent loads and stores and also orders prior stores against + subsequent stores. Note that this is weaker than smp_mb()! The + smp_mb__before_spinlock() primitive is free on many architectures. (2) RELEASE operation implication: @@ -1724,24 +1724,21 @@ may occur as: ACQUIRE M, STORE *B, STORE *A, RELEASE M -This same reordering can of course occur if the lock's ACQUIRE and RELEASE are -to the same lock variable, but only from the perspective of another CPU not -holding that lock. - -In short, a RELEASE followed by an ACQUIRE may -not- be assumed to be a full -memory barrier because it is possible for a preceding RELEASE to pass a -later ACQUIRE from the viewpoint of the CPU, but not from the viewpoint -of the compiler. Note that deadlocks cannot be introduced by this -interchange because if such a deadlock threatened, the RELEASE would -simply complete. - -If it is necessary for a RELEASE-ACQUIRE pair to produce a full barrier, the -ACQUIRE can be followed by an smp_mb__after_unlock_lock() invocation. This -will produce a full barrier if either (a) the RELEASE and the ACQUIRE are -executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on the -same variable. The smp_mb__after_unlock_lock() primitive is free on many -architectures. Without smp_mb__after_unlock_lock(), the critical sections -corresponding to the RELEASE and the ACQUIRE can cross: +When the ACQUIRE and RELEASE are a lock acquisition and release, +respectively, this same reordering can occur if the lock's ACQUIRE and +RELEASE are to the same lock variable, but only from the perspective of +another CPU not holding that lock. In short, a ACQUIRE followed by an +RELEASE may -not- be assumed to be a full memory barrier. + +Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not +imply a full memory barrier. If it is necessary for a RELEASE-ACQUIRE +pair to produce a full barrier, the ACQUIRE can be followed by an +smp_mb__after_unlock_lock() invocation. This will produce a full barrier +if either (a) the RELEASE and the ACQUIRE are executed by the same +CPU or task, or (b) the RELEASE and ACQUIRE act on the same variable. +The smp_mb__after_unlock_lock() primitive is free on many architectures. +Without smp_mb__after_unlock_lock(), the CPU's execution of the critical +sections corresponding to the RELEASE and the ACQUIRE can cross, so that: *A = a; RELEASE M @@ -1752,7 +1749,36 @@ could occur as: ACQUIRE N, STORE *B, STORE *A, RELEASE M -With smp_mb__after_unlock_lock(), they cannot, so that: +It might appear that this reordering could introduce a deadlock. +However, this cannot happen because if such a deadlock threatened, +the RELEASE would simply complete, thereby avoiding the deadlock. + + Why does this work? + + One key point is that we are only talking about the CPU doing + the reordering, not the compiler. If the compiler (or, for + that matter, the developer) switched the operations, deadlock + -could- occur. + + But suppose the CPU reordered the operations. In this case, + the unlock precedes the lock in the assembly code. The CPU + simply elected to try executing the later lock operation first. + If there is a deadlock, this lock operation will simply spin (or + try to sleep, but more on that later). The CPU will eventually + execute the unlock operation (which preceded the lock operation + in the assembly code), which will unravel the potential deadlock, + allowing the lock operation to succeed. + + But what if the lock is a sleeplock? In that case, the code will + try to enter the scheduler, where it will eventually encounter + a memory barrier, which will force the earlier unlock operation + to complete, again unraveling the deadlock. There might be + a sleep-unlock race, but the locking primitive needs to resolve + such races properly in any case. + +With smp_mb__after_unlock_lock(), the two critical sections cannot overlap. +For example, with the following code, the store to *A will always be +seen by other CPUs before the store to *B: *A = a; RELEASE M @@ -1760,13 +1786,18 @@ With smp_mb__after_unlock_lock(), they cannot, so that: smp_mb__after_unlock_lock(); *B = b; -will always occur as either of the following: +The operations will always occur in one of the following orders: - STORE *A, RELEASE, ACQUIRE, STORE *B - STORE *A, ACQUIRE, RELEASE, STORE *B + STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B + STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B + ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B If the RELEASE and ACQUIRE were instead both operating on the same lock -variable, only the first of these two alternatives can occur. +variable, only the first of these alternatives can occur. In addition, +the more strongly ordered systems may rule out some of the above orders. +But in any case, as noted earlier, the smp_mb__after_unlock_lock() +ensures that the store to *A will always be seen as happening before +the store to *B. Locks and semaphores may not provide any guarantee of ordering on UP compiled systems, and so cannot be counted on in such a situation to actually achieve @@ -2787,7 +2818,7 @@ in that order, but, without intervention, the sequence may have almost any combination of elements combined or discarded, provided the program's view of the world remains consistent. Note that ACCESS_ONCE() is -not- optional in the above example, as there are architectures where a given CPU might -interchange successive loads to the same location. On such architectures, +reorder successive loads to the same location. On such architectures, ACCESS_ONCE() does whatever is necessary to prevent this, for example, on Itanium the volatile casts used by ACCESS_ONCE() cause GCC to emit the special ld.acq and st.rel instructions that prevent such reordering. -- cgit v0.10.2 From 1acacc0784aab45627b6009e0e9224886279ac0b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 19 Feb 2014 20:32:33 -0500 Subject: dm thin: fix the error path for the thin device constructor dm_pool_close_thin_device() must be called if dm_set_target_max_io_len() fails in thin_ctr(). Otherwise __pool_destroy() will fail because the pool will still have an open thin device: device-mapper: thin metadata: attempt to close pmd when 1 device(s) are still open device-mapper: thin: __pool_destroy: dm_pool_metadata_close() failed. Also, must establish error code if failing thin_ctr() because the pool is in fail_io mode. Signed-off-by: Mike Snitzer Acked-by: Joe Thornber Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index d501e43..4cf4b19 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2895,6 +2895,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (get_pool_mode(tc->pool) == PM_FAIL) { ti->error = "Couldn't open thin device, Pool is in fail mode"; + r = -EINVAL; goto bad_thin_open; } @@ -2906,7 +2907,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); if (r) - goto bad_thin_open; + goto bad_target_max_io_len; ti->num_flush_bios = 1; ti->flush_supported = true; @@ -2927,6 +2928,8 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) return 0; +bad_target_max_io_len: + dm_pool_close_thin_device(tc->td); bad_thin_open: __pool_dec(tc->pool); bad_pool_lookup: -- cgit v0.10.2 From 0d3dc5e8b85a144aaeb5dc26f7f2113e4c4e7e81 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 20 Feb 2014 10:32:55 +0900 Subject: perf symbols: Check return value of filename__read_debuglink() When dso__read_binary_type_filename() called, it doesn't check the return value of filename__read_debuglink() so that it'll try to open the debuglink file even if it doesn't exist. Also fix return value of the filename__read_debuglink() as it always return -1 regardless of the result. Signed-off-by: Namhyung Kim Cc: Cody P Schafer Cc: Ingo Molnar Cc: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1392859976-32760-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 4045d08..64453d6 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -45,8 +45,8 @@ int dso__read_binary_type_filename(const struct dso *dso, debuglink--; if (*debuglink == '/') debuglink++; - filename__read_debuglink(dso->long_name, debuglink, - size - (debuglink - filename)); + ret = filename__read_debuglink(dso->long_name, debuglink, + size - (debuglink - filename)); } break; case DSO_BINARY_TYPE__BUILD_ID_CACHE: diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 3e9f336..8ac4a4f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -506,6 +506,8 @@ int filename__read_debuglink(const char *filename, char *debuglink, /* the start of this section is a zero-terminated string */ strncpy(debuglink, data->d_buf, size); + err = 0; + out_elf_end: elf_end(elf); out_close: -- cgit v0.10.2 From 1029f9fedf87fa6f52096991588fa54ffd159584 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 20 Feb 2014 10:32:56 +0900 Subject: perf symbols: Check compatible symtab type before loading dso When loading a dso it'll look for symbol tables of all possible types. However it's just wasted of time to check incompatible types - like trying kernel module when loading user library. Signed-off-by: Namhyung Kim Cc: Adrian Hunter Cc: Cody P Schafer Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1392859976-32760-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 46e2ede..0ada68b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1251,6 +1251,46 @@ out_failure: return -1; } +static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, + enum dso_binary_type type) +{ + switch (type) { + case DSO_BINARY_TYPE__JAVA_JIT: + case DSO_BINARY_TYPE__DEBUGLINK: + case DSO_BINARY_TYPE__SYSTEM_PATH_DSO: + case DSO_BINARY_TYPE__FEDORA_DEBUGINFO: + case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO: + case DSO_BINARY_TYPE__BUILDID_DEBUGINFO: + case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO: + return !kmod && dso->kernel == DSO_TYPE_USER; + + case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__VMLINUX: + case DSO_BINARY_TYPE__KCORE: + return dso->kernel == DSO_TYPE_KERNEL; + + case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__GUEST_VMLINUX: + case DSO_BINARY_TYPE__GUEST_KCORE: + return dso->kernel == DSO_TYPE_GUEST_KERNEL; + + case DSO_BINARY_TYPE__GUEST_KMODULE: + case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE: + /* + * kernel modules know their symtab type - it's set when + * creating a module dso in machine__new_module(). + */ + return kmod && dso->symtab_type == type; + + case DSO_BINARY_TYPE__BUILD_ID_CACHE: + return true; + + case DSO_BINARY_TYPE__NOT_FOUND: + default: + return false; + } +} + int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) { char *name; @@ -1261,6 +1301,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) int ss_pos = 0; struct symsrc ss_[2]; struct symsrc *syms_ss = NULL, *runtime_ss = NULL; + bool kmod; dso__set_loaded(dso, map->type); @@ -1301,7 +1342,11 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (!name) return -1; - /* Iterate over candidate debug images. + kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; + + /* + * Iterate over candidate debug images. * Keep track of "interesting" ones (those which have a symtab, dynsym, * and/or opd section) for processing. */ @@ -1311,6 +1356,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) enum dso_binary_type symtab_type = binary_type_symtab[i]; + if (!dso__is_compatible_symtab_type(dso, kmod, symtab_type)) + continue; + if (dso__read_binary_type_filename(dso, symtab_type, root_dir, name, PATH_MAX)) continue; @@ -1351,15 +1399,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; - if (syms_ss) { - int km; - - km = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; - ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, km); - } else { + if (syms_ss) + ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod); + else ret = -1; - } if (ret > 0) { int nr_plt; -- cgit v0.10.2 From e4ceb0f40da5dc26f84025d121c2fe6ff7d8a947 Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Fri, 21 Feb 2014 12:23:00 -0800 Subject: mwifiex: rename usb driver name registerring to usb core Both libertas USB driver and mwifiex_usb driver are registerring with name 'usb8xxx'. The following conflict happens while trying to load both drivers. [6.211307] Error: Driver 'usb8xxx' is already registered... [6.217261] mwifiex_usb: Driver register failed! Fix it by renaming mwifiex_usb driver's name. Reported-by: Fengguang Wu Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c index e8ebbd4..cb6b70a 100644 --- a/drivers/net/wireless/mwifiex/usb.c +++ b/drivers/net/wireless/mwifiex/usb.c @@ -22,8 +22,6 @@ #define USB_VERSION "1.0" -static const char usbdriver_name[] = "usb8xxx"; - static struct mwifiex_if_ops usb_ops; static struct semaphore add_remove_card_sem; static struct usb_card_rec *usb_card; @@ -567,7 +565,7 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf) } static struct usb_driver mwifiex_usb_driver = { - .name = usbdriver_name, + .name = "mwifiex_usb", .probe = mwifiex_usb_probe, .disconnect = mwifiex_usb_disconnect, .id_table = mwifiex_usb_table, -- cgit v0.10.2 From 558ff225de80ac95b132d3a115ddadcd64498b4f Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 22 Feb 2014 13:48:19 +0100 Subject: ath9k: fix ps-poll responses under a-mpdu sessions When passing tx frames to the U-APSD queue for powersave poll responses, the ath_atx_tid pointer needs to be passed to ath_tx_setup_buffer for proper sequence number accounting. This fixes high latency and connection stability issues with ath9k running as AP and a few kinds of mobile phones as client, when PS-Poll is heavily used Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 4f4ce83..f042a18 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2186,14 +2186,15 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, txq->stopped = true; } + if (txctl->an) + tid = ath_get_skb_tid(sc, txctl->an, skb); + if (info->flags & IEEE80211_TX_CTL_PS_RESPONSE) { ath_txq_unlock(sc, txq); txq = sc->tx.uapsdq; ath_txq_lock(sc, txq); } else if (txctl->an && ieee80211_is_data_present(hdr->frame_control)) { - tid = ath_get_skb_tid(sc, txctl->an, skb); - WARN_ON(tid->ac->txq != txctl->txq); if (info->flags & IEEE80211_TX_CTL_CLEAR_PS_FILT) -- cgit v0.10.2 From 6f58c780e5a5b43a6d2121e0d43cdcba1d3cc5fc Mon Sep 17 00:00:00 2001 From: "Dr. Greg Wettstein" Date: Mon, 24 Feb 2014 13:59:53 -0600 Subject: qla2xxx: Fix kernel panic on selective retransmission request A selective retransmission request (SRR) is a fibre-channel protocol control request which provides support for requesting retransmission of a data sequence in response to an issue such as frame loss or corruption. These events are experienced infrequently in fibre-channel based networks which makes it difficult to test and assess codepaths which handle these events. We were fortunate enough, for some definition of fortunate, to have a metro-area single-mode SAN link which, at 10 GBPS sustained load levels, would consistently generate SRR's in a SCST based target implementation using our SCST/in-kernel Qlogic target interface driver. In response to an SRR the in-kernel Qlogic target driver immediately panics resulting in a catastrophic storage failure for serviced initiators. The culprit was a debug statement in the qla_target.c file which does not verify that a pointer to the SCSI CDB is not null. The unchecked pointer dereference results in the kernel panic and resultant system failure. The other two references to the SCSI CDB by the SRR handling code use a ternary operator to verify a non-null pointer is being acted on. This patch simply adds a similar test to the implicated debug statement. This patch is a candidate for any stable kernel being maintained since it addresses a potentially catastrophic event with minimal downside. Signed-off-by: Dr. Greg Wettstein Cc: #3.5+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index f2e1c5a..0cb7307 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3202,7 +3202,8 @@ restart: ql_dbg(ql_dbg_tgt_mgt, vha, 0xf02c, "SRR cmd %p (se_cmd %p, tag %d, op %x), " "sg_cnt=%d, offset=%d", cmd, &cmd->se_cmd, cmd->tag, - se_cmd->t_task_cdb[0], cmd->sg_cnt, cmd->offset); + se_cmd->t_task_cdb ? se_cmd->t_task_cdb[0] : 0, + cmd->sg_cnt, cmd->offset); qlt_handle_srr(vha, sctio, imm); -- cgit v0.10.2 From 760dbe1dcb6d3dd3ead73dc69b23f206b52273bb Mon Sep 17 00:00:00 2001 From: Juergen Beisert Date: Mon, 24 Feb 2014 14:39:00 +0000 Subject: staging:iio:adc:MXS:LRADC: fix touchscreen statemachine Releasing the touchscreen lets the internal statemachine left in a wrong state. Due to this the release coordinate will be reported again by accident when the next touchscreen event happens. This change sets up the correct state when waiting for the next touchscreen event. This has led to reported issues with calibrating the touchscreen. Bug was introduced somewhere in the series that began with 18da755de59b406ce2371a55fb15ed676eb08ed2 Staging/iio/adc/touchscreen/MXS: add proper clock handling in which the way this driver worked was substantially changed to be interrupt driven rather than relying on a busy loop. This was a regression in the 3.13 kernel. Signed-off-by: Juergen Beisert Tested-by: Alexandre Belloni Cc: stable@vger.kernel.org Signed-off-by: Jonathan Cameron diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index 7fc66a6..514844e 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -757,6 +757,7 @@ static void mxs_lradc_finish_touch_event(struct mxs_lradc *lradc, bool valid) } /* if it is released, wait for the next touch via IRQ */ + lradc->cur_plate = LRADC_TOUCH; mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ, LRADC_CTRL1); mxs_lradc_reg_set(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN, LRADC_CTRL1); } -- cgit v0.10.2 From 0e7ede80d929ff0f830c44a543daa1acd590c749 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 21 Feb 2014 13:08:04 +0800 Subject: virtio-net: alloc big buffers also when guest can receive UFO We should alloc big buffers also when guest can receive UFO packets to let the big packets fit into guest rx buffer. Fixes 5c5167515d80f78f6bb538492c423adcae31ad65 (virtio-net: Allow UFO feature to be set and advertised.) Cc: Rusty Russell Cc: Michael S. Tsirkin Cc: Sridhar Samudrala Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Acked-by: Rusty Russell Signed-off-by: David S. Miller diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d75f8ed..5632a99 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1711,7 +1711,8 @@ static int virtnet_probe(struct virtio_device *vdev) /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) || - virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) || + virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO)) vi->big_packets = true; if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) -- cgit v0.10.2 From 7262b7b26de1495f77edbe1e71cb15b8198adb9d Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Sat, 22 Feb 2014 13:09:03 +0100 Subject: net: stmmac: Check return value of alloc_dma_desc_resources() alloc_dma_desc_resources() returns an error value and the next line actually checks for it, so assign the return value properly. Found by the coverity scanner. Signed-off-by: Tobias Klauser Acked-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index a2e7d2c..078ad0e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1705,7 +1705,7 @@ static int stmmac_open(struct net_device *dev) priv->dma_rx_size = STMMAC_ALIGN(dma_rxsize); priv->dma_buf_sz = STMMAC_ALIGN(buf_sz); - alloc_dma_desc_resources(priv); + ret = alloc_dma_desc_resources(priv); if (ret < 0) { pr_err("%s: DMA descriptors allocation failed\n", __func__); goto dma_desc_error; -- cgit v0.10.2 From d10473d4e3f9d1b81b50a60c8465d6f59a095c46 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 22 Feb 2014 22:25:57 -0800 Subject: tcp: reduce the bloat caused by tcp_is_cwnd_limited() tcp_is_cwnd_limited() allows GSO/TSO enabled flows to increase their cwnd to allow a full size (64KB) TSO packet to be sent. Non GSO flows only allow an extra room of 3 MSS. For most flows with a BDP below 10 MSS, this results in a bloat of cwnd reaching 90, and an inflate of RTT. Thanks to TSO auto sizing, we can restrict the bloat to the number of MSS contained in a TSO packet (tp->xmit_size_goal_segs), to keep original intent without performance impact. Because we keep cwnd small, it helps to keep TSO packet size to their optimal value. Example for a 10Mbit flow, with low TCP Small queue limits (no more than 2 skb in qdisc/device tx ring) Before patch : lpk51:~# ./ss -i dst lpk52:44862 | grep cwnd cubic wscale:6,6 rto:215 rtt:15.875/2.5 mss:1448 cwnd:96 ssthresh:96 send 70.1Mbps unacked:14 rcv_space:29200 After patch : lpk51:~# ./ss -i dst lpk52:52916 | grep cwnd cubic wscale:6,6 rto:206 rtt:5.206/0.036 mss:1448 cwnd:15 ssthresh:14 send 33.4Mbps unacked:4 rcv_space:29200 Signed-off-by: Eric Dumazet Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Nandita Dukkipati Cc: Van Jacobson Acked-by: Neal Cardwell Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index ad37bf1..2388275 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -290,8 +290,7 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left = tp->snd_cwnd - in_flight; if (sk_can_gso(sk) && left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && - left * tp->mss_cache < sk->sk_gso_max_size && - left < sk->sk_gso_max_segs) + left < tp->xmit_size_goal_segs) return true; return left <= tcp_max_tso_deferred_mss(tp); } -- cgit v0.10.2 From 7fe412d07d881020022a188b95c63a19b651a391 Mon Sep 17 00:00:00 2001 From: Venkatesh Srinivas Date: Mon, 24 Feb 2014 14:13:32 -0800 Subject: vhost/scsi: Check LUN structure byte 0 is set to 1, per spec The virtio spec requires byte 0 of the virtio-scsi LUN structure to be '1'. Signed-off-by: Venkatesh Srinivas Reviewed-by: Paolo Bonzini Signed-off-by: Nicholas Bellinger diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 0a025b8..e48d4a6 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1001,6 +1001,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) break; } + /* virtio-scsi spec requires byte 0 of the lun to be 1 */ + if (unlikely(v_req.lun[0] != 1)) { + vhost_scsi_send_bad_target(vs, vq, head, out); + continue; + } + /* Extract the tpgt */ target = v_req.lun[1]; tpg = ACCESS_ONCE(vs_tpg[target]); -- cgit v0.10.2 From 260ea9c2e2d330303163e286ab01b66dbcfe3a6f Mon Sep 17 00:00:00 2001 From: Manu Gupta Date: Mon, 24 Feb 2014 12:12:28 -0600 Subject: staging: r8188eu: Add new device ID The D-Link DWA-123 REV D1 with USB ID 2001:3310 uses this driver. Signed-off-by: Manu Gupta Signed-off-by: Larry Finger Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index a70dcef..2f40ff5 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -55,6 +55,7 @@ static struct usb_device_id rtw_usb_id_tbl[] = { /****** 8188EUS ********/ {USB_DEVICE(0x07b8, 0x8179)}, /* Abocom - Abocom */ {USB_DEVICE(0x2001, 0x330F)}, /* DLink DWA-125 REV D1 */ + {USB_DEVICE(0x2001, 0x3310)}, /* Dlink DWA-123 REV D1 */ {} /* Terminating entry */ }; -- cgit v0.10.2 From 30c219710358c5cca2f8bd2e9e547c6aadf7cf8b Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Thu, 20 Feb 2014 17:36:03 +0100 Subject: regulator: core: Replace direct ops->enable usage There are some direct ops->enable in the regulator core driver. This is a potential issue as the function _regulator_do_enable() handles gpio regulators and the normal ops->enable calls. These gpio regulators are simply ignored when ops->enable is called directly. One possible bug is that boot-on and always-on gpio regulators are not enabled on registration. This patch replaces all ops->enable calls by _regulator_do_enable. [Handle missing enable operations -- broonie] Cc: # 3.10+ Signed-off-by: Markus Pargmann Signed-off-by: Mark Brown regulator: Handle invalid enable operation for always/boot on regulators Signed-off-by: Mark Brown diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 74e9fb2..00feec3 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -953,6 +953,8 @@ static int machine_constraints_current(struct regulator_dev *rdev, return 0; } +static int _regulator_do_enable(struct regulator_dev *rdev); + /** * set_machine_constraints - sets regulator constraints * @rdev: regulator source @@ -1013,10 +1015,9 @@ static int set_machine_constraints(struct regulator_dev *rdev, /* If the constraints say the regulator should be on at this point * and we have control then make sure it is enabled. */ - if ((rdev->constraints->always_on || rdev->constraints->boot_on) && - ops->enable) { - ret = ops->enable(rdev); - if (ret < 0) { + if (rdev->constraints->always_on || rdev->constraints->boot_on) { + ret = _regulator_do_enable(rdev); + if (ret < 0 && ret != -EINVAL) { rdev_err(rdev, "failed to enable\n"); goto out; } @@ -3626,9 +3627,8 @@ int regulator_suspend_finish(void) struct regulator_ops *ops = rdev->desc->ops; mutex_lock(&rdev->mutex); - if ((rdev->use_count > 0 || rdev->constraints->always_on) && - ops->enable) { - error = ops->enable(rdev); + if (rdev->use_count > 0 || rdev->constraints->always_on) { + error = _regulator_do_enable(rdev); if (error) ret = error; } else { -- cgit v0.10.2 From 66fda75f47dc583f1c187556e9a2c082dd64f8c6 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Thu, 20 Feb 2014 17:36:04 +0100 Subject: regulator: core: Replace direct ops->disable usage There are many places where ops->disable is called directly. Instead we should use _regulator_do_disable() which also handles gpio regulators. To be able to use the wrapper function from _regulator_force_disable(), I moved the _notifier_call_chain() call from _regulator_do_disable() to _regulator_disable(). This way, _regulator_force_disable() can use different flags for _notifier_call_chain() without calling it twice. Cc: # 3.10+ Signed-off-by: Markus Pargmann Signed-off-by: Mark Brown diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 00feec3..6f71985 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1901,8 +1901,6 @@ static int _regulator_do_disable(struct regulator_dev *rdev) trace_regulator_disable_complete(rdev_get_name(rdev)); - _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, - NULL); return 0; } @@ -1926,6 +1924,8 @@ static int _regulator_disable(struct regulator_dev *rdev) rdev_err(rdev, "failed to disable\n"); return ret; } + _notifier_call_chain(rdev, REGULATOR_EVENT_DISABLE, + NULL); } rdev->use_count = 0; @@ -1978,20 +1978,16 @@ static int _regulator_force_disable(struct regulator_dev *rdev) { int ret = 0; - /* force disable */ - if (rdev->desc->ops->disable) { - /* ah well, who wants to live forever... */ - ret = rdev->desc->ops->disable(rdev); - if (ret < 0) { - rdev_err(rdev, "failed to force disable\n"); - return ret; - } - /* notify other consumers that power has been forced off */ - _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | - REGULATOR_EVENT_DISABLE, NULL); + ret = _regulator_do_disable(rdev); + if (ret < 0) { + rdev_err(rdev, "failed to force disable\n"); + return ret; } - return ret; + _notifier_call_chain(rdev, REGULATOR_EVENT_FORCE_DISABLE | + REGULATOR_EVENT_DISABLE, NULL); + + return 0; } /** @@ -3624,8 +3620,6 @@ int regulator_suspend_finish(void) mutex_lock(®ulator_list_mutex); list_for_each_entry(rdev, ®ulator_list, list) { - struct regulator_ops *ops = rdev->desc->ops; - mutex_lock(&rdev->mutex); if (rdev->use_count > 0 || rdev->constraints->always_on) { error = _regulator_do_enable(rdev); @@ -3634,12 +3628,10 @@ int regulator_suspend_finish(void) } else { if (!have_full_constraints()) goto unlock; - if (!ops->disable) - goto unlock; if (!_regulator_is_enabled(rdev)) goto unlock; - error = ops->disable(rdev); + error = _regulator_do_disable(rdev); if (error) ret = error; } @@ -3813,7 +3805,7 @@ static int __init regulator_init_complete(void) ops = rdev->desc->ops; c = rdev->constraints; - if (!ops->disable || (c && c->always_on)) + if (c && c->always_on) continue; mutex_lock(&rdev->mutex); @@ -3834,7 +3826,7 @@ static int __init regulator_init_complete(void) /* We log since this may kill the system if it * goes wrong. */ rdev_info(rdev, "disabling\n"); - ret = ops->disable(rdev); + ret = _regulator_do_disable(rdev); if (ret != 0) rdev_err(rdev, "couldn't disable: %d\n", ret); } else { -- cgit v0.10.2 From 340fea3d7f6a2657ddd0b48413cd81e8513357ed Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Mon, 24 Feb 2014 20:12:28 -0500 Subject: r8169: initialize rtl8169_stats seqlock Boris reports he's seeing: > [ 9.195943] INFO: trying to register non-static key. > [ 9.196031] the code is fine but needs lockdep annotation. > [ 9.196031] turning off the locking correctness validator. > [ 9.196031] CPU: 1 PID: 933 Comm: modprobe Not tainted 3.14.0-rc4+ #1 with the r8169 driver. These are occuring because the seqcount embedded in u64_stats_sync on 32-bit SMP is uninitialized which is making lockdep unhappy. Signed-off-by: Kyle McMartin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 91a67ae..e977965 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7118,6 +7118,8 @@ rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } mutex_init(&tp->wk.mutex); + u64_stats_init(&tp->rx_stats.syncp); + u64_stats_init(&tp->tx_stats.syncp); /* Get MAC address */ for (i = 0; i < ETH_ALEN; i++) -- cgit v0.10.2 From a2530060c1157a7863abbe1fea3cfdd5da55bd48 Mon Sep 17 00:00:00 2001 From: Sherman Yin Date: Thu, 23 Jan 2014 12:44:47 -0800 Subject: Update dtsi with new pinctrl compatible string This commit updates bcm11351.dtsi with the new compatible string for the same driver. Signed-off-by: Sherman Yin Reviewed-by: Matt Porter Acked-by: Linus Walleij Signed-off-by: Christian Daudt diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi index e491b82..792fde1 100644 --- a/arch/arm/boot/dts/bcm11351.dtsi +++ b/arch/arm/boot/dts/bcm11351.dtsi @@ -147,7 +147,7 @@ }; pinctrl@35004800 { - compatible = "brcm,capri-pinctrl"; + compatible = "brcm,bcm11351-pinctrl"; reg = <0x35004800 0x430>; }; -- cgit v0.10.2 From d0deca0276807740f3e688d97196122511b8f765 Mon Sep 17 00:00:00 2001 From: Christian Daudt Date: Mon, 24 Feb 2014 09:15:35 -0800 Subject: pinctrl: refer to updated dt binding string. Bring the driver in line with the bcm-based dt name for pinctrl. This is being done to keep consistency with other Broadcom mobile SoC drivers. Signed-off-by: Christian Daudt Reviewed-by: Matt Porter diff --git a/drivers/pinctrl/pinctrl-capri.c b/drivers/pinctrl/pinctrl-capri.c index 4669c53..eb25002 100644 --- a/drivers/pinctrl/pinctrl-capri.c +++ b/drivers/pinctrl/pinctrl-capri.c @@ -1435,7 +1435,7 @@ int __init capri_pinctrl_probe(struct platform_device *pdev) } static struct of_device_id capri_pinctrl_of_match[] = { - { .compatible = "brcm,capri-pinctrl", }, + { .compatible = "brcm,bcm11351-pinctrl", }, { }, }; -- cgit v0.10.2 From 735ea23c4868bf3123a4c79184e9206e0cc60211 Mon Sep 17 00:00:00 2001 From: Sherman Yin Date: Thu, 23 Jan 2014 12:44:44 -0800 Subject: pinctrl: Rename Broadcom Capri pinctrl binding The compatible string of the Broadcom Capri pinctrl driver is renamed to "brcm,bcm11351-pinctrl" to match the machine binding here: Documentation/devicetree/bindings/arm/bcm/bcm11351.txt Signed-off-by: Sherman Yin Reviewed-by: Matt Porter Acked-by: Linus Walleij Signed-off-by: Christian Daudt diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt new file mode 100644 index 0000000..c119deb --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt @@ -0,0 +1,461 @@ +Broadcom BCM281xx Pin Controller + +This is a pin controller for the Broadcom BCM281xx SoC family, which includes +BCM11130, BCM11140, BCM11351, BCM28145, and BCM28155 SoCs. + +=== Pin Controller Node === + +Required Properties: + +- compatible: Must be "brcm,bcm11351-pinctrl" +- reg: Base address of the PAD Controller register block and the size + of the block. + +For example, the following is the bare minimum node: + + pinctrl@35004800 { + compatible = "brcm,bcm11351-pinctrl"; + reg = <0x35004800 0x430>; + }; + +As a pin controller device, in addition to the required properties, this node +should also contain the pin configuration nodes that client devices reference, +if any. + +=== Pin Configuration Node === + +Each pin configuration node is a sub-node of the pin controller node and is a +container of an arbitrary number of subnodes, called pin group nodes in this +document. + +Please refer to the pinctrl-bindings.txt in this directory for details of the +common pinctrl bindings used by client devices, including the definition of a +"pin configuration node". + +=== Pin Group Node === + +A pin group node specifies the desired pin mux and/or pin configuration for an +arbitrary number of pins. The name of the pin group node is optional and not +used. + +A pin group node only affects the properties specified in the node, and has no +effect on any properties that are omitted. + +The pin group node accepts a subset of the generic pin config properties. For +details generic pin config properties, please refer to pinctrl-bindings.txt +and . + +Each pin controlled by this pin controller belong to one of three types: +Standard, I2C, and HDMI. Each type accepts a different set of pin config +properties. A list of pins and their types is provided below. + +Required Properties (applicable to all pins): + +- pins: Multiple strings. Specifies the name(s) of one or more pins to + be configured by this node. + +Optional Properties (for standard pins): + +- function: String. Specifies the pin mux selection. Values + must be one of: "alt1", "alt2", "alt3", "alt4" +- input-schmitt-enable: No arguments. Enable schmitt-trigger mode. +- input-schmitt-disable: No arguments. Disable schmitt-trigger mode. +- bias-pull-up: No arguments. Pull up on pin. +- bias-pull-down: No arguments. Pull down on pin. +- bias-disable: No arguments. Disable pin bias. +- slew-rate: Integer. Meaning depends on configured pin mux: + *_SCL or *_SDA: + 0: Standard(100kbps)& Fast(400kbps) mode + 1: Highspeed (3.4Mbps) mode + IC_DM or IC_DP: + 0: normal slew rate + 1: fast slew rate + Otherwise: + 0: fast slew rate + 1: normal slew rate +- input-enable: No arguements. Enable input (does not affect + output.) +- input-disable: No arguements. Disable input (does not affect + output.) +- drive-strength: Integer. Drive strength in mA. Valid values are + 2, 4, 6, 8, 10, 12, 14, 16 mA. + +Optional Properties (for I2C pins): + +- function: String. Specifies the pin mux selection. Values + must be one of: "alt1", "alt2", "alt3", "alt4" +- bias-pull-up: Integer. Pull up strength in Ohm. There are 3 + pull-up resisitors (1.2k, 1.8k, 2.7k) available + in parallel for I2C pins, so the valid values + are: 568, 720, 831, 1080, 1200, 1800, 2700 Ohm. +- bias-disable: No arguments. Disable pin bias. +- slew-rate: Integer. Meaning depends on configured pin mux: + *_SCL or *_SDA: + 0: Standard(100kbps)& Fast(400kbps) mode + 1: Highspeed (3.4Mbps) mode + IC_DM or IC_DP: + 0: normal slew rate + 1: fast slew rate + Otherwise: + 0: fast slew rate + 1: normal slew rate +- input-enable: No arguements. Enable input (does not affect + output.) +- input-disable: No arguements. Disable input (does not affect + output.) + +Optional Properties (for HDMI pins): + +- function: String. Specifies the pin mux selection. Values + must be one of: "alt1", "alt2", "alt3", "alt4" +- slew-rate: Integer. Controls slew rate. + 0: Standard(100kbps)& Fast(400kbps) mode + 1: Highspeed (3.4Mbps) mode +- input-enable: No arguements. Enable input (does not affect + output.) +- input-disable: No arguements. Disable input (does not affect + output.) + +Example: +// pin controller node +pinctrl@35004800 { + compatible = "brcmbcm11351-pinctrl"; + reg = <0x35004800 0x430>; + + // pin configuration node + dev_a_default: dev_a_active { + //group node defining 1 standard pin + grp_1 { + pins = "std_pin1"; + function = "alt1"; + input-schmitt-enable; + bias-disable; + slew-rate = <1>; + drive-strength = <4>; + }; + + // group node defining 2 I2C pins + grp_2 { + pins = "i2c_pin1", "i2c_pin2"; + function = "alt2"; + bias-pull-up = <720>; + input-enable; + }; + + // group node defining 2 HDMI pins + grp_3 { + pins = "hdmi_pin1", "hdmi_pin2"; + function = "alt3"; + slew-rate = <1>; + }; + + // other pin group nodes + ... + }; + + // other pin configuration nodes + ... +}; + +In the example above, "dev_a_active" is a pin configuration node with a number +of sub-nodes. In the pin group node "grp_1", one pin, "std_pin1", is defined in +the "pins" property. Thus, the remaining properties in the "grp_1" node applies +only to this pin, including the following settings: + - setting pinmux to "alt1" + - enabling schmitt-trigger (hystersis) mode + - disabling pin bias + - setting the slew-rate to 1 + - setting the drive strength to 4 mA +Note that neither "input-enable" nor "input-disable" was specified - the pinctrl +subsystem will therefore leave this property unchanged from whatever state it +was in before applying these changes. + +The "pins" property in the pin group node "grp_2" specifies two pins - +"i2c_pin1" and "i2c_pin2"; the remaining properties in this pin group node, +therefore, applies to both of these pins. The properties include: + - setting pinmux to "alt2" + - setting pull-up resistance to 720 Ohm (ie. enabling 1.2k and 1.8k resistors + in parallel) + - enabling both pins' input +"slew-rate" is not specified in this pin group node, so the slew-rate for these +pins are left as-is. + +Finally, "grp_3" defines two HDMI pins. The following properties are applied to +both pins: + - setting pinmux to "alt3" + - setting slew-rate to 1; for HDMI pins, this corresponds to the 3.4 Mbps + Highspeed mode +The input is neither enabled or disabled, and is left untouched. + +=== Pin Names and Type === + +The following are valid pin names and their pin types: + + "adcsync", Standard + "bat_rm", Standard + "bsc1_scl", I2C + "bsc1_sda", I2C + "bsc2_scl", I2C + "bsc2_sda", I2C + "classgpwr", Standard + "clk_cx8", Standard + "clkout_0", Standard + "clkout_1", Standard + "clkout_2", Standard + "clkout_3", Standard + "clkreq_in_0", Standard + "clkreq_in_1", Standard + "cws_sys_req1", Standard + "cws_sys_req2", Standard + "cws_sys_req3", Standard + "digmic1_clk", Standard + "digmic1_dq", Standard + "digmic2_clk", Standard + "digmic2_dq", Standard + "gpen13", Standard + "gpen14", Standard + "gpen15", Standard + "gpio00", Standard + "gpio01", Standard + "gpio02", Standard + "gpio03", Standard + "gpio04", Standard + "gpio05", Standard + "gpio06", Standard + "gpio07", Standard + "gpio08", Standard + "gpio09", Standard + "gpio10", Standard + "gpio11", Standard + "gpio12", Standard + "gpio13", Standard + "gpio14", Standard + "gps_pablank", Standard + "gps_tmark", Standard + "hdmi_scl", HDMI + "hdmi_sda", HDMI + "ic_dm", Standard + "ic_dp", Standard + "kp_col_ip_0", Standard + "kp_col_ip_1", Standard + "kp_col_ip_2", Standard + "kp_col_ip_3", Standard + "kp_row_op_0", Standard + "kp_row_op_1", Standard + "kp_row_op_2", Standard + "kp_row_op_3", Standard + "lcd_b_0", Standard + "lcd_b_1", Standard + "lcd_b_2", Standard + "lcd_b_3", Standard + "lcd_b_4", Standard + "lcd_b_5", Standard + "lcd_b_6", Standard + "lcd_b_7", Standard + "lcd_g_0", Standard + "lcd_g_1", Standard + "lcd_g_2", Standard + "lcd_g_3", Standard + "lcd_g_4", Standard + "lcd_g_5", Standard + "lcd_g_6", Standard + "lcd_g_7", Standard + "lcd_hsync", Standard + "lcd_oe", Standard + "lcd_pclk", Standard + "lcd_r_0", Standard + "lcd_r_1", Standard + "lcd_r_2", Standard + "lcd_r_3", Standard + "lcd_r_4", Standard + "lcd_r_5", Standard + "lcd_r_6", Standard + "lcd_r_7", Standard + "lcd_vsync", Standard + "mdmgpio0", Standard + "mdmgpio1", Standard + "mdmgpio2", Standard + "mdmgpio3", Standard + "mdmgpio4", Standard + "mdmgpio5", Standard + "mdmgpio6", Standard + "mdmgpio7", Standard + "mdmgpio8", Standard + "mphi_data_0", Standard + "mphi_data_1", Standard + "mphi_data_2", Standard + "mphi_data_3", Standard + "mphi_data_4", Standard + "mphi_data_5", Standard + "mphi_data_6", Standard + "mphi_data_7", Standard + "mphi_data_8", Standard + "mphi_data_9", Standard + "mphi_data_10", Standard + "mphi_data_11", Standard + "mphi_data_12", Standard + "mphi_data_13", Standard + "mphi_data_14", Standard + "mphi_data_15", Standard + "mphi_ha0", Standard + "mphi_hat0", Standard + "mphi_hat1", Standard + "mphi_hce0_n", Standard + "mphi_hce1_n", Standard + "mphi_hrd_n", Standard + "mphi_hwr_n", Standard + "mphi_run0", Standard + "mphi_run1", Standard + "mtx_scan_clk", Standard + "mtx_scan_data", Standard + "nand_ad_0", Standard + "nand_ad_1", Standard + "nand_ad_2", Standard + "nand_ad_3", Standard + "nand_ad_4", Standard + "nand_ad_5", Standard + "nand_ad_6", Standard + "nand_ad_7", Standard + "nand_ale", Standard + "nand_cen_0", Standard + "nand_cen_1", Standard + "nand_cle", Standard + "nand_oen", Standard + "nand_rdy_0", Standard + "nand_rdy_1", Standard + "nand_wen", Standard + "nand_wp", Standard + "pc1", Standard + "pc2", Standard + "pmu_int", Standard + "pmu_scl", I2C + "pmu_sda", I2C + "rfst2g_mtsloten3g", Standard + "rgmii_0_rx_ctl", Standard + "rgmii_0_rxc", Standard + "rgmii_0_rxd_0", Standard + "rgmii_0_rxd_1", Standard + "rgmii_0_rxd_2", Standard + "rgmii_0_rxd_3", Standard + "rgmii_0_tx_ctl", Standard + "rgmii_0_txc", Standard + "rgmii_0_txd_0", Standard + "rgmii_0_txd_1", Standard + "rgmii_0_txd_2", Standard + "rgmii_0_txd_3", Standard + "rgmii_1_rx_ctl", Standard + "rgmii_1_rxc", Standard + "rgmii_1_rxd_0", Standard + "rgmii_1_rxd_1", Standard + "rgmii_1_rxd_2", Standard + "rgmii_1_rxd_3", Standard + "rgmii_1_tx_ctl", Standard + "rgmii_1_txc", Standard + "rgmii_1_txd_0", Standard + "rgmii_1_txd_1", Standard + "rgmii_1_txd_2", Standard + "rgmii_1_txd_3", Standard + "rgmii_gpio_0", Standard + "rgmii_gpio_1", Standard + "rgmii_gpio_2", Standard + "rgmii_gpio_3", Standard + "rtxdata2g_txdata3g1", Standard + "rtxen2g_txdata3g2", Standard + "rxdata3g0", Standard + "rxdata3g1", Standard + "rxdata3g2", Standard + "sdio1_clk", Standard + "sdio1_cmd", Standard + "sdio1_data_0", Standard + "sdio1_data_1", Standard + "sdio1_data_2", Standard + "sdio1_data_3", Standard + "sdio4_clk", Standard + "sdio4_cmd", Standard + "sdio4_data_0", Standard + "sdio4_data_1", Standard + "sdio4_data_2", Standard + "sdio4_data_3", Standard + "sim_clk", Standard + "sim_data", Standard + "sim_det", Standard + "sim_resetn", Standard + "sim2_clk", Standard + "sim2_data", Standard + "sim2_det", Standard + "sim2_resetn", Standard + "sri_c", Standard + "sri_d", Standard + "sri_e", Standard + "ssp_extclk", Standard + "ssp0_clk", Standard + "ssp0_fs", Standard + "ssp0_rxd", Standard + "ssp0_txd", Standard + "ssp2_clk", Standard + "ssp2_fs_0", Standard + "ssp2_fs_1", Standard + "ssp2_fs_2", Standard + "ssp2_fs_3", Standard + "ssp2_rxd_0", Standard + "ssp2_rxd_1", Standard + "ssp2_txd_0", Standard + "ssp2_txd_1", Standard + "ssp3_clk", Standard + "ssp3_fs", Standard + "ssp3_rxd", Standard + "ssp3_txd", Standard + "ssp4_clk", Standard + "ssp4_fs", Standard + "ssp4_rxd", Standard + "ssp4_txd", Standard + "ssp5_clk", Standard + "ssp5_fs", Standard + "ssp5_rxd", Standard + "ssp5_txd", Standard + "ssp6_clk", Standard + "ssp6_fs", Standard + "ssp6_rxd", Standard + "ssp6_txd", Standard + "stat_1", Standard + "stat_2", Standard + "sysclken", Standard + "traceclk", Standard + "tracedt00", Standard + "tracedt01", Standard + "tracedt02", Standard + "tracedt03", Standard + "tracedt04", Standard + "tracedt05", Standard + "tracedt06", Standard + "tracedt07", Standard + "tracedt08", Standard + "tracedt09", Standard + "tracedt10", Standard + "tracedt11", Standard + "tracedt12", Standard + "tracedt13", Standard + "tracedt14", Standard + "tracedt15", Standard + "txdata3g0", Standard + "txpwrind", Standard + "uartb1_ucts", Standard + "uartb1_urts", Standard + "uartb1_urxd", Standard + "uartb1_utxd", Standard + "uartb2_urxd", Standard + "uartb2_utxd", Standard + "uartb3_ucts", Standard + "uartb3_urts", Standard + "uartb3_urxd", Standard + "uartb3_utxd", Standard + "uartb4_ucts", Standard + "uartb4_urts", Standard + "uartb4_urxd", Standard + "uartb4_utxd", Standard + "vc_cam1_scl", I2C + "vc_cam1_sda", I2C + "vc_cam2_scl", I2C + "vc_cam2_sda", I2C + "vc_cam3_scl", I2C + "vc_cam3_sda", I2C diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,capri-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/brcm,capri-pinctrl.txt deleted file mode 100644 index 9e9e9ef..0000000 --- a/Documentation/devicetree/bindings/pinctrl/brcm,capri-pinctrl.txt +++ /dev/null @@ -1,461 +0,0 @@ -Broadcom Capri Pin Controller - -This is a pin controller for the Broadcom BCM281xx SoC family, which includes -BCM11130, BCM11140, BCM11351, BCM28145, and BCM28155 SoCs. - -=== Pin Controller Node === - -Required Properties: - -- compatible: Must be "brcm,capri-pinctrl". -- reg: Base address of the PAD Controller register block and the size - of the block. - -For example, the following is the bare minimum node: - - pinctrl@35004800 { - compatible = "brcm,capri-pinctrl"; - reg = <0x35004800 0x430>; - }; - -As a pin controller device, in addition to the required properties, this node -should also contain the pin configuration nodes that client devices reference, -if any. - -=== Pin Configuration Node === - -Each pin configuration node is a sub-node of the pin controller node and is a -container of an arbitrary number of subnodes, called pin group nodes in this -document. - -Please refer to the pinctrl-bindings.txt in this directory for details of the -common pinctrl bindings used by client devices, including the definition of a -"pin configuration node". - -=== Pin Group Node === - -A pin group node specifies the desired pin mux and/or pin configuration for an -arbitrary number of pins. The name of the pin group node is optional and not -used. - -A pin group node only affects the properties specified in the node, and has no -effect on any properties that are omitted. - -The pin group node accepts a subset of the generic pin config properties. For -details generic pin config properties, please refer to pinctrl-bindings.txt -and . - -Each pin controlled by this pin controller belong to one of three types: -Standard, I2C, and HDMI. Each type accepts a different set of pin config -properties. A list of pins and their types is provided below. - -Required Properties (applicable to all pins): - -- pins: Multiple strings. Specifies the name(s) of one or more pins to - be configured by this node. - -Optional Properties (for standard pins): - -- function: String. Specifies the pin mux selection. Values - must be one of: "alt1", "alt2", "alt3", "alt4" -- input-schmitt-enable: No arguments. Enable schmitt-trigger mode. -- input-schmitt-disable: No arguments. Disable schmitt-trigger mode. -- bias-pull-up: No arguments. Pull up on pin. -- bias-pull-down: No arguments. Pull down on pin. -- bias-disable: No arguments. Disable pin bias. -- slew-rate: Integer. Meaning depends on configured pin mux: - *_SCL or *_SDA: - 0: Standard(100kbps)& Fast(400kbps) mode - 1: Highspeed (3.4Mbps) mode - IC_DM or IC_DP: - 0: normal slew rate - 1: fast slew rate - Otherwise: - 0: fast slew rate - 1: normal slew rate -- input-enable: No arguements. Enable input (does not affect - output.) -- input-disable: No arguements. Disable input (does not affect - output.) -- drive-strength: Integer. Drive strength in mA. Valid values are - 2, 4, 6, 8, 10, 12, 14, 16 mA. - -Optional Properties (for I2C pins): - -- function: String. Specifies the pin mux selection. Values - must be one of: "alt1", "alt2", "alt3", "alt4" -- bias-pull-up: Integer. Pull up strength in Ohm. There are 3 - pull-up resisitors (1.2k, 1.8k, 2.7k) available - in parallel for I2C pins, so the valid values - are: 568, 720, 831, 1080, 1200, 1800, 2700 Ohm. -- bias-disable: No arguments. Disable pin bias. -- slew-rate: Integer. Meaning depends on configured pin mux: - *_SCL or *_SDA: - 0: Standard(100kbps)& Fast(400kbps) mode - 1: Highspeed (3.4Mbps) mode - IC_DM or IC_DP: - 0: normal slew rate - 1: fast slew rate - Otherwise: - 0: fast slew rate - 1: normal slew rate -- input-enable: No arguements. Enable input (does not affect - output.) -- input-disable: No arguements. Disable input (does not affect - output.) - -Optional Properties (for HDMI pins): - -- function: String. Specifies the pin mux selection. Values - must be one of: "alt1", "alt2", "alt3", "alt4" -- slew-rate: Integer. Controls slew rate. - 0: Standard(100kbps)& Fast(400kbps) mode - 1: Highspeed (3.4Mbps) mode -- input-enable: No arguements. Enable input (does not affect - output.) -- input-disable: No arguements. Disable input (does not affect - output.) - -Example: -// pin controller node -pinctrl@35004800 { - compatible = "brcm,capri-pinctrl"; - reg = <0x35004800 0x430>; - - // pin configuration node - dev_a_default: dev_a_active { - //group node defining 1 standard pin - grp_1 { - pins = "std_pin1"; - function = "alt1"; - input-schmitt-enable; - bias-disable; - slew-rate = <1>; - drive-strength = <4>; - }; - - // group node defining 2 I2C pins - grp_2 { - pins = "i2c_pin1", "i2c_pin2"; - function = "alt2"; - bias-pull-up = <720>; - input-enable; - }; - - // group node defining 2 HDMI pins - grp_3 { - pins = "hdmi_pin1", "hdmi_pin2"; - function = "alt3"; - slew-rate = <1>; - }; - - // other pin group nodes - ... - }; - - // other pin configuration nodes - ... -}; - -In the example above, "dev_a_active" is a pin configuration node with a number -of sub-nodes. In the pin group node "grp_1", one pin, "std_pin1", is defined in -the "pins" property. Thus, the remaining properties in the "grp_1" node applies -only to this pin, including the following settings: - - setting pinmux to "alt1" - - enabling schmitt-trigger (hystersis) mode - - disabling pin bias - - setting the slew-rate to 1 - - setting the drive strength to 4 mA -Note that neither "input-enable" nor "input-disable" was specified - the pinctrl -subsystem will therefore leave this property unchanged from whatever state it -was in before applying these changes. - -The "pins" property in the pin group node "grp_2" specifies two pins - -"i2c_pin1" and "i2c_pin2"; the remaining properties in this pin group node, -therefore, applies to both of these pins. The properties include: - - setting pinmux to "alt2" - - setting pull-up resistance to 720 Ohm (ie. enabling 1.2k and 1.8k resistors - in parallel) - - enabling both pins' input -"slew-rate" is not specified in this pin group node, so the slew-rate for these -pins are left as-is. - -Finally, "grp_3" defines two HDMI pins. The following properties are applied to -both pins: - - setting pinmux to "alt3" - - setting slew-rate to 1; for HDMI pins, this corresponds to the 3.4 Mbps - Highspeed mode -The input is neither enabled or disabled, and is left untouched. - -=== Pin Names and Type === - -The following are valid pin names and their pin types: - - "adcsync", Standard - "bat_rm", Standard - "bsc1_scl", I2C - "bsc1_sda", I2C - "bsc2_scl", I2C - "bsc2_sda", I2C - "classgpwr", Standard - "clk_cx8", Standard - "clkout_0", Standard - "clkout_1", Standard - "clkout_2", Standard - "clkout_3", Standard - "clkreq_in_0", Standard - "clkreq_in_1", Standard - "cws_sys_req1", Standard - "cws_sys_req2", Standard - "cws_sys_req3", Standard - "digmic1_clk", Standard - "digmic1_dq", Standard - "digmic2_clk", Standard - "digmic2_dq", Standard - "gpen13", Standard - "gpen14", Standard - "gpen15", Standard - "gpio00", Standard - "gpio01", Standard - "gpio02", Standard - "gpio03", Standard - "gpio04", Standard - "gpio05", Standard - "gpio06", Standard - "gpio07", Standard - "gpio08", Standard - "gpio09", Standard - "gpio10", Standard - "gpio11", Standard - "gpio12", Standard - "gpio13", Standard - "gpio14", Standard - "gps_pablank", Standard - "gps_tmark", Standard - "hdmi_scl", HDMI - "hdmi_sda", HDMI - "ic_dm", Standard - "ic_dp", Standard - "kp_col_ip_0", Standard - "kp_col_ip_1", Standard - "kp_col_ip_2", Standard - "kp_col_ip_3", Standard - "kp_row_op_0", Standard - "kp_row_op_1", Standard - "kp_row_op_2", Standard - "kp_row_op_3", Standard - "lcd_b_0", Standard - "lcd_b_1", Standard - "lcd_b_2", Standard - "lcd_b_3", Standard - "lcd_b_4", Standard - "lcd_b_5", Standard - "lcd_b_6", Standard - "lcd_b_7", Standard - "lcd_g_0", Standard - "lcd_g_1", Standard - "lcd_g_2", Standard - "lcd_g_3", Standard - "lcd_g_4", Standard - "lcd_g_5", Standard - "lcd_g_6", Standard - "lcd_g_7", Standard - "lcd_hsync", Standard - "lcd_oe", Standard - "lcd_pclk", Standard - "lcd_r_0", Standard - "lcd_r_1", Standard - "lcd_r_2", Standard - "lcd_r_3", Standard - "lcd_r_4", Standard - "lcd_r_5", Standard - "lcd_r_6", Standard - "lcd_r_7", Standard - "lcd_vsync", Standard - "mdmgpio0", Standard - "mdmgpio1", Standard - "mdmgpio2", Standard - "mdmgpio3", Standard - "mdmgpio4", Standard - "mdmgpio5", Standard - "mdmgpio6", Standard - "mdmgpio7", Standard - "mdmgpio8", Standard - "mphi_data_0", Standard - "mphi_data_1", Standard - "mphi_data_2", Standard - "mphi_data_3", Standard - "mphi_data_4", Standard - "mphi_data_5", Standard - "mphi_data_6", Standard - "mphi_data_7", Standard - "mphi_data_8", Standard - "mphi_data_9", Standard - "mphi_data_10", Standard - "mphi_data_11", Standard - "mphi_data_12", Standard - "mphi_data_13", Standard - "mphi_data_14", Standard - "mphi_data_15", Standard - "mphi_ha0", Standard - "mphi_hat0", Standard - "mphi_hat1", Standard - "mphi_hce0_n", Standard - "mphi_hce1_n", Standard - "mphi_hrd_n", Standard - "mphi_hwr_n", Standard - "mphi_run0", Standard - "mphi_run1", Standard - "mtx_scan_clk", Standard - "mtx_scan_data", Standard - "nand_ad_0", Standard - "nand_ad_1", Standard - "nand_ad_2", Standard - "nand_ad_3", Standard - "nand_ad_4", Standard - "nand_ad_5", Standard - "nand_ad_6", Standard - "nand_ad_7", Standard - "nand_ale", Standard - "nand_cen_0", Standard - "nand_cen_1", Standard - "nand_cle", Standard - "nand_oen", Standard - "nand_rdy_0", Standard - "nand_rdy_1", Standard - "nand_wen", Standard - "nand_wp", Standard - "pc1", Standard - "pc2", Standard - "pmu_int", Standard - "pmu_scl", I2C - "pmu_sda", I2C - "rfst2g_mtsloten3g", Standard - "rgmii_0_rx_ctl", Standard - "rgmii_0_rxc", Standard - "rgmii_0_rxd_0", Standard - "rgmii_0_rxd_1", Standard - "rgmii_0_rxd_2", Standard - "rgmii_0_rxd_3", Standard - "rgmii_0_tx_ctl", Standard - "rgmii_0_txc", Standard - "rgmii_0_txd_0", Standard - "rgmii_0_txd_1", Standard - "rgmii_0_txd_2", Standard - "rgmii_0_txd_3", Standard - "rgmii_1_rx_ctl", Standard - "rgmii_1_rxc", Standard - "rgmii_1_rxd_0", Standard - "rgmii_1_rxd_1", Standard - "rgmii_1_rxd_2", Standard - "rgmii_1_rxd_3", Standard - "rgmii_1_tx_ctl", Standard - "rgmii_1_txc", Standard - "rgmii_1_txd_0", Standard - "rgmii_1_txd_1", Standard - "rgmii_1_txd_2", Standard - "rgmii_1_txd_3", Standard - "rgmii_gpio_0", Standard - "rgmii_gpio_1", Standard - "rgmii_gpio_2", Standard - "rgmii_gpio_3", Standard - "rtxdata2g_txdata3g1", Standard - "rtxen2g_txdata3g2", Standard - "rxdata3g0", Standard - "rxdata3g1", Standard - "rxdata3g2", Standard - "sdio1_clk", Standard - "sdio1_cmd", Standard - "sdio1_data_0", Standard - "sdio1_data_1", Standard - "sdio1_data_2", Standard - "sdio1_data_3", Standard - "sdio4_clk", Standard - "sdio4_cmd", Standard - "sdio4_data_0", Standard - "sdio4_data_1", Standard - "sdio4_data_2", Standard - "sdio4_data_3", Standard - "sim_clk", Standard - "sim_data", Standard - "sim_det", Standard - "sim_resetn", Standard - "sim2_clk", Standard - "sim2_data", Standard - "sim2_det", Standard - "sim2_resetn", Standard - "sri_c", Standard - "sri_d", Standard - "sri_e", Standard - "ssp_extclk", Standard - "ssp0_clk", Standard - "ssp0_fs", Standard - "ssp0_rxd", Standard - "ssp0_txd", Standard - "ssp2_clk", Standard - "ssp2_fs_0", Standard - "ssp2_fs_1", Standard - "ssp2_fs_2", Standard - "ssp2_fs_3", Standard - "ssp2_rxd_0", Standard - "ssp2_rxd_1", Standard - "ssp2_txd_0", Standard - "ssp2_txd_1", Standard - "ssp3_clk", Standard - "ssp3_fs", Standard - "ssp3_rxd", Standard - "ssp3_txd", Standard - "ssp4_clk", Standard - "ssp4_fs", Standard - "ssp4_rxd", Standard - "ssp4_txd", Standard - "ssp5_clk", Standard - "ssp5_fs", Standard - "ssp5_rxd", Standard - "ssp5_txd", Standard - "ssp6_clk", Standard - "ssp6_fs", Standard - "ssp6_rxd", Standard - "ssp6_txd", Standard - "stat_1", Standard - "stat_2", Standard - "sysclken", Standard - "traceclk", Standard - "tracedt00", Standard - "tracedt01", Standard - "tracedt02", Standard - "tracedt03", Standard - "tracedt04", Standard - "tracedt05", Standard - "tracedt06", Standard - "tracedt07", Standard - "tracedt08", Standard - "tracedt09", Standard - "tracedt10", Standard - "tracedt11", Standard - "tracedt12", Standard - "tracedt13", Standard - "tracedt14", Standard - "tracedt15", Standard - "txdata3g0", Standard - "txpwrind", Standard - "uartb1_ucts", Standard - "uartb1_urts", Standard - "uartb1_urxd", Standard - "uartb1_utxd", Standard - "uartb2_urxd", Standard - "uartb2_utxd", Standard - "uartb3_ucts", Standard - "uartb3_urts", Standard - "uartb3_urxd", Standard - "uartb3_utxd", Standard - "uartb4_ucts", Standard - "uartb4_urts", Standard - "uartb4_urxd", Standard - "uartb4_utxd", Standard - "vc_cam1_scl", I2C - "vc_cam1_sda", I2C - "vc_cam2_scl", I2C - "vc_cam2_sda", I2C - "vc_cam3_scl", I2C - "vc_cam3_sda", I2C -- cgit v0.10.2 From 548da08fc1e245faf9b0d7c41ecd8e07984fc332 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sat, 22 Feb 2014 18:30:13 +0100 Subject: ASoC: wm8958-dsp: Fix firmware block loading The codec->control_data contains a pointer to the device's regmap struct. But wm8994_bulk_write() expects a pointer to the parent wm8998 device. The issue was introduced in commit d9a7666f ("ASoC: Remove ASoC-specific WM8994 I/O code"). Fixes: d9a7666f ("ASoC: Remove ASoC-specific WM8994 I/O code") Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/wm8958-dsp2.c b/sound/soc/codecs/wm8958-dsp2.c index b7488f1..d4248e0 100644 --- a/sound/soc/codecs/wm8958-dsp2.c +++ b/sound/soc/codecs/wm8958-dsp2.c @@ -153,7 +153,7 @@ static int wm8958_dsp2_fw(struct snd_soc_codec *codec, const char *name, data32 &= 0xffffff; - wm8994_bulk_write(codec->control_data, + wm8994_bulk_write(wm8994->wm8994, data32 & 0xffffff, block_len / 2, (void *)(data + 8)); -- cgit v0.10.2 From 00efcb1c8e1c3c5e5d3ce6f0682d66402911a84f Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 7 Jan 2014 13:03:43 +0100 Subject: clk: Correct handling of NULL clk in __clk_{get, put} Ensure clk->kref is dereferenced only when clk is not NULL. Signed-off-by: Sylwester Nawrocki Tested-by: Sachin Kamat Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 5517944..c42e608 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2226,24 +2226,25 @@ EXPORT_SYMBOL_GPL(devm_clk_unregister); */ int __clk_get(struct clk *clk) { - if (clk && !try_module_get(clk->owner)) - return 0; + if (clk) { + if (!try_module_get(clk->owner)) + return 0; - kref_get(&clk->ref); + kref_get(&clk->ref); + } return 1; } void __clk_put(struct clk *clk) { - if (WARN_ON_ONCE(IS_ERR(clk))) + if (!clk || WARN_ON_ONCE(IS_ERR(clk))) return; clk_prepare_lock(); kref_put(&clk->ref, __clk_release); clk_prepare_unlock(); - if (clk) - module_put(clk->owner); + module_put(clk->owner); } /*** clk rate change notifiers ***/ -- cgit v0.10.2 From 37c367ecdb9a01c9acc980e6e17913570a1788a7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 24 Feb 2014 15:23:10 +0100 Subject: ALSA: hda - Add a fixup for HP Folio 13 mute LED HP Folio 13 may have a broken BIOS that doesn't set up the mute LED GPIO properly, and the driver guesses it wrongly, too. Add a new fixup entry for setting the GPIO pin statically for this laptop. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70991 Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index a2f11bf..3bc29c9 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -98,6 +98,7 @@ enum { STAC_92HD83XXX_HP_LED, STAC_92HD83XXX_HP_INV_LED, STAC_92HD83XXX_HP_MIC_LED, + STAC_HP_LED_GPIO10, STAC_92HD83XXX_HEADSET_JACK, STAC_92HD83XXX_HP, STAC_HP_ENVY_BASS, @@ -2130,6 +2131,17 @@ static void stac92hd83xxx_fixup_hp_mic_led(struct hda_codec *codec, } } +static void stac92hd83xxx_fixup_hp_led_gpio10(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct sigmatel_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gpio_led = 0x10; /* GPIO4 */ + spec->default_polarity = 0; + } +} + static void stac92hd83xxx_fixup_headset_jack(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -2624,6 +2636,12 @@ static const struct hda_fixup stac92hd83xxx_fixups[] = { .chained = true, .chain_id = STAC_92HD83XXX_HP, }, + [STAC_HP_LED_GPIO10] = { + .type = HDA_FIXUP_FUNC, + .v.func = stac92hd83xxx_fixup_hp_led_gpio10, + .chained = true, + .chain_id = STAC_92HD83XXX_HP, + }, [STAC_92HD83XXX_HEADSET_JACK] = { .type = HDA_FIXUP_FUNC, .v.func = stac92hd83xxx_fixup_headset_jack, @@ -2702,6 +2720,8 @@ static const struct snd_pci_quirk stac92hd83xxx_fixup_tbl[] = { "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1888, "HP Envy Spectre", STAC_HP_ENVY_BASS), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1899, + "HP Folio 13", STAC_HP_LED_GPIO10), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x18df, "HP Folio", STAC_HP_BNB13_EQ), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x18F8, -- cgit v0.10.2 From c0f5eeed0f4cef4f05b74883a7160e7edde58b6a Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 24 Feb 2014 09:39:27 +0100 Subject: i7core_edac: Fix PCI device reference count The reference count changes done by pci_get_device can be a little misleading when the usage diverges from the most common scheme. The reference count of the device passed as the last parameter is always decreased, even if the function returns no new device. So if we are going to try alternative device IDs, we must manually increment the device reference count before each retry. If we don't, we end up decreasing the reference count, and after a few modprobe/rmmod cycles the PCI devices will vanish. In other words and as Alan put it: without this fix the EDAC code corrupts the PCI device list. This fixes kernel bug #50491: https://bugzilla.kernel.org/show_bug.cgi?id=50491 Signed-off-by: Jean Delvare Link: http://lkml.kernel.org/r/20140224093927.7659dd9d@endymion.delvare Reviewed-by: Alan Cox Cc: Mauro Carvalho Chehab Cc: Doug Thompson Cc: stable@vger.kernel.org Signed-off-by: Borislav Petkov diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 87533ca..d871275 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1334,14 +1334,19 @@ static int i7core_get_onedevice(struct pci_dev **prev, * is at addr 8086:2c40, instead of 8086:2c41. So, we need * to probe for the alternate address in case of failure */ - if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev) { + pci_dev_get(*prev); /* pci_get_device will put it */ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev); + } - if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev) + if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && + !pdev) { + pci_dev_get(*prev); /* pci_get_device will put it */ pdev = pci_get_device(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT, *prev); + } if (!pdev) { if (*prev) { -- cgit v0.10.2 From 75135da0d68419ef8a925f4c1d5f63d8046e314d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 25 Feb 2014 09:43:13 +0100 Subject: i7300_edac: Fix device reference count pci_get_device() decrements the reference count of "from" (last argument) so when we break off the loop successfully we have only one device reference - and we don't know which device we have. If we want a reference to each device, we must take them explicitly and let the pci_get_device() walk complete to avoid duplicate references. This is serious, as over-putting device references will cause the device to eventually disappear. Without this fix, the kernel crashes after a few insmod/rmmod cycles. Tested on an Intel S7000FC4UR system with a 7300 chipset. Signed-off-by: Jean Delvare Link: http://lkml.kernel.org/r/20140224111656.09bbb7ed@endymion.delvare Cc: Mauro Carvalho Chehab Cc: Doug Thompson Cc: stable@vger.kernel.org Signed-off-by: Borislav Petkov diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index d63f479..57e96a3 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -943,33 +943,35 @@ static int i7300_get_devices(struct mem_ctl_info *mci) /* Attempt to 'get' the MCH register we want */ pdev = NULL; - while (!pvt->pci_dev_16_1_fsb_addr_map || - !pvt->pci_dev_16_2_fsb_err_regs) { - pdev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev); - if (!pdev) { - /* End of list, leave */ - i7300_printk(KERN_ERR, - "'system address,Process Bus' " - "device not found:" - "vendor 0x%x device 0x%x ERR funcs " - "(broken BIOS?)\n", - PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); - goto error; - } - + while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, + pdev))) { /* Store device 16 funcs 1 and 2 */ switch (PCI_FUNC(pdev->devfn)) { case 1: - pvt->pci_dev_16_1_fsb_addr_map = pdev; + if (!pvt->pci_dev_16_1_fsb_addr_map) + pvt->pci_dev_16_1_fsb_addr_map = + pci_dev_get(pdev); break; case 2: - pvt->pci_dev_16_2_fsb_err_regs = pdev; + if (!pvt->pci_dev_16_2_fsb_err_regs) + pvt->pci_dev_16_2_fsb_err_regs = + pci_dev_get(pdev); break; } } + if (!pvt->pci_dev_16_1_fsb_addr_map || + !pvt->pci_dev_16_2_fsb_err_regs) { + /* At least one device was not found */ + i7300_printk(KERN_ERR, + "'system address,Process Bus' device not found:" + "vendor 0x%x device 0x%x ERR funcs (broken BIOS?)\n", + PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_I7300_MCH_ERR); + goto error; + } + edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n", pci_name(pvt->pci_dev_16_0_fsb_ctlr), pvt->pci_dev_16_0_fsb_ctlr->vendor, -- cgit v0.10.2 From 2513190a926f093dbdc301c68e6ade0bcf293f9a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 19:02:34 +0100 Subject: fsnotify: Fix detection whether overflow event is queued Currently we didn't initialize event's list head when we removed it from the event list. Thus a detection whether overflow event is already queued wasn't working. Fix it by always initializing the list head when deleting event from a list. Signed-off-by: Jan Kara diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 18b3c44..6bec2f4 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -132,7 +132,11 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group event = list_first_entry(&group->notification_list, struct fsnotify_event, list); - list_del(&event->list); + /* + * We need to init list head for the case of overflow event so that + * check in fsnotify_add_notify_events() works + */ + list_del_init(&event->list); group->q_len--; return event; -- cgit v0.10.2 From 482ef06c5e946aae360f247dc69471ec031e09d2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 19:07:54 +0100 Subject: fanotify: Handle overflow in case of permission events If the event queue overflows when we are handling permission event, we will never get response from userspace. So we must avoid waiting for it. Change fsnotify_add_notify_event() to return whether overflow has happened so that we can detect it in fanotify_handle_event() and act accordingly. Signed-off-by: Jan Kara diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 205dc21..dc638f7 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -192,10 +192,12 @@ static int fanotify_handle_event(struct fsnotify_group *group, ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); if (ret) { - BUG_ON(mask & FAN_ALL_PERM_EVENTS); + /* Permission events shouldn't be merged */ + BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS); /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); - ret = 0; + + return 0; } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 6bec2f4..6a4ba17 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -80,7 +80,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, /* * Add an event to the group notification queue. The group can later pull this * event off the queue to deal with. The function returns 0 if the event was - * added to the queue, 1 if the event was merged with some other queued event. + * added to the queue, 1 if the event was merged with some other queued event, + * 2 if the queue of events has overflown. */ int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, @@ -95,10 +96,14 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, mutex_lock(&group->notification_mutex); if (group->q_len >= group->max_events) { + ret = 2; /* Queue overflow event only if it isn't already queued */ - if (list_empty(&group->overflow_event.list)) - event = &group->overflow_event; - ret = 1; + if (!list_empty(&group->overflow_event.list)) { + mutex_unlock(&group->notification_mutex); + return ret; + } + event = &group->overflow_event; + goto queue; } if (!list_empty(list) && merge) { @@ -109,6 +114,7 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, } } +queue: group->q_len++; list_add_tail(&event->list, list); mutex_unlock(&group->notification_mutex); -- cgit v0.10.2 From ff57cd5863cf3014c1c5ed62ce2715294f065b17 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 21 Feb 2014 19:14:11 +0100 Subject: fsnotify: Allocate overflow events with proper type Commit 7053aee26a35 "fsnotify: do not share events between notification groups" used overflow event statically allocated in a group with the size of the generic notification event. This causes problems because some code looks at type specific parts of event structure and gets confused by a random data it sees there and causes crashes. Fix the problem by allocating overflow event with type corresponding to the group type so code cannot get confused. Signed-off-by: Jan Kara diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index b6175fa..287a22c 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -698,6 +698,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) struct fsnotify_group *group; int f_flags, fd; struct user_struct *user; + struct fanotify_event_info *oevent; pr_debug("%s: flags=%d event_f_flags=%d\n", __func__, flags, event_f_flags); @@ -730,8 +731,20 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) group->fanotify_data.user = user; atomic_inc(&user->fanotify_listeners); + oevent = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); + if (unlikely(!oevent)) { + fd = -ENOMEM; + goto out_destroy_group; + } + group->overflow_event = &oevent->fse; + fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + oevent->tgid = get_pid(task_tgid(current)); + oevent->path.mnt = NULL; + oevent->path.dentry = NULL; + group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + oevent->response = 0; mutex_init(&group->fanotify_data.access_mutex); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); diff --git a/fs/notify/group.c b/fs/notify/group.c index ee674fe..ad19959 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -55,6 +55,13 @@ void fsnotify_destroy_group(struct fsnotify_group *group) /* clear the notification queue of all events */ fsnotify_flush_notify(group); + /* + * Destroy overflow event (we cannot use fsnotify_destroy_event() as + * that deliberately ignores overflow events. + */ + if (group->overflow_event) + group->ops->free_event(group->overflow_event); + fsnotify_put_group(group); } @@ -99,7 +106,6 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) INIT_LIST_HEAD(&group->marks_list); group->ops = ops; - fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW); return group; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 6528b5a..78a2ca3 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -633,11 +633,23 @@ static int inotify_update_watch(struct fsnotify_group *group, struct inode *inod static struct fsnotify_group *inotify_new_group(unsigned int max_events) { struct fsnotify_group *group; + struct inotify_event_info *oevent; group = fsnotify_alloc_group(&inotify_fsnotify_ops); if (IS_ERR(group)) return group; + oevent = kmalloc(sizeof(struct inotify_event_info), GFP_KERNEL); + if (unlikely(!oevent)) { + fsnotify_destroy_group(group); + return ERR_PTR(-ENOMEM); + } + group->overflow_event = &oevent->fse; + fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); + oevent->wd = -1; + oevent->sync_cookie = 0; + oevent->name_len = 0; + group->max_events = max_events; spin_lock_init(&group->inotify_data.idr_lock); diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 6a4ba17..1e58402 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -98,11 +98,11 @@ int fsnotify_add_notify_event(struct fsnotify_group *group, if (group->q_len >= group->max_events) { ret = 2; /* Queue overflow event only if it isn't already queued */ - if (!list_empty(&group->overflow_event.list)) { + if (!list_empty(&group->overflow_event->list)) { mutex_unlock(&group->notification_mutex); return ret; } - event = &group->overflow_event; + event = group->overflow_event; goto queue; } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c84bc7c..64cf3ef 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -160,7 +160,7 @@ struct fsnotify_group { struct fasync_struct *fsn_fa; /* async notification */ - struct fsnotify_event overflow_event; /* Event we queue when the + struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ -- cgit v0.10.2 From 092008abeed1e4168c08826262695ea67657e1aa Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Fri, 14 Feb 2014 08:54:00 +0100 Subject: cfg80211: regulatory: reset regdomain in case of error Reset regdomain to world regdomain in case of errors in set_regdom() function. This will fix a problem with such scenario: - iw reg set US - iw reg set 00 - iw reg set US The last step always fail and we get deadlock in kernel regulatory code. Next setting new regulatory wasn't possible due to: Pending regulatory request, waiting for it to be processed... Signed-off-by: Janusz Dziedzic Acked-by: Luis R. Rodriguez Signed-off-by: Johannes Berg diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4c50c21..f054137 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2373,6 +2373,7 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd, int set_regdom(const struct ieee80211_regdomain *rd) { struct regulatory_request *lr; + bool user_reset = false; int r; if (!reg_is_valid_request(rd->alpha2)) { @@ -2389,6 +2390,7 @@ int set_regdom(const struct ieee80211_regdomain *rd) break; case NL80211_REGDOM_SET_BY_USER: r = reg_set_rd_user(rd, lr); + user_reset = true; break; case NL80211_REGDOM_SET_BY_DRIVER: r = reg_set_rd_driver(rd, lr); @@ -2402,8 +2404,14 @@ int set_regdom(const struct ieee80211_regdomain *rd) } if (r) { - if (r == -EALREADY) + switch (r) { + case -EALREADY: reg_set_request_processed(); + break; + default: + /* Back to world regulatory in case of errors */ + restore_regulatory_settings(user_reset); + } kfree(rd); return r; -- cgit v0.10.2 From fed95bab8d29b928fcf6225be72d37ded452e8a2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 25 Feb 2014 19:28:44 +0800 Subject: sysfs: fix namespace refcnt leak As mount() and kill_sb() is not a one-to-one match, we shoudn't get ns refcnt unconditionally in sysfs_mount(), and instead we should get the refcnt only when kernfs_mount() allocated a new superblock. v2: - Changed the name of the new argument, suggested by Tejun. - Made the argument optional, suggested by Tejun. v3: - Make the new argument as second-to-last arg, suggested by Tejun. Signed-off-by: Li Zefan Acked-by: Tejun Heo --- fs/kernfs/mount.c | 8 +++++++- fs/sysfs/mount.c | 5 +++-- include/linux/kernfs.h | 9 +++++---- 3 files changed, 15 insertions(+), 7 deletions(-) Signed-off-by: Greg Kroah-Hartman diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 0d6ce89..0f4152d 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -94,6 +94,7 @@ const void *kernfs_super_ns(struct super_block *sb) * @fs_type: file_system_type of the fs being mounted * @flags: mount flags specified for the mount * @root: kernfs_root of the hierarchy being mounted + * @new_sb_created: tell the caller if we allocated a new superblock * @ns: optional namespace tag of the mount * * This is to be called from each kernfs user's file_system_type->mount() @@ -104,7 +105,8 @@ const void *kernfs_super_ns(struct super_block *sb) * The return value can be passed to the vfs layer verbatim. */ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, const void *ns) + struct kernfs_root *root, bool *new_sb_created, + const void *ns) { struct super_block *sb; struct kernfs_super_info *info; @@ -122,6 +124,10 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, kfree(info); if (IS_ERR(sb)) return ERR_CAST(sb); + + if (new_sb_created) + *new_sb_created = !sb->s_root; + if (!sb->s_root) { error = kernfs_fill_super(sb); if (error) { diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 6211230..3eaf5c6 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -27,6 +27,7 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, { struct dentry *root; void *ns; + bool new_sb; if (!(flags & MS_KERNMOUNT)) { if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type)) @@ -37,8 +38,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, } ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); - root = kernfs_mount_ns(fs_type, flags, sysfs_root, ns); - if (IS_ERR(root)) + root = kernfs_mount_ns(fs_type, flags, sysfs_root, &new_sb, ns); + if (IS_ERR(root) || !new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); return root; } diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 5be9f02..d267623 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -249,7 +249,8 @@ void kernfs_notify(struct kernfs_node *kn); const void *kernfs_super_ns(struct super_block *sb); struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, const void *ns); + struct kernfs_root *root, bool *new_sb_created, + const void *ns); void kernfs_kill_sb(struct super_block *sb); void kernfs_init(void); @@ -317,7 +318,7 @@ static inline const void *kernfs_super_ns(struct super_block *sb) static inline struct dentry * kernfs_mount_ns(struct file_system_type *fs_type, int flags, - struct kernfs_root *root, const void *ns) + struct kernfs_root *root, bool *new_sb_created, const void *ns) { return ERR_PTR(-ENOSYS); } static inline void kernfs_kill_sb(struct super_block *sb) { } @@ -368,9 +369,9 @@ static inline int kernfs_remove_by_name(struct kernfs_node *parent, static inline struct dentry * kernfs_mount(struct file_system_type *fs_type, int flags, - struct kernfs_root *root) + struct kernfs_root *root, bool *new_sb_created) { - return kernfs_mount_ns(fs_type, flags, root, NULL); + return kernfs_mount_ns(fs_type, flags, root, new_sb_created, NULL); } #endif /* __LINUX_KERNFS_H */ -- cgit v0.10.2 From da87ca4d4ca101f177fffd84f1f0a5e4c0343557 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 19 Feb 2014 16:19:35 -0800 Subject: ioat: fix tasklet tear down Since commit 77873803363c "net_dma: mark broken" we no longer pin dma engines active for the network-receive-offload use case. As a result the ->free_chan_resources() that occurs after the driver self test no longer has a NET_DMA induced ->alloc_chan_resources() to back it up. A late firing irq can lead to ksoftirqd spinning indefinitely due to the tasklet_disable() performed by ->free_chan_resources(). Only ->alloc_chan_resources() can clear this condition in affected kernels. This problem has been present since commit 3e037454bcfa "I/OAT: Add support for MSI and MSI-X" in 2.6.24, but is now exposed. Given the NET_DMA use case is deprecated we can revisit moving the driver to use threaded irqs. For now, just tear down the irq and tasklet properly by: 1/ Disable the irq from triggering the tasklet 2/ Disable the irq from re-arming 3/ Flush inflight interrupts 4/ Flush the timer 5/ Flush inflight tasklets References: https://lkml.org/lkml/2014/1/27/282 https://lkml.org/lkml/2014/2/19/672 Cc: Ingo Molnar Cc: Steven Rostedt Cc: Reported-by: Mike Galbraith Reported-by: Stanislav Fomichev Tested-by: Mike Galbraith Tested-by: Stanislav Fomichev Reviewed-by: Thomas Gleixner Signed-off-by: Dan Williams diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index 8752918..4e3549a 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -77,7 +77,8 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { chan = ioat_chan_by_index(instance, bit); - tasklet_schedule(&chan->cleanup_task); + if (test_bit(IOAT_RUN, &chan->state)) + tasklet_schedule(&chan->cleanup_task); } writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); @@ -93,7 +94,8 @@ static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) { struct ioat_chan_common *chan = data; - tasklet_schedule(&chan->cleanup_task); + if (test_bit(IOAT_RUN, &chan->state)) + tasklet_schedule(&chan->cleanup_task); return IRQ_HANDLED; } @@ -116,7 +118,6 @@ void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *c chan->timer.function = device->timer_fn; chan->timer.data = data; tasklet_init(&chan->cleanup_task, device->cleanup_fn, data); - tasklet_disable(&chan->cleanup_task); } /** @@ -354,13 +355,49 @@ static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) writel(((u64) chan->completion_dma) >> 32, chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - tasklet_enable(&chan->cleanup_task); + set_bit(IOAT_RUN, &chan->state); ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", __func__, ioat->desccount); return ioat->desccount; } +void ioat_stop(struct ioat_chan_common *chan) +{ + struct ioatdma_device *device = chan->device; + struct pci_dev *pdev = device->pdev; + int chan_id = chan_num(chan); + struct msix_entry *msix; + + /* 1/ stop irq from firing tasklets + * 2/ stop the tasklet from re-arming irqs + */ + clear_bit(IOAT_RUN, &chan->state); + + /* flush inflight interrupts */ + switch (device->irq_mode) { + case IOAT_MSIX: + msix = &device->msix_entries[chan_id]; + synchronize_irq(msix->vector); + break; + case IOAT_MSI: + case IOAT_INTX: + synchronize_irq(pdev->irq); + break; + default: + break; + } + + /* flush inflight timers */ + del_timer_sync(&chan->timer); + + /* flush inflight tasklet runs */ + tasklet_kill(&chan->cleanup_task); + + /* final cleanup now that everything is quiesced and can't re-arm */ + device->cleanup_fn((unsigned long) &chan->common); +} + /** * ioat1_dma_free_chan_resources - release all the descriptors * @chan: the channel to be cleaned @@ -379,9 +416,7 @@ static void ioat1_dma_free_chan_resources(struct dma_chan *c) if (ioat->desccount == 0) return; - tasklet_disable(&chan->cleanup_task); - del_timer_sync(&chan->timer); - ioat1_cleanup(ioat); + ioat_stop(chan); /* Delay 100ms after reset to allow internal DMA logic to quiesce * before removing DMA descriptor resources. @@ -526,8 +561,11 @@ ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, static void ioat1_cleanup_event(unsigned long data) { struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; ioat1_cleanup(ioat); + if (!test_bit(IOAT_RUN, &chan->state)) + return; writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 11fb877..e982f00 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -356,6 +356,7 @@ bool ioat_cleanup_preamble(struct ioat_chan_common *chan, void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); void ioat_kobject_del(struct ioatdma_device *device); int ioat_dma_setup_interrupts(struct ioatdma_device *device); +void ioat_stop(struct ioat_chan_common *chan); extern const struct sysfs_ops ioat_sysfs_ops; extern struct ioat_sysfs_entry ioat_version_attr; extern struct ioat_sysfs_entry ioat_cap_attr; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 5d3affe..8d10580 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -190,8 +190,11 @@ static void ioat2_cleanup(struct ioat2_dma_chan *ioat) void ioat2_cleanup_event(unsigned long data) { struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; ioat2_cleanup(ioat); + if (!test_bit(IOAT_RUN, &chan->state)) + return; writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } @@ -553,10 +556,10 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) ioat->issued = 0; ioat->tail = 0; ioat->alloc_order = order; + set_bit(IOAT_RUN, &chan->state); spin_unlock_bh(&ioat->prep_lock); spin_unlock_bh(&chan->cleanup_lock); - tasklet_enable(&chan->cleanup_task); ioat2_start_null_desc(ioat); /* check that we got off the ground */ @@ -566,7 +569,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); if (is_ioat_active(status) || is_ioat_idle(status)) { - set_bit(IOAT_RUN, &chan->state); return 1 << ioat->alloc_order; } else { u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); @@ -809,11 +811,8 @@ void ioat2_free_chan_resources(struct dma_chan *c) if (!ioat->ring) return; - tasklet_disable(&chan->cleanup_task); - del_timer_sync(&chan->timer); - device->cleanup_fn((unsigned long) c); + ioat_stop(chan); device->reset_hw(chan); - clear_bit(IOAT_RUN, &chan->state); spin_lock_bh(&chan->cleanup_lock); spin_lock_bh(&ioat->prep_lock); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 820817e9..b9b38a1 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -464,8 +464,11 @@ static void ioat3_cleanup(struct ioat2_dma_chan *ioat) static void ioat3_cleanup_event(unsigned long data) { struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); + struct ioat_chan_common *chan = &ioat->base; ioat3_cleanup(ioat); + if (!test_bit(IOAT_RUN, &chan->state)) + return; writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } -- cgit v0.10.2 From d31a36a6d87f68c3b97193bfca11e99d0cc385f7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 24 Feb 2014 22:26:05 +0100 Subject: ath9k: reduce baseband hang detection false positive rate Check if the baseband state remains stable, and add a small delay between register reads. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 11eab9f..303ce27 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1534,7 +1534,7 @@ EXPORT_SYMBOL(ath9k_hw_check_nav); bool ath9k_hw_check_alive(struct ath_hw *ah) { int count = 50; - u32 reg; + u32 reg, last_val; if (AR_SREV_9300(ah)) return !ath9k_hw_detect_mac_hang(ah); @@ -1542,9 +1542,13 @@ bool ath9k_hw_check_alive(struct ath_hw *ah) if (AR_SREV_9285_12_OR_LATER(ah)) return true; + last_val = REG_READ(ah, AR_OBS_BUS_1); do { reg = REG_READ(ah, AR_OBS_BUS_1); + if (reg != last_val) + return true; + last_val = reg; if ((reg & 0x7E7FFFEF) == 0x00702400) continue; @@ -1556,6 +1560,8 @@ bool ath9k_hw_check_alive(struct ath_hw *ah) default: return true; } + + udelay(1); } while (count-- > 0); return false; -- cgit v0.10.2 From b7b146c9c9a0248cc57da71244f672ebc54bbef1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 24 Feb 2014 22:26:06 +0100 Subject: ath9k: fix invalid descriptor discarding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only set sc->rx.discard_next to rx_stats->rs_more when actually discarding the current descriptor. Also, fix a detection of broken descriptors: First the code checks if the current descriptor is not done. Then it checks if the next descriptor is done. Add a check that afterwards checks the first descriptor again, because it might have been completed in the mean time. This fixes a regression introduced in commit 723e711356b5a8a95728a890e254e8b0d47b55cf "ath9k: fix handling of broken descriptors" Cc: stable@vger.kernel.org Reported-by: Marco André Dinis Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index a0ebdd0..82e340d 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -732,11 +732,18 @@ static struct ath_rxbuf *ath_get_next_rx_buf(struct ath_softc *sc, return NULL; /* - * mark descriptor as zero-length and set the 'more' - * flag to ensure that both buffers get discarded + * Re-check previous descriptor, in case it has been filled + * in the mean time. */ - rs->rs_datalen = 0; - rs->rs_more = true; + ret = ath9k_hw_rxprocdesc(ah, ds, rs); + if (ret == -EINPROGRESS) { + /* + * mark descriptor as zero-length and set the 'more' + * flag to ensure that both buffers get discarded + */ + rs->rs_datalen = 0; + rs->rs_more = true; + } } list_del(&bf->list); @@ -985,32 +992,32 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc, struct ath_common *common = ath9k_hw_common(ah); struct ieee80211_hdr *hdr; bool discard_current = sc->rx.discard_next; - int ret = 0; /* * Discard corrupt descriptors which are marked in * ath_get_next_rx_buf(). */ - sc->rx.discard_next = rx_stats->rs_more; if (discard_current) - return -EINVAL; + goto corrupt; + + sc->rx.discard_next = false; /* * Discard zero-length packets. */ if (!rx_stats->rs_datalen) { RX_STAT_INC(rx_len_err); - return -EINVAL; + goto corrupt; } - /* - * rs_status follows rs_datalen so if rs_datalen is too large - * we can take a hint that hardware corrupted it, so ignore - * those frames. - */ + /* + * rs_status follows rs_datalen so if rs_datalen is too large + * we can take a hint that hardware corrupted it, so ignore + * those frames. + */ if (rx_stats->rs_datalen > (common->rx_bufsize - ah->caps.rx_status_len)) { RX_STAT_INC(rx_len_err); - return -EINVAL; + goto corrupt; } /* Only use status info from the last fragment */ @@ -1024,10 +1031,8 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc, * This is different from the other corrupt descriptor * condition handled above. */ - if (rx_stats->rs_status & ATH9K_RXERR_CORRUPT_DESC) { - ret = -EINVAL; - goto exit; - } + if (rx_stats->rs_status & ATH9K_RXERR_CORRUPT_DESC) + goto corrupt; hdr = (struct ieee80211_hdr *) (skb->data + ah->caps.rx_status_len); @@ -1043,18 +1048,15 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc, if (ath_process_fft(sc, hdr, rx_stats, rx_status->mactime)) RX_STAT_INC(rx_spectral); - ret = -EINVAL; - goto exit; + return -EINVAL; } /* * everything but the rate is checked here, the rate check is done * separately to avoid doing two lookups for a rate for each frame. */ - if (!ath9k_rx_accept(common, hdr, rx_status, rx_stats, decrypt_error)) { - ret = -EINVAL; - goto exit; - } + if (!ath9k_rx_accept(common, hdr, rx_status, rx_stats, decrypt_error)) + return -EINVAL; if (ath_is_mybeacon(common, hdr)) { RX_STAT_INC(rx_beacons); @@ -1064,15 +1066,11 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc, /* * This shouldn't happen, but have a safety check anyway. */ - if (WARN_ON(!ah->curchan)) { - ret = -EINVAL; - goto exit; - } + if (WARN_ON(!ah->curchan)) + return -EINVAL; - if (ath9k_process_rate(common, hw, rx_stats, rx_status)) { - ret =-EINVAL; - goto exit; - } + if (ath9k_process_rate(common, hw, rx_stats, rx_status)) + return -EINVAL; ath9k_process_rssi(common, hw, rx_stats, rx_status); @@ -1087,9 +1085,11 @@ static int ath9k_rx_skb_preprocess(struct ath_softc *sc, sc->rx.num_pkts++; #endif -exit: - sc->rx.discard_next = false; - return ret; + return 0; + +corrupt: + sc->rx.discard_next = rx_stats->rs_more; + return -EINVAL; } static void ath9k_rx_skb_postprocess(struct ath_common *common, -- cgit v0.10.2 From fff421580f512fc044cc7421fdff31a7a6997350 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Jan 2014 20:20:43 -0800 Subject: timers: Track total number of timers in list Currently, the tvec_base structure's ->active_timers field tracks only the non-deferrable timers, which means that even if ->active_timers is zero, there might well be deferrable timers in the list. This commit therefore adds an ->all_timers field to track all the timers, whether deferrable or not. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Peter Zijlstra Reviewed-by: Oleg Nesterov Reviewed-by: Steven Rostedt Tested-by: Mike Galbraith diff --git a/kernel/timer.c b/kernel/timer.c index accfd24..fdc4383 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -81,6 +81,7 @@ struct tvec_base { unsigned long timer_jiffies; unsigned long next_timer; unsigned long active_timers; + unsigned long all_timers; struct tvec_root tv1; struct tvec tv2; struct tvec tv3; @@ -392,6 +393,7 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) base->next_timer = timer->expires; base->active_timers++; } + base->all_timers++; } #ifdef CONFIG_TIMER_STATS @@ -671,6 +673,7 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base) detach_timer(timer, true); if (!tbase_get_deferrable(timer->base)) base->active_timers--; + base->all_timers--; } static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, @@ -685,6 +688,7 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, if (timer->expires == base->next_timer) base->next_timer = base->timer_jiffies; } + base->all_timers--; return 1; } @@ -1559,6 +1563,7 @@ static int init_timers_cpu(int cpu) base->timer_jiffies = jiffies; base->next_timer = base->timer_jiffies; base->active_timers = 0; + base->all_timers = 0; return 0; } -- cgit v0.10.2 From d550e81dc0ddc04f1b417c179c214103a28e0ee8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Dec 2013 05:57:10 -0800 Subject: timers: Reduce __run_timers() latency for empty list The __run_timers() function currently steps through the list one jiffy at a time in order to update the timer wheel. However, if the timer wheel is empty, no adjustment is needed other than updating ->timer_jiffies. In this case, which is likely to be common for NO_HZ_FULL kernels, the kernel currently incurs a large latency for no good reason. This commit therefore short-circuits this case. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Peter Zijlstra Reviewed-by: Oleg Nesterov Reviewed-by: Steven Rostedt Tested-by: Mike Galbraith diff --git a/kernel/timer.c b/kernel/timer.c index fdc4383..c8bc709 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -338,6 +338,20 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); +/* + * If the list is empty, catch up ->timer_jiffies to the current time. + * The caller must hold the tvec_base lock. Returns true if the list + * was empty and therefore ->timer_jiffies was updated. + */ +static bool catchup_timer_jiffies(struct tvec_base *base) +{ + if (!base->all_timers) { + base->timer_jiffies = jiffies; + return true; + } + return false; +} + static void __internal_add_timer(struct tvec_base *base, struct timer_list *timer) { @@ -1150,6 +1164,10 @@ static inline void __run_timers(struct tvec_base *base) struct timer_list *timer; spin_lock_irq(&base->lock); + if (catchup_timer_jiffies(base)) { + spin_unlock_irq(&base->lock); + return; + } while (time_after_eq(jiffies, base->timer_jiffies)) { struct list_head work_list; struct list_head *head = &work_list; -- cgit v0.10.2 From 16d937f880312e3f47157d4d6d6ebf7e61523378 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Dec 2013 10:32:01 -0800 Subject: timers: Reduce future __run_timers() latency for newly emptied list The __run_timers() function currently steps through the list one jiffy at a time in order to update the timer wheel. However, if the timer wheel is empty, no adjustment is needed other than updating ->timer_jiffies. Therefore, if we just emptied the timer wheel, for example, by deleting the last timer, we should mark the timer wheel as being up to date. This marking will reduce (and perhaps eliminate) the jiffy-stepping that a future __run_timers() call will need to do in response to some future timer posting or migration. This commit therefore catches ->timer_jiffies for this case. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Peter Zijlstra Reviewed-by: Oleg Nesterov Reviewed-by: Steven Rostedt Tested-by: Mike Galbraith diff --git a/kernel/timer.c b/kernel/timer.c index c8bc709..dfac34f 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -688,6 +688,7 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base) if (!tbase_get_deferrable(timer->base)) base->active_timers--; base->all_timers--; + (void)catchup_timer_jiffies(base); } static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, @@ -703,6 +704,7 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, base->next_timer = base->timer_jiffies; } base->all_timers--; + (void)catchup_timer_jiffies(base); return 1; } -- cgit v0.10.2 From 18d8cb64c9c074cbe2bd677ab10fff8283abdb62 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 16 Dec 2013 10:41:50 -0800 Subject: timers: Reduce future __run_timers() latency for first add to empty list The __run_timers() function currently steps through the list one jiffy at a time in order to update the timer wheel. However, if the timer wheel is empty, no adjustment is needed other than updating ->timer_jiffies. Therefore, just before we add a timer to an empty timer wheel, we should mark the timer wheel as being up to date. This marking will reduce (and perhaps eliminate) the jiffy-stepping that a future __run_timers() call will need to do in response to some future timer posting or migration. This commit therefore updates ->timer_jiffies for this case. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Peter Zijlstra Reviewed-by: Oleg Nesterov Reviewed-by: Steven Rostedt Tested-by: Mike Galbraith diff --git a/kernel/timer.c b/kernel/timer.c index dfac34f..0c638cf 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -398,6 +398,7 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer) static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { + (void)catchup_timer_jiffies(base); __internal_add_timer(base, timer); /* * Update base->active_timers and base->next_timer -- cgit v0.10.2 From aea369b959bef10d235cd0714789cd8b0fe170b8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 15 Jan 2014 16:19:27 -0800 Subject: timers: Make internal_add_timer() update ->next_timer if ->active_timers == 0 The internal_add_timer() function updates base->next_timer only if timer->expires < base->next_timer. This is correct, but it also makes sense to do the same if we add the first non-deferrable timer. Signed-off-by: Oleg Nesterov Reviewed-by: Steven Rostedt Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett Acked-by: Peter Zijlstra Tested-by: Mike Galbraith diff --git a/kernel/timer.c b/kernel/timer.c index 0c638cf..c0d8898 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -404,9 +404,9 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) * Update base->active_timers and base->next_timer */ if (!tbase_get_deferrable(timer->base)) { - if (time_before(timer->expires, base->next_timer)) + if (!base->active_timers++ || + time_before(timer->expires, base->next_timer)) base->next_timer = timer->expires; - base->active_timers++; } base->all_timers++; } -- cgit v0.10.2 From 169a1d85d084edeb0736ad80fe439639ac938dcd Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 19 Feb 2014 17:47:31 +0200 Subject: net,IB/mlx: Bump all Mellanox driver versions Bump all Mellanox driver versions. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e81c554..f9c12e9 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -53,8 +53,8 @@ #include "user.h" #define DRV_NAME MLX4_IB_DRV_NAME -#define DRV_VERSION "1.0" -#define DRV_RELDATE "April 4, 2008" +#define DRV_VERSION "2.2-1" +#define DRV_RELDATE "Feb 2014" #define MLX4_IB_FLOW_MAX_PRIO 0xFFF #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aa03e73..bf90057 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -46,8 +46,8 @@ #include "mlx5_ib.h" #define DRIVER_NAME "mlx5_ib" -#define DRIVER_VERSION "1.0" -#define DRIVER_RELDATE "June 2013" +#define DRIVER_VERSION "2.2-1" +#define DRIVER_RELDATE "Feb 2014" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver"); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 6b65f77..7aec6c8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -51,8 +51,8 @@ #define DRV_NAME "mlx4_core" #define PFX DRV_NAME ": " -#define DRV_VERSION "1.1" -#define DRV_RELDATE "Dec, 2011" +#define DRV_VERSION "2.2-1" +#define DRV_RELDATE "Feb, 2014" #define MLX4_FS_UDP_UC_EN (1 << 1) #define MLX4_FS_TCP_UC_EN (1 << 2) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9ca223b..b57e8c8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -57,8 +57,8 @@ #include "en_port.h" #define DRV_NAME "mlx4_en" -#define DRV_VERSION "2.0" -#define DRV_RELDATE "Dec 2011" +#define DRV_VERSION "2.2-1" +#define DRV_RELDATE "Feb 2014" #define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a064f06..23b7e2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -46,8 +46,8 @@ #include "mlx5_core.h" #define DRIVER_NAME "mlx5_core" -#define DRIVER_VERSION "1.0" -#define DRIVER_RELDATE "June 2013" +#define DRIVER_VERSION "2.2-1" +#define DRIVER_RELDATE "Feb 2014" MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox ConnectX-IB HCA core library"); -- cgit v0.10.2 From f229006ec6beabf7b844653d92fa61f025fe3dcf Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 25 Feb 2014 22:05:35 +0000 Subject: irq-metag*: stop set_affinity vectoring to offline cpus Fix irq_set_affinity callbacks in the Meta IRQ chip drivers to AND cpu_online_mask into the cpumask when picking a CPU to vector the interrupt to. As Thomas pointed out, the /proc/irq/$N/smp_affinity interface doesn't filter out offline CPUs, so without this patch if you offline CPU0 and set an IRQ affinity to 0x3 it vectors the interrupt onto CPU0 even though it is offline. Reported-by: Thomas Gleixner Signed-off-by: James Hogan Cc: Thomas Gleixner Cc: linux-metag@vger.kernel.org Cc: stable@vger.kernel.org diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c index 92c41ab..2cb474a 100644 --- a/drivers/irqchip/irq-metag-ext.c +++ b/drivers/irqchip/irq-metag-ext.c @@ -515,7 +515,7 @@ static int meta_intc_set_affinity(struct irq_data *data, * one cpu (the interrupt code doesn't support it), so we just * pick the first cpu we find in 'cpumask'. */ - cpu = cpumask_any(cpumask); + cpu = cpumask_any_and(cpumask, cpu_online_mask); thread = cpu_2_hwthread_id[cpu]; metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr); diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c index 8e94d7a..c16c186 100644 --- a/drivers/irqchip/irq-metag.c +++ b/drivers/irqchip/irq-metag.c @@ -201,7 +201,7 @@ static int metag_internal_irq_set_affinity(struct irq_data *data, * one cpu (the interrupt code doesn't support it), so we just * pick the first cpu we find in 'cpumask'. */ - cpu = cpumask_any(cpumask); + cpu = cpumask_any_and(cpumask, cpu_online_mask); thread = cpu_2_hwthread_id[cpu]; metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), -- cgit v0.10.2 From 33b6c7765f0c20da9d61246a095acad0f98a1da5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 25 Feb 2014 15:01:39 -0800 Subject: mm, hwpoison: release page on PageHWPoison() in __do_fault() It seems we forget to release page after detecting HW error. Signed-off-by: Kirill A. Shutemov Cc: Mel Gorman Cc: Rik van Riel Cc: Andi Kleen Cc: Matthew Wilcox Cc: Dave Hansen Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory.c b/mm/memory.c index be6a0c0..5f2001a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3348,6 +3348,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (ret & VM_FAULT_LOCKED) unlock_page(vmf.page); ret = VM_FAULT_HWPOISON; + page_cache_release(vmf.page); goto uncharge_out; } -- cgit v0.10.2 From ff3a2b73b7d6d79038512d60690de18cd7aa4e21 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 25 Feb 2014 15:01:40 -0800 Subject: drivers/iommu/omap-iommu-debug.c: fix decimal permissions These should have been octal. Signed-off-by: Joe Perches Cc: Joerg Roedel Cc: Hiroshi DOYU Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index d97fbe4..80fffba 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -354,8 +354,8 @@ DEBUG_FOPS(mem); return -ENOMEM; \ } -#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 600) -#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 400) +#define DEBUG_ADD_FILE(name) __DEBUG_ADD_FILE(name, 0600) +#define DEBUG_ADD_FILE_RO(name) __DEBUG_ADD_FILE(name, 0400) static int iommu_debug_register(struct device *dev, void *data) { -- cgit v0.10.2 From 01412886b735ef241f9a41adf9f707ce1522eb61 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 25 Feb 2014 15:01:41 -0800 Subject: drivers/fmc/fmc-write-eeprom.c: fix decimal permissions This 444 should have been octal. Signed-off-by: Joe Perches Cc: Alessandro Rubini Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/fmc/fmc-write-eeprom.c b/drivers/fmc/fmc-write-eeprom.c index ee5b479..9bb2cbd 100644 --- a/drivers/fmc/fmc-write-eeprom.c +++ b/drivers/fmc/fmc-write-eeprom.c @@ -27,7 +27,7 @@ FMC_PARAM_BUSID(fwe_drv); /* The "file=" is like the generic "gateware=" used elsewhere */ static char *fwe_file[FMC_MAX_CARDS]; static int fwe_file_n; -module_param_array_named(file, fwe_file, charp, &fwe_file_n, 444); +module_param_array_named(file, fwe_file, charp, &fwe_file_n, 0444); static int fwe_run_tlv(struct fmc_device *fmc, const struct firmware *fw, int write) -- cgit v0.10.2 From 9845cbbd113fbb5b769a45d8e88dc47bc12df4e0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 25 Feb 2014 15:01:42 -0800 Subject: mm, thp: fix infinite loop on memcg OOM Masayoshi Mizuma reported a bug with the hang of an application under the memcg limit. It happens on write-protection fault to huge zero page If we successfully allocate a huge page to replace zero page but hit the memcg limit we need to split the zero page with split_huge_page_pmd() and fallback to small pages. The other part of the problem is that VM_FAULT_OOM has special meaning in do_huge_pmd_wp_page() context. __handle_mm_fault() expects the page to be split if it sees VM_FAULT_OOM and it will will retry page fault handling. This causes an infinite loop if the page was not split. do_huge_pmd_wp_zero_page_fallback() can return VM_FAULT_OOM if it failed to allocate one small page, so fallback to small pages will not help. The solution for this part is to replace VM_FAULT_OOM with VM_FAULT_FALLBACK is fallback required. Signed-off-by: Kirill A. Shutemov Reported-by: Masayoshi Mizuma Reviewed-by: Michal Hocko Cc: Johannes Weiner Cc: Andrea Arcangeli Cc: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/huge_memory.c b/mm/huge_memory.c index da23eb9..4df39b1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1166,8 +1166,10 @@ alloc: } else { ret = do_huge_pmd_wp_page_fallback(mm, vma, address, pmd, orig_pmd, page, haddr); - if (ret & VM_FAULT_OOM) + if (ret & VM_FAULT_OOM) { split_huge_page(page); + ret |= VM_FAULT_FALLBACK; + } put_page(page); } count_vm_event(THP_FAULT_FALLBACK); @@ -1179,9 +1181,10 @@ alloc: if (page) { split_huge_page(page); put_page(page); - } + } else + split_huge_page_pmd(vma, address, pmd); + ret |= VM_FAULT_FALLBACK; count_vm_event(THP_FAULT_FALLBACK); - ret |= VM_FAULT_OOM; goto out; } diff --git a/mm/memory.c b/mm/memory.c index 5f2001a..22dfa61 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3704,7 +3704,6 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); -retry: pgd = pgd_offset(mm, address); pud = pud_alloc(mm, pgd, address); if (!pud) @@ -3742,20 +3741,13 @@ retry: if (dirty && !pmd_write(orig_pmd)) { ret = do_huge_pmd_wp_page(mm, vma, address, pmd, orig_pmd); - /* - * If COW results in an oom, the huge pmd will - * have been split, so retry the fault on the - * pte for a smaller charge. - */ - if (unlikely(ret & VM_FAULT_OOM)) - goto retry; - return ret; + if (!(ret & VM_FAULT_FALLBACK)) + return ret; } else { huge_pmd_set_accessed(mm, vma, address, pmd, orig_pmd, dirty); + return 0; } - - return 0; } } -- cgit v0.10.2 From 08088cb9ac0a9c28d4cf3efa4f6848a9b053ccfd Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 25 Feb 2014 15:01:44 -0800 Subject: memcg: change oom_info_lock to mutex Kirill has reported the following: Task in /test killed as a result of limit of /test memory: usage 10240kB, limit 10240kB, failcnt 51 memory+swap: usage 10240kB, limit 10240kB, failcnt 0 kmem: usage 0kB, limit 18014398509481983kB, failcnt 0 Memory cgroup stats for /test: BUG: sleeping function called from invalid context at kernel/cpu.c:68 in_atomic(): 1, irqs_disabled(): 0, pid: 66, name: memcg_test 2 locks held by memcg_test/66: #0: (memcg_oom_lock#2){+.+...}, at: [] pagefault_out_of_memory+0x14/0x90 #1: (oom_info_lock){+.+...}, at: [] mem_cgroup_print_oom_info+0x2a/0x390 CPU: 2 PID: 66 Comm: memcg_test Not tainted 3.14.0-rc1-dirty #745 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Bochs 01/01/2011 Call Trace: __might_sleep+0x16a/0x210 get_online_cpus+0x1c/0x60 mem_cgroup_read_stat+0x27/0xb0 mem_cgroup_print_oom_info+0x260/0x390 dump_header+0x88/0x251 ? trace_hardirqs_on+0xd/0x10 oom_kill_process+0x258/0x3d0 mem_cgroup_oom_synchronize+0x656/0x6c0 ? mem_cgroup_charge_common+0xd0/0xd0 pagefault_out_of_memory+0x14/0x90 mm_fault_error+0x91/0x189 __do_page_fault+0x48e/0x580 do_page_fault+0xe/0x10 page_fault+0x22/0x30 which complains that mem_cgroup_read_stat cannot be called from an atomic context but mem_cgroup_print_oom_info takes a spinlock. Change oom_info_lock to a mutex. This was introduced by 947b3dd1a84b ("memcg, oom: lock mem_cgroup_print_oom_info"). Signed-off-by: Michal Hocko Reported-by: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 53385cd..ce7a8cc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1687,7 +1687,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) * protects memcg_name and makes sure that parallel ooms do not * interleave */ - static DEFINE_SPINLOCK(oom_info_lock); + static DEFINE_MUTEX(oom_info_lock); struct cgroup *task_cgrp; struct cgroup *mem_cgrp; static char memcg_name[PATH_MAX]; @@ -1698,7 +1698,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) if (!p) return; - spin_lock(&oom_info_lock); + mutex_lock(&oom_info_lock); rcu_read_lock(); mem_cgrp = memcg->css.cgroup; @@ -1767,7 +1767,7 @@ done: pr_cont("\n"); } - spin_unlock(&oom_info_lock); + mutex_unlock(&oom_info_lock); } /* -- cgit v0.10.2 From f3713fd9cff733d9df83116422d8e4af6e86b2bb Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 25 Feb 2014 15:01:45 -0800 Subject: ipc,mqueue: remove limits for the amount of system-wide queues Commit 93e6f119c0ce ("ipc/mqueue: cleanup definition names and locations") added global hardcoded limits to the amount of message queues that can be created. While these limits are per-namespace, reality is that it ends up breaking userspace applications. Historically users have, at least in theory, been able to create up to INT_MAX queues, and limiting it to just 1024 is way too low and dramatic for some workloads and use cases. For instance, Madars reports: "This update imposes bad limits on our multi-process application. As our app uses approaches that each process opens its own set of queues (usually something about 3-5 queues per process). In some scenarios we might run up to 3000 processes or more (which of-course for linux is not a problem). Thus we might need up to 9000 queues or more. All processes run under one user." Other affected users can be found in launchpad bug #1155695: https://bugs.launchpad.net/ubuntu/+source/manpages/+bug/1155695 Instead of increasing this limit, revert it entirely and fallback to the original way of dealing queue limits -- where once a user's resource limit is reached, and all memory is used, new queues cannot be created. Signed-off-by: Davidlohr Bueso Reported-by: Madars Vitolins Acked-by: Doug Ledford Cc: Manfred Spraul Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index e7831d2..35e7eca 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -118,9 +118,7 @@ extern int mq_init_ns(struct ipc_namespace *ns); * the new maximum will handle anyone else. I may have to revisit this * in the future. */ -#define MIN_QUEUESMAX 1 #define DFLT_QUEUESMAX 256 -#define HARD_QUEUESMAX 1024 #define MIN_MSGMAX 1 #define DFLT_MSG 10U #define DFLT_MSGMAX 10 diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 383d638..5bb8bfe 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -22,6 +22,16 @@ static void *get_mq(ctl_table *table) return which; } +static int proc_mq_dointvec(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table mq_table; + memcpy(&mq_table, table, sizeof(mq_table)); + mq_table.data = get_mq(table); + + return proc_dointvec(&mq_table, write, buffer, lenp, ppos); +} + static int proc_mq_dointvec_minmax(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -33,12 +43,10 @@ static int proc_mq_dointvec_minmax(ctl_table *table, int write, lenp, ppos); } #else +#define proc_mq_dointvec NULL #define proc_mq_dointvec_minmax NULL #endif -static int msg_queues_limit_min = MIN_QUEUESMAX; -static int msg_queues_limit_max = HARD_QUEUESMAX; - static int msg_max_limit_min = MIN_MSGMAX; static int msg_max_limit_max = HARD_MSGMAX; @@ -51,9 +59,7 @@ static ctl_table mq_sysctls[] = { .data = &init_ipc_ns.mq_queues_max, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_mq_dointvec_minmax, - .extra1 = &msg_queues_limit_min, - .extra2 = &msg_queues_limit_max, + .proc_handler = proc_mq_dointvec, }, { .procname = "msg_max", diff --git a/ipc/mqueue.c b/ipc/mqueue.c index ccf1f9f..c3b3117 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -433,9 +433,9 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, error = -EACCES; goto out_unlock; } - if (ipc_ns->mq_queues_count >= HARD_QUEUESMAX || - (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && - !capable(CAP_SYS_RESOURCE))) { + + if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && + !capable(CAP_SYS_RESOURCE)) { error = -ENOSPC; goto out_unlock; } -- cgit v0.10.2 From 27b2a49a1493f5199a4c2f84abc6aebf81184e2b Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Tue, 25 Feb 2014 15:01:46 -0800 Subject: Makefile: fix extra parenthesis typo when CC_STACKPROTECTOR_REGULAR is enabled An extra parenthesis typo introduced in 19952a92037e ("stackprotector: Unify the HAVE_CC_STACKPROTECTOR logic between architectures") is causing the following error when CONFIG_CC_STACKPROTECTOR_REGULAR is enabled: Makefile:608: Cannot use CONFIG_CC_STACKPROTECTOR: -fstack-protector not supported by compiler Makefile:608: *** missing separator. Stop. Signed-off-by: Fathi Boudra Acked-by: Kees Cook Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Makefile b/Makefile index 831b36a..15e90be 100644 --- a/Makefile +++ b/Makefile @@ -606,7 +606,7 @@ ifdef CONFIG_CC_STACKPROTECTOR_REGULAR stackp-flag := -fstack-protector ifeq ($(call cc-option, $(stackp-flag)),) $(warning Cannot use CONFIG_CC_STACKPROTECTOR: \ - -fstack-protector not supported by compiler)) + -fstack-protector not supported by compiler) endif else ifdef CONFIG_CC_STACKPROTECTOR_STRONG stackp-flag := -fstack-protector-strong -- cgit v0.10.2 From cf015e9f279f100e3c2a826217a4ab5dba3eb5ed Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 25 Feb 2014 15:01:47 -0800 Subject: MAINTAINERS: update L: misuses L: lines are for the email addresses of traditional mailing lists. W: lines are for URLs. Convert two L: misuses to W: links. Signed-off-by: Joe Perches Reported-by: Paul Bolle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 85b3dd8..eab0088 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2611,9 +2611,9 @@ DC395x SCSI driver M: Oliver Neukum M: Ali Akcaagac M: Jamie Lenehan -W: http://twibble.org/dist/dc395x/ L: dc395x@twibble.org -L: http://lists.twibble.org/mailman/listinfo/dc395x/ +W: http://twibble.org/dist/dc395x/ +W: http://lists.twibble.org/mailman/listinfo/dc395x/ S: Maintained F: Documentation/scsi/dc395x.txt F: drivers/scsi/dc395x.* @@ -8443,8 +8443,8 @@ TARGET SUBSYSTEM M: Nicholas A. Bellinger L: linux-scsi@vger.kernel.org L: target-devel@vger.kernel.org -L: http://groups.google.com/group/linux-iscsi-target-dev W: http://www.linux-iscsi.org +W: http://groups.google.com/group/linux-iscsi-target-dev T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git master S: Supported F: drivers/target/ -- cgit v0.10.2 From 6c15b327ccbdce45878d4689f4d938f29dd309db Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Feb 2014 15:01:48 -0800 Subject: Makefile: fix build with make 3.80 again According to Documentation/Changes, make 3.80 is still being supported for building the kernel, hence make files must not make (unconditional) use of features introduced only in newer versions. Commit 8779657d29c0 ("stackprotector: Introduce CONFIG_CC_STACKPROTECTOR_STRONG") however introduced an "else ifdef" construct which make 3.80 doesn't understand. Also correct a warning message still referencing the old config option name. Apart from that I question the use of "ifdef" here (but it was used that way already prior to said commit): ifeq (,y) would seem more to the point. Signed-off-by: Jan Beulich Acked-by: Kees Cook Cc: Ingo Molnar Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/Makefile b/Makefile index 15e90be..81f1bf3 100644 --- a/Makefile +++ b/Makefile @@ -605,10 +605,11 @@ endif ifdef CONFIG_CC_STACKPROTECTOR_REGULAR stackp-flag := -fstack-protector ifeq ($(call cc-option, $(stackp-flag)),) - $(warning Cannot use CONFIG_CC_STACKPROTECTOR: \ + $(warning Cannot use CONFIG_CC_STACKPROTECTOR_REGULAR: \ -fstack-protector not supported by compiler) endif -else ifdef CONFIG_CC_STACKPROTECTOR_STRONG +else +ifdef CONFIG_CC_STACKPROTECTOR_STRONG stackp-flag := -fstack-protector-strong ifeq ($(call cc-option, $(stackp-flag)),) $(warning Cannot use CONFIG_CC_STACKPROTECTOR_STRONG: \ @@ -618,6 +619,7 @@ else # Force off for distro compilers that enable stack protector by default. stackp-flag := $(call cc-option, -fno-stack-protector) endif +endif KBUILD_CFLAGS += $(stackp-flag) # This warning generated too much noise in a regular build. -- cgit v0.10.2 From 61bd0943bc410c9f0daf0965535cb92ce76e1e8a Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 25 Feb 2014 15:01:49 -0800 Subject: MAINTAINERS: change mailing list address for Altera UART drivers The nios2-dev list has been moved to the RocketBoards infrastructure, so adjust the address accordingly. Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index eab0088..df8869d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -538,7 +538,7 @@ F: arch/alpha/ ALTERA UART/JTAG UART SERIAL DRIVERS M: Tobias Klauser L: linux-serial@vger.kernel.org -L: nios2-dev@sopc.et.ntust.edu.tw (moderated for non-subscribers) +L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers) S: Maintained F: drivers/tty/serial/altera_uart.c F: drivers/tty/serial/altera_jtaguart.c -- cgit v0.10.2 From 91a48a2e85a3b70ce10ead34b4ab5347f8d215c9 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 24 Feb 2014 00:48:05 +0100 Subject: ipv4: ipv6: better estimate tunnel header cut for correct ufo handling Currently the UFO fragmentation process does not correctly handle inner UDP frames. (The following tcpdumps are captured on the parent interface with ufo disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280, both sit device): IPv6: 16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 (41), length 1300) 192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 1240) 2001::1 > 2001::8: frag (0x00000001:0|1232) 44883 > distinct: UDP, length 2000 16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 (41), length 844) 192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload length: 784) 2001::1 > 2001::8: frag (0x00000001:0|776) 58979 > 46366: UDP, length 5471 We can see that fragmentation header offset is not correctly updated. (fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse ip6_frag_id from ip6_ufo_append_data")). IPv4: 16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP (4), length 1296) 192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags [none], proto UDP (17), length 1276) 192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000 16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP (4), length 792) 192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags [none], proto UDP (17), length 772) 192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109 In this case fragmentation id is incremented and offset is not updated. First, I aligned inet_gso_segment and ipv6_gso_segment: * align naming of flags * ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we always ensure that the state of this flag is left untouched when returning from upper gso segmenation function * ipv6_gso_segment: move skb_reset_inner_headers below updating the fragmentation header data, we don't care for updating fragmentation header data * remove currently unneeded comment indicating skb->encapsulation might get changed by upper gso_segment callback (gre and udp-tunnel reset encapsulation after segmentation on each fragment) If we encounter an IPIP or SIT gso skb we now check for the protocol == IPPROTO_UDP and that we at least have already traversed another ip(6) protocol header. The reason why we have to special case GSO_IPIP and GSO_SIT is that we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner protocol of GSO_UDP_TUNNEL or GSO_GRE packets. Reported-by: Wolfgang Walter Cc: Cong Wang Cc: Tom Herbert Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ecd2c3f..19ab78a 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1296,8 +1296,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); - /* Note : following gso_segment() might change skb->encapsulation */ - udpfrag = !skb->encapsulation && proto == IPPROTO_UDP; + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + udpfrag = proto == IPPROTO_UDP && encap; + else + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 1e8683b..59f95af 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -89,7 +89,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, unsigned int unfrag_ip6hlen; u8 *prevhdr; int offset = 0; - bool tunnel; + bool encap, udpfrag; int nhoff; if (unlikely(skb_shinfo(skb)->gso_type & @@ -110,8 +110,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; - tunnel = SKB_GSO_CB(skb)->encap_level > 0; - if (tunnel) + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) features = skb->dev->hw_enc_features & netif_skb_features(skb); SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); @@ -121,6 +121,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + udpfrag = proto == IPPROTO_UDP && encap; + else + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); @@ -133,13 +139,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); - if (tunnel) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } skb->network_header = (u8 *)ipv6h - skb->head; - if (!tunnel && proto == IPPROTO_UDP) { + if (udpfrag) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); @@ -148,6 +150,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, offset += (ntohs(ipv6h->payload_len) - sizeof(struct frag_hdr)); } + if (encap) + skb_reset_inner_headers(skb); } out: -- cgit v0.10.2 From 22ae27906da3e6f15bf2a55aba3216f717e21671 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 24 Feb 2014 13:12:06 +0100 Subject: qeth: postpone freeing of qdio memory To guarantee that a qdio ccw_device no longer touches the qdio memory shared with Linux, the qdio ccw_device should be offline when freeing the qdio memory. Thus this patch postpones freeing of qdio memory. Signed-off-by: Ursula Braun Signed-off-by: Frank Blaschka Reviewed-by: Sebastian Ott Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index c3a83df..795ed61 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1660,7 +1660,6 @@ int qeth_qdio_clear_card(struct qeth_card *card, int use_halt) QDIO_FLAG_CLEANUP_USING_CLEAR); if (rc) QETH_CARD_TEXT_(card, 3, "1err%d", rc); - qdio_free(CARD_DDEV(card)); atomic_set(&card->qdio.state, QETH_QDIO_ALLOCATED); break; case QETH_QDIO_CLEANING: @@ -2605,6 +2604,7 @@ static int qeth_mpc_initialize(struct qeth_card *card) return 0; out_qdio: qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD); + qdio_free(CARD_DDEV(card)); return rc; } @@ -4906,9 +4906,11 @@ retry: if (retries < 3) QETH_DBF_MESSAGE(2, "%s Retrying to do IDX activates.\n", dev_name(&card->gdev->dev)); + rc = qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD); ccw_device_set_offline(CARD_DDEV(card)); ccw_device_set_offline(CARD_WDEV(card)); ccw_device_set_offline(CARD_RDEV(card)); + qdio_free(CARD_DDEV(card)); rc = ccw_device_set_online(CARD_RDEV(card)); if (rc) goto retriable; @@ -4918,7 +4920,6 @@ retry: rc = ccw_device_set_online(CARD_DDEV(card)); if (rc) goto retriable; - rc = qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD); retriable: if (rc == -ERESTARTSYS) { QETH_DBF_TEXT(SETUP, 2, "break1"); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 0710550..908d825 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1091,6 +1091,7 @@ out_remove: ccw_device_set_offline(CARD_DDEV(card)); ccw_device_set_offline(CARD_WDEV(card)); ccw_device_set_offline(CARD_RDEV(card)); + qdio_free(CARD_DDEV(card)); if (recover_flag == CARD_STATE_RECOVER) card->state = CARD_STATE_RECOVER; else @@ -1132,6 +1133,7 @@ static int __qeth_l2_set_offline(struct ccwgroup_device *cgdev, rc = (rc2) ? rc2 : rc3; if (rc) QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); + qdio_free(CARD_DDEV(card)); if (recover_flag == CARD_STATE_UP) card->state = CARD_STATE_RECOVER; /* let user_space know that device is offline */ @@ -1194,6 +1196,7 @@ static void qeth_l2_shutdown(struct ccwgroup_device *gdev) qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM); qeth_qdio_clear_card(card, 0); qeth_clear_qdio_buffers(card); + qdio_free(CARD_DDEV(card)); } static int qeth_l2_pm_suspend(struct ccwgroup_device *gdev) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 0f43042..3524d34 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3447,6 +3447,7 @@ out_remove: ccw_device_set_offline(CARD_DDEV(card)); ccw_device_set_offline(CARD_WDEV(card)); ccw_device_set_offline(CARD_RDEV(card)); + qdio_free(CARD_DDEV(card)); if (recover_flag == CARD_STATE_RECOVER) card->state = CARD_STATE_RECOVER; else @@ -3493,6 +3494,7 @@ static int __qeth_l3_set_offline(struct ccwgroup_device *cgdev, rc = (rc2) ? rc2 : rc3; if (rc) QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); + qdio_free(CARD_DDEV(card)); if (recover_flag == CARD_STATE_UP) card->state = CARD_STATE_RECOVER; /* let user_space know that device is offline */ @@ -3545,6 +3547,7 @@ static void qeth_l3_shutdown(struct ccwgroup_device *gdev) qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM); qeth_qdio_clear_card(card, 0); qeth_clear_qdio_buffers(card); + qdio_free(CARD_DDEV(card)); } static int qeth_l3_pm_suspend(struct ccwgroup_device *gdev) -- cgit v0.10.2 From fc49beaee2a410402f49fd21c81a37a863b7a9ba Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Mon, 24 Feb 2014 08:54:46 -0500 Subject: qlcnic: Fix function return error check Driver was treating -ve return value as success in case of qlcnic_enable_msi_legacy() failure Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index ba78c74..149c4b6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -818,7 +818,7 @@ static int qlcnic_82xx_setup_intr(struct qlcnic_adapter *adapter) qlcnic_disable_multi_tx(adapter); err = qlcnic_enable_msi_legacy(adapter); - if (!err) + if (err) return err; } } -- cgit v0.10.2 From b7520d2b59b09eb284e5fc9080d13145f0a8d9fd Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Mon, 24 Feb 2014 08:54:47 -0500 Subject: qlcnic: Fix usage of use_msi and use_msi_x module parameters Once interrupts are enabled, instead of using module parameters, use flags (QLCNIC_MSI_ENABLED and QLCNIC_MSIX_ENABLED) set by driver to check interrupt mode. Signed-off-by: Shahed Shaikh Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 149c4b6..e10fc8e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -3863,7 +3863,7 @@ int qlcnic_validate_rings(struct qlcnic_adapter *adapter, __u32 ring_cnt, strcpy(buf, "Tx"); } - if (!qlcnic_use_msi_x && !qlcnic_use_msi) { + if (!QLCNIC_IS_MSI_FAMILY(adapter)) { netdev_err(netdev, "No RSS/TSS support in INT-x mode\n"); return -EINVAL; } -- cgit v0.10.2 From 46428228b55df144f04cbad023907c177aa00d5f Mon Sep 17 00:00:00 2001 From: Sucheta Chakraborty Date: Mon, 24 Feb 2014 08:54:48 -0500 Subject: qlcnic: Allow any VLAN to be configured from VF. o This patch reverts commit 1414abea048e0835c43600d62808ed8163897227 (qlcnic: Restrict VF from configuring any VLAN mode.) This will allow same multicast address to be used with any VLAN instead of programming seperate (MAC, VLAN) tuples in adapter. This will help save some multicast filters. Signed-off-by: Sucheta Chakraborty Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 09acf15..e5277a6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -13,8 +13,6 @@ #define QLC_VF_MIN_TX_RATE 100 #define QLC_VF_MAX_TX_RATE 9999 #define QLC_MAC_OPCODE_MASK 0x7 -#define QLC_MAC_STAR_ADD 6 -#define QLC_MAC_STAR_DEL 7 #define QLC_VF_FLOOD_BIT BIT_16 #define QLC_FLOOD_MODE 0x5 @@ -1206,13 +1204,6 @@ static int qlcnic_sriov_validate_cfg_macvlan(struct qlcnic_adapter *adapter, struct qlcnic_vport *vp = vf->vp; u8 op, new_op; - if (((cmd->req.arg[1] & QLC_MAC_OPCODE_MASK) == QLC_MAC_STAR_ADD) || - ((cmd->req.arg[1] & QLC_MAC_OPCODE_MASK) == QLC_MAC_STAR_DEL)) { - netdev_err(adapter->netdev, "MAC + any VLAN filter not allowed from VF %d\n", - vf->pci_func); - return -EINVAL; - } - if (!(cmd->req.arg[1] & BIT_8)) return -EINVAL; -- cgit v0.10.2 From 42beb3f2836a6063ceb8134dbac0e32df1deea26 Mon Sep 17 00:00:00 2001 From: Rajesh Borundia Date: Mon, 24 Feb 2014 08:54:49 -0500 Subject: qlcnic: Fix number of rings when we fall back from msix to legacy. o Driver was not re-setting sds ring count to 1 after failing to allocate msi-x interrupts. Signed-off-by: Rajesh Borundia Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 4146664..27c4f13 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -340,6 +340,7 @@ int qlcnic_83xx_setup_intr(struct qlcnic_adapter *adapter) if (qlcnic_sriov_vf_check(adapter)) return -EINVAL; num_msix = 1; + adapter->drv_sds_rings = QLCNIC_SINGLE_RING; adapter->drv_tx_rings = QLCNIC_SINGLE_RING; } } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index e10fc8e..1222865 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -816,6 +816,7 @@ static int qlcnic_82xx_setup_intr(struct qlcnic_adapter *adapter) if (!(adapter->flags & QLCNIC_MSIX_ENABLED)) { qlcnic_disable_multi_tx(adapter); + adapter->drv_sds_rings = QLCNIC_SINGLE_RING; err = qlcnic_enable_msi_legacy(adapter); if (err) -- cgit v0.10.2 From 687d705c031916b83953b714917b04d899e23cf5 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 24 Feb 2014 17:04:52 -0300 Subject: net/cxgb4: use remove handler as shutdown handler Without a shutdown handler, T4 cards behave very badly after a kexec. Some firmware calls return errors indicating allocation failures, for example. This is probably because thouse resources were not released by a BYE message to the firmware, for example. Using the remove handler guarantees we will use a well tested path. With this patch I applied, I managed to use kexec multiple times and probe and iSCSI login worked every time. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 43ab35f..34e2488 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -6179,6 +6179,7 @@ static struct pci_driver cxgb4_driver = { .id_table = cxgb4_pci_tbl, .probe = init_one, .remove = remove_one, + .shutdown = remove_one, .err_handler = &cxgb4_eeh, }; -- cgit v0.10.2 From 46833a86f7ab30101096d81117dd250bfae74c6f Mon Sep 17 00:00:00 2001 From: Mike Pecovnik Date: Mon, 24 Feb 2014 21:11:16 +0100 Subject: net: Fix permission check in netlink_connect() netlink_sendmsg() was changed to prevent non-root processes from sending messages with dst_pid != 0. netlink_connect() however still only checks if nladdr->nl_groups is set. This patch modifies netlink_connect() to check for the same condition. Signed-off-by: Mike Pecovnik Signed-off-by: David S. Miller diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index fdf5135..04748ab6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1489,8 +1489,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family != AF_NETLINK) return -EINVAL; - /* Only superuser is allowed to send multicasts */ - if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) + if ((nladdr->nl_groups || nladdr->nl_pid) && + !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; if (!nlk->portid) -- cgit v0.10.2 From a4572e0c23266d01949a1c134475e3bfa7f788e1 Mon Sep 17 00:00:00 2001 From: Cristian Bercaru Date: Tue, 25 Feb 2014 10:22:48 +0200 Subject: phy: unmask link partner capabilities Masking the link partner's capabilities with local capabilities can be misleading in autonegotiation scenarios such as PAUSE frame autonegotiation. This patch calculates the join between the local capabilities and the link parner capabilities, when it determines the speed and duplex settings, but does not mask any of the link partner capabilities when it calculates PAUSE frame settings. Signed-off-by: Cristian Bercaru Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 82514e7..4b970f7 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -916,6 +916,8 @@ int genphy_read_status(struct phy_device *phydev) int err; int lpa; int lpagb = 0; + int common_adv; + int common_adv_gb = 0; /* Update the link, but return if there was an error */ err = genphy_update_link(phydev); @@ -937,7 +939,7 @@ int genphy_read_status(struct phy_device *phydev) phydev->lp_advertising = mii_stat1000_to_ethtool_lpa_t(lpagb); - lpagb &= adv << 2; + common_adv_gb = lpagb & adv << 2; } lpa = phy_read(phydev, MII_LPA); @@ -950,25 +952,25 @@ int genphy_read_status(struct phy_device *phydev) if (adv < 0) return adv; - lpa &= adv; + common_adv = lpa & adv; phydev->speed = SPEED_10; phydev->duplex = DUPLEX_HALF; phydev->pause = 0; phydev->asym_pause = 0; - if (lpagb & (LPA_1000FULL | LPA_1000HALF)) { + if (common_adv_gb & (LPA_1000FULL | LPA_1000HALF)) { phydev->speed = SPEED_1000; - if (lpagb & LPA_1000FULL) + if (common_adv_gb & LPA_1000FULL) phydev->duplex = DUPLEX_FULL; - } else if (lpa & (LPA_100FULL | LPA_100HALF)) { + } else if (common_adv & (LPA_100FULL | LPA_100HALF)) { phydev->speed = SPEED_100; - if (lpa & LPA_100FULL) + if (common_adv & LPA_100FULL) phydev->duplex = DUPLEX_FULL; } else - if (lpa & LPA_10FULL) + if (common_adv & LPA_10FULL) phydev->duplex = DUPLEX_FULL; if (phydev->duplex == DUPLEX_FULL) { -- cgit v0.10.2 From e66c176837462928a05a135bbe16cdce70536d6e Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Tue, 25 Feb 2014 10:35:37 -0800 Subject: intel_pstate: Change busy calculation to use fixed point math. Commit fcb6a15c2e (intel_pstate: Take core C0 time into account for core busy calculation) introduced a regression on some processor SKUs supported by intel_pstate. This was due to the truncation caused by using integer math to calculate core busy and C0 percentages. On a i7-4770K processor operating at 800Mhz going to 100% utilization the percent busy of the CPU using integer math is 22%, but it actually is 22.85%. This value scaled to the current frequency returned 97 which the PID interpreted as no error and did not adjust the P state. Tested on i7-4770K, i7-2600, i5-3230M. Fixes: fcb6a15c2e7e (intel_pstate: Take core C0 time into account for core busy calculation) References: https://lkml.org/lkml/2014/2/19/626 References: https://bugzilla.kernel.org/show_bug.cgi?id=70941 Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e908161..2cd36b9 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -39,9 +39,10 @@ #define BYT_TURBO_RATIOS 0x66c -#define FRAC_BITS 8 +#define FRAC_BITS 6 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) #define fp_toint(X) ((X) >> FRAC_BITS) +#define FP_ROUNDUP(X) ((X) += 1 << FRAC_BITS) static inline int32_t mul_fp(int32_t x, int32_t y) { @@ -556,18 +557,20 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) static inline void intel_pstate_calc_busy(struct cpudata *cpu, struct sample *sample) { - u64 core_pct; - u64 c0_pct; + int32_t core_pct; + int32_t c0_pct; - core_pct = div64_u64(sample->aperf * 100, sample->mperf); + core_pct = div_fp(int_tofp((sample->aperf)), + int_tofp((sample->mperf))); + core_pct = mul_fp(core_pct, int_tofp(100)); + FP_ROUNDUP(core_pct); + + c0_pct = div_fp(int_tofp(sample->mperf), int_tofp(sample->tsc)); - c0_pct = div64_u64(sample->mperf * 100, sample->tsc); sample->freq = fp_toint( - mul_fp(int_tofp(cpu->pstate.max_pstate), - int_tofp(core_pct * 1000))); + mul_fp(int_tofp(cpu->pstate.max_pstate * 1000), core_pct)); - sample->core_pct_busy = mul_fp(int_tofp(core_pct), - div_fp(int_tofp(c0_pct + 1), int_tofp(100))); + sample->core_pct_busy = mul_fp(core_pct, c0_pct); } static inline void intel_pstate_sample(struct cpudata *cpu) @@ -579,6 +582,10 @@ static inline void intel_pstate_sample(struct cpudata *cpu) rdmsrl(MSR_IA32_MPERF, mperf); tsc = native_read_tsc(); + aperf = aperf >> FRAC_BITS; + mperf = mperf >> FRAC_BITS; + tsc = tsc >> FRAC_BITS; + cpu->sample_ptr = (cpu->sample_ptr + 1) % SAMPLE_COUNT; cpu->samples[cpu->sample_ptr].aperf = aperf; cpu->samples[cpu->sample_ptr].mperf = mperf; @@ -610,7 +617,8 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy; max_pstate = int_tofp(cpu->pstate.max_pstate); current_pstate = int_tofp(cpu->pstate.current_pstate); - return mul_fp(core_busy, div_fp(max_pstate, current_pstate)); + core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); + return FP_ROUNDUP(core_busy); } static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) -- cgit v0.10.2 From f8d5b9e9e5372f0deb7bc1ab1088a9b60b0a793d Mon Sep 17 00:00:00 2001 From: Sebastian Capella Date: Tue, 18 Feb 2014 17:52:08 -0800 Subject: PM / hibernate: Fix restore hang in freeze_processes() During restore, pm_notifier chain are called with PM_RESTORE_PREPARE. The firmware_class driver handler fw_pm_notify does not have a handler for this. As a result, it keeps a reader on the kmod.c umhelper_sem. During freeze_processes, the call to __usermodehelper_disable tries to take a write lock on this semaphore and hangs waiting. Signed-off-by: Sebastian Capella Acked-by: Ming Lei Cc: All applicable Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 8a97ddf..c30df50e 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -1580,6 +1580,7 @@ static int fw_pm_notify(struct notifier_block *notify_block, switch (mode) { case PM_HIBERNATION_PREPARE: case PM_SUSPEND_PREPARE: + case PM_RESTORE_PREPARE: kill_requests_without_uevent(); device_cache_fw_images(); break; -- cgit v0.10.2 From b6085a865762236bb84934161273cdac6dd11c2d Mon Sep 17 00:00:00 2001 From: Eugene Surovegin Date: Thu, 23 Jan 2014 09:31:20 -0800 Subject: x86, kaslr: export offset in VMCOREINFO ELF notes Include kASLR offset in VMCOREINFO ELF notes to assist in debugging. [ hpa: pushing this for v3.14 to avoid having a kernel version with kASLR where we can't debug output. ] Signed-off-by: Eugene Surovegin Link: http://lkml.kernel.org/r/20140123173120.GA25474@www.outflux.net Signed-off-by: Kees Cook Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 4eabc16..679cef0 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -279,5 +279,7 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(node_data); VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); #endif + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", + (unsigned long)&_text - __START_KERNEL); } -- cgit v0.10.2 From e290e8c59dbc2a15088d868170d799f763202fef Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 9 Feb 2014 13:56:44 -0800 Subject: x86, kaslr: add missed "static" declarations This silences build warnings about unexported variables and functions. Signed-off-by: Kees Cook Link: http://lkml.kernel.org/r/20140209215644.GA30339@www.outflux.net Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c index 90a21f4..4dbf967 100644 --- a/arch/x86/boot/compressed/aslr.c +++ b/arch/x86/boot/compressed/aslr.c @@ -111,7 +111,7 @@ struct mem_vector { }; #define MEM_AVOID_MAX 5 -struct mem_vector mem_avoid[MEM_AVOID_MAX]; +static struct mem_vector mem_avoid[MEM_AVOID_MAX]; static bool mem_contains(struct mem_vector *region, struct mem_vector *item) { @@ -180,7 +180,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, } /* Does this memory vector overlap a known avoided area? */ -bool mem_avoid_overlap(struct mem_vector *img) +static bool mem_avoid_overlap(struct mem_vector *img) { int i; @@ -192,8 +192,9 @@ bool mem_avoid_overlap(struct mem_vector *img) return false; } -unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN]; -unsigned long slot_max = 0; +static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / + CONFIG_PHYSICAL_ALIGN]; +static unsigned long slot_max; static void slots_append(unsigned long addr) { -- cgit v0.10.2 From e2b32e6785138d92d2a40e0d0473575c8c7310a2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 25 Feb 2014 16:59:17 -0800 Subject: x86, kaslr: randomize module base load address Randomize the load address of modules in the kernel to make kASLR effective for modules. Modules can only be loaded within a particular range of virtual address space. This patch adds 10 bits of entropy to the load address by adding 1-1024 * PAGE_SIZE to the beginning range where modules are loaded. The single base offset was chosen because randomizing each module load ends up wasting/fragmenting memory too much. Prior approaches to minimizing fragmentation while doing randomization tend to result in worse entropy than just doing a single base address offset. Example kASLR boot without this change, with a single module loaded: ---[ Modules ]--- 0xffffffffc0000000-0xffffffffc0001000 4K ro GLB x pte 0xffffffffc0001000-0xffffffffc0002000 4K ro GLB NX pte 0xffffffffc0002000-0xffffffffc0004000 8K RW GLB NX pte 0xffffffffc0004000-0xffffffffc0200000 2032K pte 0xffffffffc0200000-0xffffffffff000000 1006M pmd ---[ End Modules ]--- Example kASLR boot after this change, same module loaded: ---[ Modules ]--- 0xffffffffc0000000-0xffffffffc0200000 2M pmd 0xffffffffc0200000-0xffffffffc03bf000 1788K pte 0xffffffffc03bf000-0xffffffffc03c0000 4K ro GLB x pte 0xffffffffc03c0000-0xffffffffc03c1000 4K ro GLB NX pte 0xffffffffc03c1000-0xffffffffc03c3000 8K RW GLB NX pte 0xffffffffc03c3000-0xffffffffc0400000 244K pte 0xffffffffc0400000-0xffffffffff000000 1004M pmd ---[ End Modules ]--- Signed-off-by: Andy Honig Link: http://lkml.kernel.org/r/20140226005916.GA27083@www.outflux.net Signed-off-by: Kees Cook Signed-off-by: H. Peter Anvin diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7116fda..580a60c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2053,8 +2053,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. IOAPICs that may be present in the system. nokaslr [X86] - Disable kernel base offset ASLR (Address Space - Layout Randomization) if built into the kernel. + Disable kernel and module base offset ASLR (Address + Space Layout Randomization) if built into the kernel. noautogroup Disable scheduler automatic task group creation. diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 18be189..4948313 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -43,13 +44,49 @@ do { \ } while (0) #endif +#ifdef CONFIG_RANDOMIZE_BASE +static unsigned long module_load_offset; +static int randomize_modules = 1; + +static int __init parse_nokaslr(char *p) +{ + randomize_modules = 0; + return 0; +} +early_param("nokaslr", parse_nokaslr); + +static unsigned long int get_module_load_offset(void) +{ + if (randomize_modules) { + mutex_lock(&module_mutex); + /* + * Calculate the module_load_offset the first time this + * code is called. Once calculated it stays the same until + * reboot. + */ + if (module_load_offset == 0) + module_load_offset = + (get_random_int() % 1024 + 1) * PAGE_SIZE; + mutex_unlock(&module_mutex); + } + return module_load_offset; +} +#else +static unsigned long int get_module_load_offset(void) +{ + return 0; +} +#endif + void *module_alloc(unsigned long size) { if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, - NUMA_NO_NODE, __builtin_return_address(0)); + return __vmalloc_node_range(size, 1, + MODULES_VADDR + get_module_load_offset(), + MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, + PAGE_KERNEL_EXEC, NUMA_NO_NODE, + __builtin_return_address(0)); } #ifdef CONFIG_X86_32 -- cgit v0.10.2 From 3a9016f97fdc8bfbb26ff36ba8f3dc9162eb691b Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 19 Feb 2014 10:07:34 +0100 Subject: xfrm: Fix unlink race when policies are deleted. When a policy is unlinked from the lists in thread context, the xfrm timer can fire before we can mark this policy as dead. So reinitialize the bydst hlist, then hlist_unhashed() will notice that this policy is not linked and will avoid a doulble unlink of that policy. Reported-by: Xianpeng Zhao <673321875@qq.com> Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4b98b25..1d5c7bf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1158,7 +1158,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, if (hlist_unhashed(&pol->bydst)) return NULL; - hlist_del(&pol->bydst); + hlist_del_init(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); net->xfrm.policy_count[dir]--; -- cgit v0.10.2 From fce0a0c72618e021e29ed4e051ce6b42b218c5e6 Mon Sep 17 00:00:00 2001 From: Kailang Yang Date: Wed, 26 Feb 2014 15:23:19 +0800 Subject: ALSA: hda/realtek - Add more entry for enable HP mute led I lost this SSID. Add it into the fixup table. Signed-off-by: Kailang Yang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0b4ea6c..ec304f3 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4359,6 +4359,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x228a, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x228b, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x228c, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), + SND_PCI_QUIRK(0x103c, 0x228d, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x228e, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x22c5, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), SND_PCI_QUIRK(0x103c, 0x22c6, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1), -- cgit v0.10.2 From 47587fc098451c2100dc1fb618855fc2e2d937af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fernando=20Luis=20V=C3=A1zquez=20Cao?= Date: Wed, 26 Feb 2014 16:51:24 +0900 Subject: HID: hidraw: fix warning destroying hidraw device files after parent I noticed that after hot unplugging a Logitech unifying receiver (drivers/hid/hid-logitech-dj.c) the kernel would occasionally spew a stack trace similar to this: usb 1-1.1.2: USB disconnect, device number 7 WARNING: CPU: 0 PID: 2865 at fs/sysfs/group.c:216 device_del+0x40/0x1b0() sysfs group ffffffff8187fa20 not found for kobject 'hidraw0' [...] CPU: 0 PID: 2865 Comm: upowerd Tainted: G W 3.14.0-rc4 #7 Hardware name: LENOVO 7783PN4/ , BIOS 9HKT43AUS 07/11/2011 0000000000000009 ffffffff814cd684 ffff880427ccfdf8 ffffffff810616e7 ffff88041ec61800 ffff880427ccfe48 ffff88041e444d80 ffff880426fab8e8 ffff880429359960 ffffffff8106174c ffffffff81714b98 0000000000000028 Call Trace: [] ? dump_stack+0x41/0x51 [] ? warn_slowpath_common+0x77/0x90 [] ? warn_slowpath_fmt+0x4c/0x50 [] ? device_del+0x40/0x1b0 [] ? device_unregister+0x2f/0x50 [] ? device_destroy+0x3a/0x40 [] ? drop_ref+0x55/0x120 [hid] [] ? hidraw_release+0x96/0xb0 [hid] [] ? __fput+0xca/0x210 [] ? task_work_run+0x97/0xd0 [] ? do_notify_resume+0x69/0xa0 [] ? int_signal+0x12/0x17 ---[ end trace 63f4a46f6566d737 ]--- During device removal hid_disconnect() is called via hid_hw_stop() to stop the device and free all its resources, including the sysfs files. The problem is that if a user space process, such as upowerd, holds a reference to a hidraw file the corresponding sysfs files will be kept around (drop_ref() does not call device_destroy() if the open counter is not 0) and it will be usb_disconnect() who, by calling device_del() for the USB device, will indirectly remove the sysfs files of the hidraw device (sysfs_remove_dir() is recursive these days). Because of this, by the time user space releases the last reference to the hidraw file and drop_ref() tries to destroy the device the sysfs files are already gone and the kernel will print the warning above. Fix this by calling device_destroy() at USB disconnect time. Signed-off-by: Fernando Luis Vazquez Cao Reviewed-by: David Herrmann Cc: stable@vger.kernel.org # 3.13 Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index cb0137b..ab24ce2 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -320,13 +320,13 @@ static void drop_ref(struct hidraw *hidraw, int exists_bit) hid_hw_close(hidraw->hid); wake_up_interruptible(&hidraw->wait); } + device_destroy(hidraw_class, + MKDEV(hidraw_major, hidraw->minor)); } else { --hidraw->open; } if (!hidraw->open) { if (!hidraw->exist) { - device_destroy(hidraw_class, - MKDEV(hidraw_major, hidraw->minor)); hidraw_table[hidraw->minor] = NULL; kfree(hidraw); } else { -- cgit v0.10.2 From c7db4ff5d2b459a579d348532a92fd5885520ce6 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 11:20:17 +0000 Subject: arm64: Add macros to manage processor debug state Add macros to enable and disable to manage PSTATE.D for debugging. The macros local_dbg_save and local_dbg_restore are moved to irqflags.h file KGDB boot tests fail because of PSTATE.D is masked. unmask it for debugging support Signed-off-by: Vijaya Kumar K Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 6231479..ee9f28e 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -43,23 +43,6 @@ enum debug_el { #ifndef __ASSEMBLY__ struct task_struct; -#define local_dbg_save(flags) \ - do { \ - typecheck(unsigned long, flags); \ - asm volatile( \ - "mrs %0, daif // local_dbg_save\n" \ - "msr daifset, #8" \ - : "=r" (flags) : : "memory"); \ - } while (0) - -#define local_dbg_restore(flags) \ - do { \ - typecheck(unsigned long, flags); \ - asm volatile( \ - "msr daif, %0 // local_dbg_restore\n" \ - : : "r" (flags) : "memory"); \ - } while (0) - #define DBG_ARCH_ID_RESERVED 0 /* In case of ptrace ABI updates. */ #define DBG_HOOK_HANDLED 0 diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index b2fcfbc..11cc941 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -90,5 +90,28 @@ static inline int arch_irqs_disabled_flags(unsigned long flags) return flags & PSR_I_BIT; } +/* + * save and restore debug state + */ +#define local_dbg_save(flags) \ + do { \ + typecheck(unsigned long, flags); \ + asm volatile( \ + "mrs %0, daif // local_dbg_save\n" \ + "msr daifset, #8" \ + : "=r" (flags) : : "memory"); \ + } while (0) + +#define local_dbg_restore(flags) \ + do { \ + typecheck(unsigned long, flags); \ + asm volatile( \ + "msr daif, %0 // local_dbg_restore\n" \ + : : "r" (flags) : "memory"); \ + } while (0) + +#define local_dbg_enable() asm("msr daifclr, #8" : : : "memory") +#define local_dbg_disable() asm("msr daifset, #8" : : : "memory") + #endif #endif diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 636ba8b..1a8127d 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -138,6 +138,7 @@ static void clear_os_lock(void *unused) { asm volatile("msr oslar_el1, %0" : : "r" (0)); isb(); + local_dbg_enable(); } static int os_lock_notify(struct notifier_block *self, -- cgit v0.10.2 From bcf5763b0d58d20e288ac52f96cbd7788e262cac Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 16:50:18 +0530 Subject: arm64: KGDB: Add Basic KGDB support Add KGDB debug support for kernel debugging. With this patch, basic KGDB debugging is possible.GDB register layout is updated and GDB tool can establish connection with target and can set/clear breakpoints. Signed-off-by: Vijaya Kumar K Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index ee9f28e..6e9b5b3 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -26,6 +26,53 @@ #define DBG_ESR_EVT_HWWP 0x2 #define DBG_ESR_EVT_BRK 0x6 +/* + * Break point instruction encoding + */ +#define BREAK_INSTR_SIZE 4 + +/* + * ESR values expected for dynamic and compile time BRK instruction + */ +#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xfffff)) + +/* + * #imm16 values used for BRK instruction generation + * Allowed values for kgbd are 0x400 - 0x7ff + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + */ +#define KGDB_DYN_DGB_BRK_IMM 0x400 +#define KDBG_COMPILED_DBG_BRK_IMM 0x401 + +/* + * BRK instruction encoding + * The #imm16 value should be placed at bits[20:5] within BRK ins + */ +#define AARCH64_BREAK_MON 0xd4200000 + +/* + * Extract byte from BRK instruction + */ +#define KGDB_DYN_DGB_BRK_INS_BYTE(x) \ + ((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff) + +/* + * Extract byte from BRK #imm16 + */ +#define KGBD_DYN_DGB_BRK_IMM_BYTE(x) \ + (((((KGDB_DYN_DGB_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff) + +#define KGDB_DYN_DGB_BRK_BYTE(x) \ + (KGDB_DYN_DGB_BRK_INS_BYTE(x) | KGBD_DYN_DGB_BRK_IMM_BYTE(x)) + +#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DGB_BRK_BYTE(0) +#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DGB_BRK_BYTE(1) +#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DGB_BRK_BYTE(2) +#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DGB_BRK_BYTE(3) + +#define CACHE_FLUSH_IS_SAFE 1 + enum debug_el { DBG_ACTIVE_EL0 = 0, DBG_ACTIVE_EL1, diff --git a/arch/arm64/include/asm/kgdb.h b/arch/arm64/include/asm/kgdb.h new file mode 100644 index 0000000..3c8aafc --- /dev/null +++ b/arch/arm64/include/asm/kgdb.h @@ -0,0 +1,84 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/include/kgdb.h + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM_KGDB_H +#define __ARM_KGDB_H + +#include +#include + +#ifndef __ASSEMBLY__ + +static inline void arch_kgdb_breakpoint(void) +{ + asm ("brk %0" : : "I" (KDBG_COMPILED_DBG_BRK_IMM)); +} + +extern void kgdb_handle_bus_error(void); +extern int kgdb_fault_expected; + +#endif /* !__ASSEMBLY__ */ + +/* + * gdb is expecting the following registers layout. + * + * General purpose regs: + * r0-r30: 64 bit + * sp,pc : 64 bit + * pstate : 64 bit + * Total: 34 + * FPU regs: + * f0-f31: 128 bit + * Total: 32 + * Extra regs + * fpsr & fpcr: 32 bit + * Total: 2 + * + */ + +#define _GP_REGS 34 +#define _FP_REGS 32 +#define _EXTRA_REGS 2 +/* + * general purpose registers size in bytes. + * pstate is only 4 bytes. subtract 4 bytes + */ +#define GP_REG_BYTES (_GP_REGS * 8) +#define DBG_MAX_REG_NUM (_GP_REGS + _FP_REGS + _EXTRA_REGS) + +/* + * Size of I/O buffer for gdb packet. + * considering to hold all register contents, size is set + */ + +#define BUFMAX 2048 + +/* + * Number of bytes required for gdb_regs buffer. + * _GP_REGS: 8 bytes, _FP_REGS: 16 bytes and _EXTRA_REGS: 4 bytes each + * GDB fails to connect for size beyond this with error + * "'g' packet reply is too long" + */ + +#define NUMREGBYTES ((_GP_REGS * 8) + (_FP_REGS * 16) + \ + (_EXTRA_REGS * 4)) + +#endif /* __ASM_KGDB_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 2d4554b..e52bcdc 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -20,6 +20,7 @@ arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o +arm64-obj-$(CONFIG_KGDB) += kgdb.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c new file mode 100644 index 0000000..4b7a569 --- /dev/null +++ b/arch/arm64/kernel/kgdb.c @@ -0,0 +1,288 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/kernel/kgdb.c + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "x0", 8, offsetof(struct pt_regs, regs[0])}, + { "x1", 8, offsetof(struct pt_regs, regs[1])}, + { "x2", 8, offsetof(struct pt_regs, regs[2])}, + { "x3", 8, offsetof(struct pt_regs, regs[3])}, + { "x4", 8, offsetof(struct pt_regs, regs[4])}, + { "x5", 8, offsetof(struct pt_regs, regs[5])}, + { "x6", 8, offsetof(struct pt_regs, regs[6])}, + { "x7", 8, offsetof(struct pt_regs, regs[7])}, + { "x8", 8, offsetof(struct pt_regs, regs[8])}, + { "x9", 8, offsetof(struct pt_regs, regs[9])}, + { "x10", 8, offsetof(struct pt_regs, regs[10])}, + { "x11", 8, offsetof(struct pt_regs, regs[11])}, + { "x12", 8, offsetof(struct pt_regs, regs[12])}, + { "x13", 8, offsetof(struct pt_regs, regs[13])}, + { "x14", 8, offsetof(struct pt_regs, regs[14])}, + { "x15", 8, offsetof(struct pt_regs, regs[15])}, + { "x16", 8, offsetof(struct pt_regs, regs[16])}, + { "x17", 8, offsetof(struct pt_regs, regs[17])}, + { "x18", 8, offsetof(struct pt_regs, regs[18])}, + { "x19", 8, offsetof(struct pt_regs, regs[19])}, + { "x20", 8, offsetof(struct pt_regs, regs[20])}, + { "x21", 8, offsetof(struct pt_regs, regs[21])}, + { "x22", 8, offsetof(struct pt_regs, regs[22])}, + { "x23", 8, offsetof(struct pt_regs, regs[23])}, + { "x24", 8, offsetof(struct pt_regs, regs[24])}, + { "x25", 8, offsetof(struct pt_regs, regs[25])}, + { "x26", 8, offsetof(struct pt_regs, regs[26])}, + { "x27", 8, offsetof(struct pt_regs, regs[27])}, + { "x28", 8, offsetof(struct pt_regs, regs[28])}, + { "x29", 8, offsetof(struct pt_regs, regs[29])}, + { "x30", 8, offsetof(struct pt_regs, regs[30])}, + { "sp", 8, offsetof(struct pt_regs, sp)}, + { "pc", 8, offsetof(struct pt_regs, pc)}, + { "pstate", 8, offsetof(struct pt_regs, pstate)}, + { "v0", 16, -1 }, + { "v1", 16, -1 }, + { "v2", 16, -1 }, + { "v3", 16, -1 }, + { "v4", 16, -1 }, + { "v5", 16, -1 }, + { "v6", 16, -1 }, + { "v7", 16, -1 }, + { "v8", 16, -1 }, + { "v9", 16, -1 }, + { "v10", 16, -1 }, + { "v11", 16, -1 }, + { "v12", 16, -1 }, + { "v13", 16, -1 }, + { "v14", 16, -1 }, + { "v15", 16, -1 }, + { "v16", 16, -1 }, + { "v17", 16, -1 }, + { "v18", 16, -1 }, + { "v19", 16, -1 }, + { "v20", 16, -1 }, + { "v21", 16, -1 }, + { "v22", 16, -1 }, + { "v23", 16, -1 }, + { "v24", 16, -1 }, + { "v25", 16, -1 }, + { "v26", 16, -1 }, + { "v27", 16, -1 }, + { "v28", 16, -1 }, + { "v29", 16, -1 }, + { "v30", 16, -1 }, + { "v31", 16, -1 }, + { "fpsr", 4, -1 }, + { "fpcr", 4, -1 }, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + struct pt_regs *thread_regs; + + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + thread_regs = task_pt_regs(task); + memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->pc = pc; +} + +static int compiled_break; + +int kgdb_arch_handle_exception(int exception_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + unsigned long addr; + char *ptr; + int err; + + switch (remcom_in_buffer[0]) { + case 'D': + case 'k': + /* + * Packet D (Detach), k (kill). No special handling + * is required here. Handle same as c packet. + */ + case 'c': + /* + * Packet c (Continue) to continue executing. + * Set pc to required address. + * Try to read optional parameter and set pc. + * If this was a compiled breakpoint, we need to move + * to the next instruction else we will just breakpoint + * over and over again. + */ + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + kgdb_arch_set_pc(linux_regs, addr); + else if (compiled_break == 1) + kgdb_arch_set_pc(linux_regs, linux_regs->pc + 4); + + compiled_break = 0; + err = 0; + break; + default: + err = -1; + } + return err; +} + +static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + compiled_break = 1; + kgdb_handle_exception(1, SIGTRAP, 0, regs); + + return 0; +} + +static struct break_hook kgdb_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), + .fn = kgdb_brk_fn +}; + +static struct break_hook kgdb_compiled_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM), + .fn = kgdb_compiled_brk_fn +}; + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + if (kgdb_handle_exception(1, args->signr, cmd, regs)) + return NOTIFY_DONE; + return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + /* + * Want to be lowest priority + */ + .priority = -INT_MAX, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + int ret = register_die_notifier(&kgdb_notifier); + + if (ret != 0) + return ret; + + register_break_hook(&kgdb_brkpt_hook); + register_break_hook(&kgdb_compiled_brkpt_hook); + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_break_hook(&kgdb_brkpt_hook); + unregister_break_hook(&kgdb_compiled_brkpt_hook); + unregister_die_notifier(&kgdb_notifier); +} + +/* + * ARM instructions are always in LE. + * Break instruction is encoded in LE format + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = { + KGDB_DYN_BRK_INS_BYTE0, + KGDB_DYN_BRK_INS_BYTE1, + KGDB_DYN_BRK_INS_BYTE2, + KGDB_DYN_BRK_INS_BYTE3, + } +}; -- cgit v0.10.2 From 44679a4f142b69ae0c68ed815a48bbd164827281 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 11:20:19 +0000 Subject: arm64: KGDB: Add step debugging support Add KGDB software step debugging support for EL1 debug in AArch64 mode. KGDB registers step debug handler with debug monitor. On receiving 'step' command from GDB tool, target enables software step debugging and step address is updated in ELR. Software Step debugging is disabled when 'continue' command is received Signed-off-by: Vijaya Kumar K Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index 4b7a569..75c9cf1 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -137,13 +137,26 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) static int compiled_break; +static void kgdb_arch_update_addr(struct pt_regs *regs, + char *remcom_in_buffer) +{ + unsigned long addr; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + kgdb_arch_set_pc(regs, addr); + else if (compiled_break == 1) + kgdb_arch_set_pc(regs, regs->pc + 4); + + compiled_break = 0; +} + int kgdb_arch_handle_exception(int exception_vector, int signo, int err_code, char *remcom_in_buffer, char *remcom_out_buffer, struct pt_regs *linux_regs) { - unsigned long addr; - char *ptr; int err; switch (remcom_in_buffer[0]) { @@ -162,13 +175,36 @@ int kgdb_arch_handle_exception(int exception_vector, int signo, * to the next instruction else we will just breakpoint * over and over again. */ - ptr = &remcom_in_buffer[1]; - if (kgdb_hex2long(&ptr, &addr)) - kgdb_arch_set_pc(linux_regs, addr); - else if (compiled_break == 1) - kgdb_arch_set_pc(linux_regs, linux_regs->pc + 4); + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, -1); + kgdb_single_step = 0; + + /* + * Received continue command, disable single step + */ + if (kernel_active_single_step()) + kernel_disable_single_step(); + + err = 0; + break; + case 's': + /* + * Update step address value with address passed + * with step packet. + * On debug exception return PC is copied to ELR + * So just update PC. + * If no step address is passed, resume from the address + * pointed by PC. Do not update PC + */ + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); + kgdb_single_step = 1; - compiled_break = 0; + /* + * Enable single step handling + */ + if (!kernel_active_single_step()) + kernel_enable_single_step(linux_regs); err = 0; break; default: @@ -191,6 +227,12 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) return 0; } +static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), @@ -203,6 +245,10 @@ static struct break_hook kgdb_compiled_brkpt_hook = { .fn = kgdb_compiled_brk_fn }; +static struct step_hook kgdb_step_hook = { + .fn = kgdb_step_brk_fn +}; + static void kgdb_call_nmi_hook(void *ignored) { kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); @@ -259,6 +305,7 @@ int kgdb_arch_init(void) register_break_hook(&kgdb_brkpt_hook); register_break_hook(&kgdb_compiled_brkpt_hook); + register_step_hook(&kgdb_step_hook); return 0; } @@ -271,6 +318,7 @@ void kgdb_arch_exit(void) { unregister_break_hook(&kgdb_brkpt_hook); unregister_break_hook(&kgdb_compiled_brkpt_hook); + unregister_step_hook(&kgdb_step_hook); unregister_die_notifier(&kgdb_notifier); } -- cgit v0.10.2 From d498d4b47fb3050f2f7840cc49251f87f04d1ca9 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 16:50:20 +0530 Subject: KGDB: make kgdb_breakpoint() as noinline The function kgdb_breakpoint() sets up break point at compile time by calling arch_kgdb_breakpoint(); Though this call is surrounded by wmb() barrier, the compile can still re-order the break point, because this scheduling barrier is not a code motion barrier in gcc. Making kgdb_breakpoint() as noinline solves this problem of code reording around break point instruction and also avoids problem of being called as inline function from other places More details about discussion on this can be found here http://comments.gmane.org/gmane.linux.ports.arm.kernel/269732 Signed-off-by: Vijaya Kumar K Acked-by: Will Deacon Acked-by: Jason Wessel Signed-off-by: Catalin Marinas diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 334b398..99982a7 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -1035,7 +1035,7 @@ int dbg_io_get_char(void) * otherwise as a quick means to stop program execution and "break" into * the debugger. */ -void kgdb_breakpoint(void) +noinline void kgdb_breakpoint(void) { atomic_inc(&kgdb_setting_breakpoint); wmb(); /* Sync point before breakpoint */ -- cgit v0.10.2 From 58dcc204f18af2821f683b235bb376f9db2557f5 Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 16:50:21 +0530 Subject: misc: debug: remove compilation warnings typecast instruction_pointer macro to unsigned long to resolve following compiler warnings like warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'u64' [-Wformat] Signed-off-by: Vijaya Kumar K Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 0e7fa49..5233966 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -164,7 +164,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs) return 0; } -#define instruction_pointer(regs) (regs)->pc +#define instruction_pointer(regs) ((unsigned long)(regs)->pc) #ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); -- cgit v0.10.2 From 9529247db9ecfc5a723e17093614e7437ab0d5bd Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Tue, 28 Jan 2014 11:20:22 +0000 Subject: arm64: KGDB: Add KGDB config Add HAVE_ARCH_KGDB for arm64 Kconfig Signed-off-by: Vijaya Kumar K Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 27bbcfc..0cb33a2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,7 @@ config ARM64 select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK -- cgit v0.10.2 From d8ed442a009ecfe155b57d58f231db3d6084633d Mon Sep 17 00:00:00 2001 From: Vijaya Kumar K Date: Fri, 21 Feb 2014 05:13:49 +0000 Subject: arm64: enable processor debug state for secondary cpus processor debug state PSTATE.D is unmasked in smp call clear_os_lock for secondary cpus. So debug state is still masked in normal kernel context. With this patch, unmask debug state on secondary boot for the cpus in normal kernel context. Now kgdb tests passed with multicore. Signed-off-by: Vijaya Kumar K Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 1a8127d..13f87de 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -137,8 +137,6 @@ void disable_debug_monitors(enum debug_el el) static void clear_os_lock(void *unused) { asm volatile("msr oslar_el1, %0" : : "r" (0)); - isb(); - local_dbg_enable(); } static int os_lock_notify(struct notifier_block *self, @@ -157,8 +155,9 @@ static struct notifier_block os_lock_nb = { static int debug_monitors_init(void) { /* Clear the OS lock. */ - smp_call_function(clear_os_lock, NULL, 1); - clear_os_lock(NULL); + on_each_cpu(clear_os_lock, NULL, 1); + isb(); + local_dbg_enable(); /* Register hotplug handler. */ register_cpu_notifier(&os_lock_nb); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 7cfb92a..5070dc3 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -160,6 +160,7 @@ asmlinkage void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); + local_dbg_enable(); local_irq_enable(); local_async_enable(); -- cgit v0.10.2 From 22bd1c91fe13d59cff734b69b6757adcfbd8dee9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Feb 2014 16:37:59 +0000 Subject: arm64: Extend the PCI I/O space to 16MB The patch moves the PCI I/O space (currently at 64K) before the earlyprintk mapping and extends it to 16MB. Signed-off-by: Catalin Marinas diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 5e054bf..85e24c4 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -35,11 +35,13 @@ ffffffbc00000000 ffffffbdffffffff 8GB vmemmap ffffffbe00000000 ffffffbffbbfffff ~8GB [guard, future vmmemap] -ffffffbffbc00000 ffffffbffbdfffff 2MB earlyprintk device +ffffffbffa000000 ffffffbffaffffff 16MB PCI I/O space + +ffffffbffb000000 ffffffbffbbfffff 12MB [guard] -ffffffbffbe00000 ffffffbffbe0ffff 64KB PCI I/O space +ffffffbffbc00000 ffffffbffbdfffff 2MB earlyprintk device -ffffffbffbe10000 ffffffbcffffffff ~2MB [guard] +ffffffbffbe00000 ffffffbffbffffff 2MB [guard] ffffffbffc000000 ffffffbfffffffff 64MB modules @@ -60,11 +62,13 @@ fffffdfc00000000 fffffdfdffffffff 8GB vmemmap fffffdfe00000000 fffffdfffbbfffff ~8GB [guard, future vmmemap] -fffffdfffbc00000 fffffdfffbdfffff 2MB earlyprintk device +fffffdfffa000000 fffffdfffaffffff 16MB PCI I/O space + +fffffdfffb000000 fffffdfffbbfffff 12MB [guard] -fffffdfffbe00000 fffffdfffbe0ffff 64KB PCI I/O space +fffffdfffbc00000 fffffdfffbdfffff 2MB earlyprintk device -fffffdfffbe10000 fffffdfffbffffff ~2MB [guard] +fffffdfffbe00000 fffffdfffbffffff 2MB [guard] fffffdfffc000000 fffffdffffffffff 64MB modules diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 4cc813e..7846a6b 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -121,7 +121,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * I/O port access primitives. */ #define IO_SPACE_LIMIT 0xffff -#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_2M)) +#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_32M)) static inline u8 inb(unsigned long addr) { -- cgit v0.10.2 From 020c1427f3ad73c86fd566d86441e673adfe2ecb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 11 Feb 2014 15:22:01 +0000 Subject: arm64: Convert asm/tlb.h to generic mmu_gather Over the past couple of years, the generic mmu_gather gained range tracking - 597e1c3580b7 (mm/mmu_gather: enable tlb flush range in generic mmu_gather), 2b047252d087 (Fix TLB gather virtual address range invalidation corner cases) - and tlb_fast_mode() has been removed - 29eb77825cc7 (arch, mm: Remove tlb_fast_mode()). The new mmu_gather structure is now suitable for arm64 and this patch converts the arch asm/tlb.h to the generic code. One functional difference is the shift_arg_pages() case where previously the code was flushing the full mm (no tlb_start_vma call) but now it flushes the range given to tlb_gather_mmu() (possibly slightly more efficient previously). Signed-off-by: Catalin Marinas Cc: Peter Zijlstra diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 717031a..72cadf5 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -19,115 +19,44 @@ #ifndef __ASM_TLB_H #define __ASM_TLB_H -#include -#include -#include -#include - -#define MMU_GATHER_BUNDLE 8 - -/* - * TLB handling. This allows us to remove pages from the page - * tables, and efficiently handle the TLB issues. - */ -struct mmu_gather { - struct mm_struct *mm; - unsigned int fullmm; - struct vm_area_struct *vma; - unsigned long start, end; - unsigned long range_start; - unsigned long range_end; - unsigned int nr; - unsigned int max; - struct page **pages; - struct page *local[MMU_GATHER_BUNDLE]; -}; +#include /* - * This is unnecessarily complex. There's three ways the TLB shootdown - * code is used: + * There's three ways the TLB shootdown code is used: * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region(). * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called. - * tlb->vma will be non-NULL. * 2. Unmapping all vmas. See exit_mmap(). * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called. - * tlb->vma will be non-NULL. Additionally, page tables will be freed. + * Page tables will be freed. * 3. Unmapping argument pages. See shift_arg_pages(). * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called. - * tlb->vma will be NULL. */ static inline void tlb_flush(struct mmu_gather *tlb) { - if (tlb->fullmm || !tlb->vma) + if (tlb->fullmm) { flush_tlb_mm(tlb->mm); - else if (tlb->range_end > 0) { - flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end); - tlb->range_start = TASK_SIZE; - tlb->range_end = 0; + } else if (tlb->end > 0) { + struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + flush_tlb_range(&vma, tlb->start, tlb->end); + tlb->start = TASK_SIZE; + tlb->end = 0; } } static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr) { if (!tlb->fullmm) { - if (addr < tlb->range_start) - tlb->range_start = addr; - if (addr + PAGE_SIZE > tlb->range_end) - tlb->range_end = addr + PAGE_SIZE; - } -} - -static inline void __tlb_alloc_page(struct mmu_gather *tlb) -{ - unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); - - if (addr) { - tlb->pages = (void *)addr; - tlb->max = PAGE_SIZE / sizeof(struct page *); + tlb->start = min(tlb->start, addr); + tlb->end = max(tlb->end, addr + PAGE_SIZE); } } -static inline void tlb_flush_mmu(struct mmu_gather *tlb) -{ - tlb_flush(tlb); - free_pages_and_swap_cache(tlb->pages, tlb->nr); - tlb->nr = 0; - if (tlb->pages == tlb->local) - __tlb_alloc_page(tlb); -} - -static inline void -tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end) -{ - tlb->mm = mm; - tlb->fullmm = !(start | (end+1)); - tlb->start = start; - tlb->end = end; - tlb->vma = NULL; - tlb->max = ARRAY_SIZE(tlb->local); - tlb->pages = tlb->local; - tlb->nr = 0; - __tlb_alloc_page(tlb); -} - -static inline void -tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) -{ - tlb_flush_mmu(tlb); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - if (tlb->pages != tlb->local) - free_pages((unsigned long)tlb->pages, 0); -} - /* * Memorize the range for the TLB flush. */ -static inline void -tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr) +static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, + unsigned long addr) { tlb_add_flush(tlb, addr); } @@ -137,38 +66,24 @@ tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr) * case where we're doing a full MM flush. When we're doing a munmap, * the vmas are adjusted to only cover the region to be torn down. */ -static inline void -tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +static inline void tlb_start_vma(struct mmu_gather *tlb, + struct vm_area_struct *vma) { if (!tlb->fullmm) { - tlb->vma = vma; - tlb->range_start = TASK_SIZE; - tlb->range_end = 0; + tlb->start = TASK_SIZE; + tlb->end = 0; } } -static inline void -tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +static inline void tlb_end_vma(struct mmu_gather *tlb, + struct vm_area_struct *vma) { if (!tlb->fullmm) tlb_flush(tlb); } -static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - tlb->pages[tlb->nr++] = page; - VM_BUG_ON(tlb->nr > tlb->max); - return tlb->max - tlb->nr; -} - -static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) -{ - if (!__tlb_remove_page(tlb, page)) - tlb_flush_mmu(tlb); -} - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, - unsigned long addr) + unsigned long addr) { pgtable_page_dtor(pte); tlb_add_flush(tlb, addr); @@ -184,16 +99,5 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, } #endif -#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) -#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) -#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) - -#define tlb_migrate_finish(mm) do { } while (0) - -static inline void -tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) -{ - tlb_add_flush(tlb, addr); -} #endif -- cgit v0.10.2 From ea8c2e1124457f266f82effc3e6558552527943a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 17 Feb 2014 12:03:25 +0000 Subject: arm64: Extend the idmap to the whole kernel image This patch changes the idmap page table creation during boot to cover the whole kernel image, allowing functions like cpu_reset() to be safely called with the physical address. This patch also simplifies the create_block_map asm macro to no longer take an idmap argument and always use the phys/virt/end parameters. For the idmap case, phys == virt. Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0b281ff..61035d6 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -384,26 +384,18 @@ ENDPROC(__calc_phys_offset) * Preserves: tbl, flags * Corrupts: phys, start, end, pstate */ - .macro create_block_map, tbl, flags, phys, start, end, idmap=0 + .macro create_block_map, tbl, flags, phys, start, end lsr \phys, \phys, #BLOCK_SHIFT - .if \idmap - and \start, \phys, #PTRS_PER_PTE - 1 // table index - .else lsr \start, \start, #BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - .endif orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - .ifnc \start,\end lsr \end, \end, #BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index - .endif 9999: str \phys, [\tbl, \start, lsl #3] // store the entry - .ifnc \start,\end add \start, \start, #1 // next entry add \phys, \phys, #BLOCK_SIZE // next block cmp \start, \end b.ls 9999b - .endif .endm /* @@ -435,9 +427,13 @@ __create_page_tables: * Create the identity mapping. */ add x0, x25, #PAGE_SIZE // section table address - adr x3, __turn_mmu_on // virtual/physical address + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) create_pgd_entry x25, x0, x3, x5, x6 - create_block_map x0, x7, x3, x5, x5, idmap=1 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 /* * Map the kernel image (starting with PHYS_OFFSET). @@ -445,7 +441,7 @@ __create_page_tables: add x0, x26, #PAGE_SIZE // section table address mov x5, #PAGE_OFFSET create_pgd_entry x26, x0, x5, x3, x6 - ldr x6, =KERNEL_END - 1 + ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 -- cgit v0.10.2 From 09024aa61e1bc994404683e2e5b363484a15dd12 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 17 Dec 2013 00:19:29 +0000 Subject: arm64: Fix the soft_restart routine Change the soft_restart() routine to call cpu_reset() at its identity mapped physical address. The cpu_reset() routine must be called at its identity mapped physical address so that when the MMU is turned off the instruction pointer will be at the correct location in physical memory. Signed-off-by: Geoff Levand for Huawei, Linaro Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 1c0a9be..fa6b5bb 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -72,8 +72,17 @@ static void setup_restart(void) void soft_restart(unsigned long addr) { + typedef void (*phys_reset_t)(unsigned long); + phys_reset_t phys_reset; + setup_restart(); - cpu_reset(addr); + + /* Switch to the identity mapping */ + phys_reset = (phys_reset_t)virt_to_phys(cpu_reset); + phys_reset(addr); + + /* Should never get here */ + BUG(); } /* -- cgit v0.10.2 From bb10eb7b4d176f408d45fb492df28bed2981a1f3 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Thu, 6 Feb 2014 17:21:51 +0530 Subject: arm64: Change misleading function names in dma-mapping arm64_swiotlb_alloc/free_coherent name can be misleading somtimes with CMA support being enabled after this patch (c2104debc235b745265b64d610237a6833fd53) Change this name to be more generic: __dma_alloc/free_coherent Signed-off-by: Ritesh Harjani [catalin.marinas@arm.com: renamed arm64_swiotlb_dma_ops to coherent_swiotlb_dma_ops] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index fbd7678..985fc07 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -30,9 +30,9 @@ struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) +static void *__dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { if (dev == NULL) { WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); @@ -58,9 +58,9 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, } } -static void arm64_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) +static void __dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { if (dev == NULL) { WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); @@ -78,9 +78,9 @@ static void arm64_swiotlb_free_coherent(struct device *dev, size_t size, } } -static struct dma_map_ops arm64_swiotlb_dma_ops = { - .alloc = arm64_swiotlb_alloc_coherent, - .free = arm64_swiotlb_free_coherent, +static struct dma_map_ops coherent_swiotlb_dma_ops = { + .alloc = __dma_alloc_coherent, + .free = __dma_free_coherent, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .map_sg = swiotlb_map_sg_attrs, @@ -95,7 +95,7 @@ static struct dma_map_ops arm64_swiotlb_dma_ops = { void __init arm64_swiotlb_init(void) { - dma_ops = &arm64_swiotlb_dma_ops; + dma_ops = &coherent_swiotlb_dma_ops; swiotlb_init(1); } -- cgit v0.10.2 From 16fb1a9bec6126162560f159df449e4781560807 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Tue, 11 Feb 2014 22:28:42 +0000 Subject: arm64: vdso: clean up vdso_pagelist initialization Remove some unnecessary bits that were apparently carried over from another architecture's implementation: - No need to get_page() the vdso text/data - these are part of the kernel image. - No need for ClearPageReserved on the vdso text. - No need to vmap the first text page to check the ELF header - this can be done through &vdso_start. Also some minor cleanup: - Use kcalloc for vdso_pagelist array allocation. - Don't print on allocation failure, slab/slub will do that for us. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index a7149ca..50384fe 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -106,49 +106,31 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) static int __init vdso_init(void) { - struct page *pg; - char *vbase; - int i, ret = 0; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", vdso_pages + 1, vdso_pages, 1L, &vdso_start); /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1), + vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); - if (vdso_pagelist == NULL) { - pr_err("Failed to allocate vDSO pagelist!\n"); + if (vdso_pagelist == NULL) return -ENOMEM; - } /* Grab the vDSO code pages. */ - for (i = 0; i < vdso_pages; i++) { - pg = virt_to_page(&vdso_start + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - vdso_pagelist[i] = pg; - } - - /* Sanity check the shared object header. */ - vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL); - if (vbase == NULL) { - pr_err("Failed to map vDSO pagelist!\n"); - return -ENOMEM; - } else if (memcmp(vbase, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - ret = -EINVAL; - goto unmap; - } + for (i = 0; i < vdso_pages; i++) + vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); /* Grab the vDSO data page. */ - pg = virt_to_page(vdso_data); - get_page(pg); - vdso_pagelist[i] = pg; + vdso_pagelist[i] = virt_to_page(vdso_data); -unmap: - vunmap(vbase); - return ret; + return 0; } arch_initcall(vdso_init); -- cgit v0.10.2 From 75306820248e26d15d84acf4e297b9fb27dd3bb2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Feb 2014 11:59:14 +0900 Subject: ASoC: da732x: Mark DC offset control registers volatile The driver reads from the DC offset control registers during callibration but since the registers are marked as volatile and there is a register cache the values will not be read from the hardware after the first reading rendering the callibration ineffective. It appears that the driver was originally written for the ASoC level register I/O code but converted to regmap prior to merge and this issue was missed during the conversion as the framework level volatile register functionality was not being used. Signed-off-by: Mark Brown Acked-by: Adam Thomson Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/da732x.c b/sound/soc/codecs/da732x.c index f295b65..f4d965ebc 100644 --- a/sound/soc/codecs/da732x.c +++ b/sound/soc/codecs/da732x.c @@ -1268,11 +1268,23 @@ static struct snd_soc_dai_driver da732x_dai[] = { }, }; +static bool da732x_volatile(struct device *dev, unsigned int reg) +{ + switch (reg) { + case DA732X_REG_HPL_DAC_OFF_CNTL: + case DA732X_REG_HPR_DAC_OFF_CNTL: + return true; + default: + return false; + } +} + static const struct regmap_config da732x_regmap = { .reg_bits = 8, .val_bits = 8, .max_register = DA732X_MAX_REG, + .volatile_reg = da732x_volatile, .reg_defaults = da732x_reg_cache, .num_reg_defaults = ARRAY_SIZE(da732x_reg_cache), .cache_type = REGCACHE_RBTREE, -- cgit v0.10.2 From 8857563b819b140aa8c9be920cfe44d5d3f808b7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 12 Feb 2014 13:02:22 -0800 Subject: notifier: Substitute rcu_access_pointer() for rcu_dereference_raw() (Trivial patch.) If the code is looking at the RCU-protected pointer itself, but not dereferencing it, the rcu_dereference() functions can be downgraded to rcu_access_pointer(). This commit makes this downgrade in __blocking_notifier_call_chain() which simply compares the RCU-protected pointer against NULL with no dereferencing. Signed-off-by: Paul E. McKenney Cc: Andrew Morton Cc: Linus Torvalds Reviewed-by: Josh Triplett diff --git a/kernel/notifier.c b/kernel/notifier.c index 2d5cc4c..db4c8b0 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -309,7 +309,7 @@ int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, * racy then it does not matter what the result of the test * is, we re-check the list after having taken the lock anyway: */ - if (rcu_dereference_raw(nh->head)) { + if (rcu_access_pointer(nh->head)) { down_read(&nh->rwsem); ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls); -- cgit v0.10.2 From 7a754743185a4b05818e10058fa2fbe4e6969085 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 11 Feb 2014 16:10:12 -0500 Subject: rcu: Fix sparse warning for rcu_expedited from kernel/ksysfs.c This commit fixes the follwoing warning: kernel/ksysfs.c:143:5: warning: symbol 'rcu_expedited' was not declared. Should it be static? Signed-off-by: Paul Gortmaker [ paulmck: Moved the declaration to include/linux/rcupdate.h to avoid including the RCU-internal rcu.h file outside of RCU. ] Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 32decf1..f3706c6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -46,6 +46,7 @@ #include #include +extern int rcu_expedited; /* for sysctl */ #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index d945a94..e660964 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -19,6 +19,8 @@ #include #include +#include /* rcu_expedited */ + #define KERNEL_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 1bd787f..af2e60a 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -116,8 +116,6 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) } } -extern int rcu_expedited; - #ifdef CONFIG_RCU_STALL_COMMON extern int rcu_cpu_stall_suppress; -- cgit v0.10.2 From 5cb5c6e18f822b19bd41a2c0f9930c82b3ec0bc9 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 19 Feb 2014 14:33:27 -0500 Subject: rcu: Ensure kernel/rcu/rcu.h can be sourced/used stand-alone The kbuild test bot uncovered an implicit dependence on the trace header being present before rcu.h in ia64 allmodconfig that looks like this: In file included from kernel/ksysfs.c:22:0: kernel/rcu/rcu.h: In function '__rcu_reclaim': kernel/rcu/rcu.h:107:3: error: implicit declaration of function 'trace_rcu_invoke_kfree_callback' [-Werror=implicit-function-declaration] kernel/rcu/rcu.h:112:3: error: implicit declaration of function 'trace_rcu_invoke_callback' [-Werror=implicit-function-declaration] cc1: some warnings being treated as errors Looking at other rcu.h users, we can find that they all were sourcing the trace header in advance of rcu.h itself, as seen in the context of this diff. There were also some inconsistencies as to whether it was or wasn't sourced based on the parent tracing Kconfig. Rather than "fix" it at each use site, and have inconsistent use based on whether "#ifdef CONFIG_RCU_TRACE" was used or not, lets just source the trace header just once, in the actual consumer of it, which is rcu.h itself. We include it unconditionally, as build testing shows us that is a hard requirement for some files. Reported-by: kbuild test robot Signed-off-by: Paul Gortmaker Signed-off-by: Paul E. McKenney diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index af2e60a..bfda272 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -23,6 +23,7 @@ #ifndef __LINUX_RCU_H #define __LINUX_RCU_H +#include #ifdef CONFIG_RCU_TRACE #define RCU_TRACE(stmt) stmt #else /* #ifdef CONFIG_RCU_TRACE */ diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 2359779..c639556 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -36,8 +36,6 @@ #include #include -#include - #include "rcu.h" /* diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 53b95bb..d9efcc1 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -37,10 +37,6 @@ #include #include -#ifdef CONFIG_RCU_TRACE -#include -#endif /* #else #ifdef CONFIG_RCU_TRACE */ - #include "rcu.h" /* Forward declarations for tiny_plugin.h. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 73c3cd2..c7ed5db 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -58,8 +58,6 @@ #include #include "tree.h" -#include - #include "rcu.h" MODULE_ALIAS("rcutree"); diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index fd0d5b5..4c0a9b0 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -49,7 +49,6 @@ #include #define CREATE_TRACE_POINTS -#include #include "rcu.h" -- cgit v0.10.2 From a1989b330093578ea5470bea0a00f940c444c466 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 26 Feb 2014 10:07:04 +0100 Subject: dm mpath: fix stalls when handling invalid ioctls An invalid ioctl will never be valid, irrespective of whether multipath has active paths or not. So for invalid ioctls we do not have to wait for multipath to activate any paths, but can rather return an error code immediately. This fix resolves numerous instances of: udevd[]: worker [] unexpectedly returned with status 0x0100 that have been seen during testing. Signed-off-by: Hannes Reinecke Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 6eb9dc9..422a9fd 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1626,8 +1626,11 @@ static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, /* * Only pass ioctls through if the device sizes match exactly. */ - if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) - r = scsi_verify_blk_ioctl(NULL, cmd); + if (!bdev || ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) { + int err = scsi_verify_blk_ioctl(NULL, cmd); + if (err) + r = err; + } if (r == -ENOTCONN && !fatal_signal_pending(current)) queue_work(kmultipathd, &m->process_queued_ios); -- cgit v0.10.2 From 0b7d25c347900e2550398e29e25caf0fefe3cab2 Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Thu, 23 Jan 2014 22:32:20 +0100 Subject: pwm: lp3943: Fix potential memory leak during request Fix a memory leak in the lp3943_pwm_request_map() error handling path. Make sure already allocated pwm map memory is freed correctly. Detected by Coverity: CID 1162829. Signed-off-by: Christian Engelmayer Acked-by: Milo Kim Signed-off-by: Thierry Reding diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c index 8a843a0..a40b9c3 100644 --- a/drivers/pwm/pwm-lp3943.c +++ b/drivers/pwm/pwm-lp3943.c @@ -52,8 +52,10 @@ lp3943_pwm_request_map(struct lp3943_pwm *lp3943_pwm, int hwpwm) offset = pwm_map->output[i]; /* Return an error if the pin is already assigned */ - if (test_and_set_bit(offset, &lp3943->pin_used)) + if (test_and_set_bit(offset, &lp3943->pin_used)) { + kfree(pwm_map); return ERR_PTR(-EBUSY); + } } return pwm_map; -- cgit v0.10.2 From 404381c5839d67aa0c275ad1da96ef3d3928ca2c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 24 Feb 2014 13:59:32 -0300 Subject: KVM: MMU: drop read-only large sptes when creating lower level sptes Read-only large sptes can be created due to read-only faults as follows: - QEMU pagetable entry that maps guest memory is read-only due to COW. - Guest read faults such memory, COW is not broken, because it is a read-only fault. - Enable dirty logging, large spte not nuked because it is read-only. - Write-fault on such memory causes guest to loop endlessly (which must go down to level 1 because dirty logging is enabled). Fix by dropping large spte when necessary. Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e50425d..9b53135 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2672,6 +2672,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, break; } + drop_large_spte(vcpu, iterator.sptep); if (!is_shadow_present_pte(*iterator.sptep)) { u64 base_addr = iterator.addr; -- cgit v0.10.2 From ec6deea1a147a10baf1672dc66025a13bc259680 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 21 Jan 2014 09:06:21 +0100 Subject: clk: nomadik: fix multiplatform problem The Nomadik debugfs screws up multiplatform boots if debugfs is enabled on the multiplatform image, since it's a simple initcall that is unconditionally executed and reads from certain memory locations. Fix this by checking that the driver has been properly initialized, so a base offset to the Nomadik SRC controller exists, before proceeding to register debugfs files. Reported-by: Andrew Lunn Signed-off-by: Linus Walleij Reviewed-by: Andrew Lunn Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c index 6a934a5..05e04ce 100644 --- a/drivers/clk/clk-nomadik.c +++ b/drivers/clk/clk-nomadik.c @@ -494,6 +494,9 @@ static const struct file_operations nomadik_src_clk_debugfs_ops = { static int __init nomadik_src_clk_init_debugfs(void) { + /* Vital for multiplatform */ + if (!src_base) + return -ENODEV; src_pcksr0_boot = readl(src_base + SRC_PCKSR0); src_pcksr1_boot = readl(src_base + SRC_PCKSR1); debugfs_create_file("nomadik-src-clk", S_IFREG | S_IRUGO, -- cgit v0.10.2 From 9a9bfd032f0207dd6e63c84b7676b58f160af04b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Feb 2014 04:31:03 -0800 Subject: net: tcp: use NET_INC_STATS() While LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES can only be incremented in tcp_transmit_skb() from softirq (incoming message or timer activation), it is better to use NET_INC_STATS() instead of NET_INC_STATS_BH() as tcp_transmit_skb() can be called from process context. This will avoid copy/paste confusion when/if we want to add other SNMP counters in tcp_transmit_skb() Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Cc: Florian Westphal Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 0980581..d718482 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -864,8 +864,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (unlikely(skb->fclone == SKB_FCLONE_ORIG && fclone->fclone == SKB_FCLONE_CLONE)) - NET_INC_STATS_BH(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); -- cgit v0.10.2 From 8f355e5cee63c2c0c145d8206c4245d0189f47ff Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Tue, 25 Feb 2014 13:17:59 +0000 Subject: sfc: check for NULL efx->ptp_data in efx_ptp_event If we receive a PTP event from the NIC when we haven't set up PTP state in the driver, we attempt to read through a NULL pointer efx->ptp_data, triggering a panic. Signed-off-by: Edward Cree Acked-by: Shradha Shah Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index eb75fbd..d7a3682 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1668,6 +1668,13 @@ void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev) struct efx_ptp_data *ptp = efx->ptp_data; int code = EFX_QWORD_FIELD(*ev, MCDI_EVENT_CODE); + if (!ptp) { + if (net_ratelimit()) + netif_warn(efx, drv, efx->net_dev, + "Received PTP event but PTP not set up\n"); + return; + } + if (!ptp->enabled) return; -- cgit v0.10.2 From bc90d2918b343e114ddda91802f05a05dfed559e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 25 Feb 2014 13:35:53 -0800 Subject: MAINTAINERS: Intel nic drivers Add a new F: line for the intel subdirectories. This allows get_maintainers to avoid using git log and cc'ing people that have submitted clean-up style patches for all first level directories under drivers/net/ethernet/intel/ This does not make e100.c maintained. Signed-off-by: Joe Perches Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 0dba50b..fec577d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4545,6 +4545,7 @@ F: Documentation/networking/ixgbevf.txt F: Documentation/networking/i40e.txt F: Documentation/networking/i40evf.txt F: drivers/net/ethernet/intel/ +F: drivers/net/ethernet/intel/*/ INTEL-MID GPIO DRIVER M: David Cohen -- cgit v0.10.2 From 5e5b066535f0ee58e5de3a2db5fb56fa3cd7e3b1 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 26 Feb 2014 11:05:22 +0800 Subject: bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for 802.3ad mode The problem was introduced by the commit 1d3ee88ae0d (bonding: add netlink attributes to slave link dev). The bond_set_active_slave() and bond_set_backup_slave() will use rtmsg_ifinfo to send slave's states, so these two functions should be called in RTNL. In 802.3ad mode, acquiring RTNL for the __enable_port and __disable_port cases is difficult, as those calls generally already hold the state machine lock, and cannot unconditionally call rtnl_lock because either they already hold RTNL (for calls via bond_3ad_unbind_slave) or due to the potential for deadlock with bond_3ad_adapter_speed_changed, bond_3ad_adapter_duplex_changed, bond_3ad_link_change, or bond_3ad_update_lacp_rate. All four of those are called with RTNL held, and acquire the state machine lock second. The calling contexts for __enable_port and __disable_port already hold the state machine lock, and may or may not need RTNL. According to the Jay's opinion, I don't think it is a problem that the slave don't send notify message synchronously when the status changed, normally the state machine is running every 100 ms, send the notify message at the end of the state machine if the slave's state changed should be better. I fix the problem through these steps: 1). add a new function bond_set_slave_state() which could change the slave's state and call rtmsg_ifinfo() according to the input parameters called notify. 2). Add a new slave parameter which called should_notify, if the slave's state changed and don't notify yet, the parameter will be set to 1, and then if the slave's state changed again, the param will be set to 0, it indicate that the slave's state has been restored, no need to notify any one. 3). the __enable_port and __disable_port should not call rtmsg_ifinfo in the state machine lock, any change in the state of slave could set a flag in the slave, it will indicated that an rtmsg_ifinfo should be called at the end of the state machine. Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 6d20fbd..6826e4f 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -181,7 +181,7 @@ static inline int __agg_has_partner(struct aggregator *agg) */ static inline void __disable_port(struct port *port) { - bond_set_slave_inactive_flags(port->slave); + bond_set_slave_inactive_flags(port->slave, BOND_SLAVE_NOTIFY_LATER); } /** @@ -193,7 +193,7 @@ static inline void __enable_port(struct port *port) struct slave *slave = port->slave; if ((slave->link == BOND_LINK_UP) && IS_UP(slave->dev)) - bond_set_slave_active_flags(slave); + bond_set_slave_active_flags(slave, BOND_SLAVE_NOTIFY_LATER); } /** @@ -2062,6 +2062,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) struct list_head *iter; struct slave *slave; struct port *port; + bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; read_lock(&bond->lock); rcu_read_lock(); @@ -2119,8 +2120,25 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } re_arm: + bond_for_each_slave_rcu(bond, slave, iter) { + if (slave->should_notify) { + should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; + break; + } + } rcu_read_unlock(); read_unlock(&bond->lock); + + if (should_notify_rtnl && rtnl_trylock()) { + bond_for_each_slave(bond, slave, iter) { + if (slave->should_notify) { + rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, + GFP_KERNEL); + slave->should_notify = 0; + } + } + rtnl_unlock(); + } queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1c6104d..e02029b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -829,21 +829,25 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) if (bond_is_lb(bond)) { bond_alb_handle_active_change(bond, new_active); if (old_active) - bond_set_slave_inactive_flags(old_active); + bond_set_slave_inactive_flags(old_active, + BOND_SLAVE_NOTIFY_NOW); if (new_active) - bond_set_slave_active_flags(new_active); + bond_set_slave_active_flags(new_active, + BOND_SLAVE_NOTIFY_NOW); } else { rcu_assign_pointer(bond->curr_active_slave, new_active); } if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) { if (old_active) - bond_set_slave_inactive_flags(old_active); + bond_set_slave_inactive_flags(old_active, + BOND_SLAVE_NOTIFY_NOW); if (new_active) { bool should_notify_peers = false; - bond_set_slave_active_flags(new_active); + bond_set_slave_active_flags(new_active, + BOND_SLAVE_NOTIFY_NOW); if (bond->params.fail_over_mac) bond_do_fail_over_mac(bond, new_active, @@ -1463,14 +1467,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) switch (bond->params.mode) { case BOND_MODE_ACTIVEBACKUP: - bond_set_slave_inactive_flags(new_slave); + bond_set_slave_inactive_flags(new_slave, + BOND_SLAVE_NOTIFY_NOW); break; case BOND_MODE_8023AD: /* in 802.3ad mode, the internal mechanism * will activate the slaves in the selected * aggregator */ - bond_set_slave_inactive_flags(new_slave); + bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); /* if this is the first slave */ if (!prev_slave) { SLAVE_AD_INFO(new_slave).id = 1; @@ -1488,7 +1493,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) case BOND_MODE_TLB: case BOND_MODE_ALB: bond_set_active_slave(new_slave); - bond_set_slave_inactive_flags(new_slave); + bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); break; default: pr_debug("This slave is always active in trunk mode\n"); @@ -2015,7 +2020,8 @@ static void bond_miimon_commit(struct bonding *bond) if (bond->params.mode == BOND_MODE_ACTIVEBACKUP || bond->params.mode == BOND_MODE_8023AD) - bond_set_slave_inactive_flags(slave); + bond_set_slave_inactive_flags(slave, + BOND_SLAVE_NOTIFY_NOW); pr_info("%s: link status definitely down for interface %s, disabling it\n", bond->dev->name, slave->dev->name); @@ -2562,7 +2568,8 @@ static void bond_ab_arp_commit(struct bonding *bond) slave->link = BOND_LINK_UP; if (bond->current_arp_slave) { bond_set_slave_inactive_flags( - bond->current_arp_slave); + bond->current_arp_slave, + BOND_SLAVE_NOTIFY_NOW); bond->current_arp_slave = NULL; } @@ -2582,7 +2589,8 @@ static void bond_ab_arp_commit(struct bonding *bond) slave->link_failure_count++; slave->link = BOND_LINK_DOWN; - bond_set_slave_inactive_flags(slave); + bond_set_slave_inactive_flags(slave, + BOND_SLAVE_NOTIFY_NOW); pr_info("%s: link status definitely down for interface %s, disabling it\n", bond->dev->name, slave->dev->name); @@ -2657,7 +2665,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) } } - bond_set_slave_inactive_flags(curr_arp_slave); + bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_NOW); bond_for_each_slave(bond, slave, iter) { if (!found && !before && IS_UP(slave->dev)) @@ -2677,7 +2685,8 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - bond_set_slave_inactive_flags(slave); + bond_set_slave_inactive_flags(slave, + BOND_SLAVE_NOTIFY_NOW); pr_info("%s: backup interface %s is now down.\n", bond->dev->name, slave->dev->name); @@ -2695,7 +2704,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) } new_slave->link = BOND_LINK_BACK; - bond_set_slave_active_flags(new_slave); + bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); bond_arp_send_all(bond, new_slave); new_slave->jiffies = jiffies; rcu_assign_pointer(bond->current_arp_slave, new_slave); @@ -3046,9 +3055,11 @@ static int bond_open(struct net_device *bond_dev) bond_for_each_slave(bond, slave, iter) { if ((bond->params.mode == BOND_MODE_ACTIVEBACKUP) && (slave != bond->curr_active_slave)) { - bond_set_slave_inactive_flags(slave); + bond_set_slave_inactive_flags(slave, + BOND_SLAVE_NOTIFY_NOW); } else { - bond_set_slave_active_flags(slave); + bond_set_slave_active_flags(slave, + BOND_SLAVE_NOTIFY_NOW); } } read_unlock(&bond->curr_slave_lock); diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 86ccfb9..9b280ac 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -195,7 +195,8 @@ struct slave { s8 new_link; u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ - inactive:1; /* indicates inactive slave */ + inactive:1, /* indicates inactive slave */ + should_notify:1; /* indicateds whether the state changed */ u8 duplex; u32 original_mtu; u32 link_failure_count; @@ -303,6 +304,24 @@ static inline void bond_set_backup_slave(struct slave *slave) } } +static inline void bond_set_slave_state(struct slave *slave, + int slave_state, bool notify) +{ + if (slave->backup == slave_state) + return; + + slave->backup = slave_state; + if (notify) { + rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_KERNEL); + slave->should_notify = 0; + } else { + if (slave->should_notify) + slave->should_notify = 0; + else + slave->should_notify = 1; + } +} + static inline void bond_slave_state_change(struct bonding *bond) { struct list_head *iter; @@ -343,6 +362,9 @@ static inline bool bond_is_active_slave(struct slave *slave) #define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \ BOND_ARP_VALIDATE_BACKUP) +#define BOND_SLAVE_NOTIFY_NOW true +#define BOND_SLAVE_NOTIFY_LATER false + static inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave) { @@ -394,17 +416,19 @@ static inline void bond_netpoll_send_skb(const struct slave *slave, } #endif -static inline void bond_set_slave_inactive_flags(struct slave *slave) +static inline void bond_set_slave_inactive_flags(struct slave *slave, + bool notify) { if (!bond_is_lb(slave->bond)) - bond_set_backup_slave(slave); + bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); if (!slave->bond->params.all_slaves_active) slave->inactive = 1; } -static inline void bond_set_slave_active_flags(struct slave *slave) +static inline void bond_set_slave_active_flags(struct slave *slave, + bool notify) { - bond_set_active_slave(slave); + bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify); slave->inactive = 0; } -- cgit v0.10.2 From b0929915e0356acedf59504521c097ecada88b19 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 26 Feb 2014 11:05:23 +0800 Subject: bonding: Fix RTNL: assertion failed at net/core/rtnetlink.c for ab arp monitor Veaceslav has reported and fix this problem by commit f2ebd477f141bc0 (bonding: restructure locking of bond_ab_arp_probe()). According Jay's opinion, the current solution is not very well, because the notification is to indicate that the interface has actually changed state in a meaningful way, but these calls in the ab ARP monitor are internal settings of the flags to allow the ARP monitor to search for a slave to become active when there are no active slaves. The flag setting to active or backup is to permit the ARP monitor's response logic to do the right thing when deciding if the test slave (current_arp_slave) is up or not. So the best way to fix the problem is that we should not send a notification when the slave is in testing state, and check the state at the end of the monitor, if the slave's state recover, avoid to send pointless notification twice. And RTNL is really a big lock, hold it regardless the slave's state changed or not when the current_active_slave is null will loss performance (every 100ms), so we should hold it only when the slave's state changed and need to notify. I revert the old commit and add new modifications. Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 6826e4f..dcde5605 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2130,13 +2130,7 @@ re_arm: read_unlock(&bond->lock); if (should_notify_rtnl && rtnl_trylock()) { - bond_for_each_slave(bond, slave, iter) { - if (slave->should_notify) { - rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, - GFP_KERNEL); - slave->should_notify = 0; - } - } + bond_slave_state_notify(bond); rtnl_unlock(); } queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e02029b..82b70ff 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2623,17 +2623,17 @@ do_failover: /* * Send ARP probes for active-backup mode ARP monitor. + * + * Called with rcu_read_lock hold. */ static bool bond_ab_arp_probe(struct bonding *bond) { struct slave *slave, *before = NULL, *new_slave = NULL, - *curr_arp_slave, *curr_active_slave; + *curr_arp_slave = rcu_dereference(bond->current_arp_slave), + *curr_active_slave = rcu_dereference(bond->curr_active_slave); struct list_head *iter; bool found = false; - - rcu_read_lock(); - curr_arp_slave = rcu_dereference(bond->current_arp_slave); - curr_active_slave = rcu_dereference(bond->curr_active_slave); + bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER; if (curr_arp_slave && curr_active_slave) pr_info("PROBE: c_arp %s && cas %s BAD\n", @@ -2642,32 +2642,23 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (curr_active_slave) { bond_arp_send_all(bond, curr_active_slave); - rcu_read_unlock(); - return true; + return should_notify_rtnl; } - rcu_read_unlock(); /* if we don't have a curr_active_slave, search for the next available * backup slave from the current_arp_slave and make it the candidate * for becoming the curr_active_slave */ - if (!rtnl_trylock()) - return false; - /* curr_arp_slave might have gone away */ - curr_arp_slave = ACCESS_ONCE(bond->current_arp_slave); - if (!curr_arp_slave) { - curr_arp_slave = bond_first_slave(bond); - if (!curr_arp_slave) { - rtnl_unlock(); - return true; - } + curr_arp_slave = bond_first_slave_rcu(bond); + if (!curr_arp_slave) + return should_notify_rtnl; } - bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_NOW); + bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER); - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { if (!found && !before && IS_UP(slave->dev)) before = slave; @@ -2686,7 +2677,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) slave->link_failure_count++; bond_set_slave_inactive_flags(slave, - BOND_SLAVE_NOTIFY_NOW); + BOND_SLAVE_NOTIFY_LATER); pr_info("%s: backup interface %s is now down.\n", bond->dev->name, slave->dev->name); @@ -2698,26 +2689,31 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (!new_slave && before) new_slave = before; - if (!new_slave) { - rtnl_unlock(); - return true; - } + if (!new_slave) + goto check_state; new_slave->link = BOND_LINK_BACK; - bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); + bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); bond_arp_send_all(bond, new_slave); new_slave->jiffies = jiffies; rcu_assign_pointer(bond->current_arp_slave, new_slave); - rtnl_unlock(); - return true; +check_state: + bond_for_each_slave_rcu(bond, slave, iter) { + if (slave->should_notify) { + should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW; + break; + } + } + return should_notify_rtnl; } static void bond_activebackup_arp_mon(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, arp_work.work); - bool should_notify_peers = false, should_commit = false; + bool should_notify_peers = false; + bool should_notify_rtnl = false; int delta_in_ticks; delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); @@ -2726,11 +2722,12 @@ static void bond_activebackup_arp_mon(struct work_struct *work) goto re_arm; rcu_read_lock(); + should_notify_peers = bond_should_notify_peers(bond); - should_commit = bond_ab_arp_inspect(bond); - rcu_read_unlock(); - if (should_commit) { + if (bond_ab_arp_inspect(bond)) { + rcu_read_unlock(); + /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { delta_in_ticks = 1; @@ -2739,23 +2736,28 @@ static void bond_activebackup_arp_mon(struct work_struct *work) } bond_ab_arp_commit(bond); + rtnl_unlock(); + rcu_read_lock(); } - if (!bond_ab_arp_probe(bond)) { - /* rtnl locking failed, re-arm */ - delta_in_ticks = 1; - should_notify_peers = false; - } + should_notify_rtnl = bond_ab_arp_probe(bond); + rcu_read_unlock(); re_arm: if (bond->params.arp_interval) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); - if (should_notify_peers) { + if (should_notify_peers || should_notify_rtnl) { if (!rtnl_trylock()) return; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); + + if (should_notify_peers) + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, + bond->dev); + if (should_notify_rtnl) + bond_slave_state_notify(bond); + rtnl_unlock(); } } diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 9b280ac..2b0fdec 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -335,6 +335,19 @@ static inline void bond_slave_state_change(struct bonding *bond) } } +static inline void bond_slave_state_notify(struct bonding *bond) +{ + struct list_head *iter; + struct slave *tmp; + + bond_for_each_slave(bond, tmp, iter) { + if (tmp->should_notify) { + rtmsg_ifinfo(RTM_NEWLINK, tmp->dev, 0, GFP_KERNEL); + tmp->should_notify = 0; + } + } +} + static inline int bond_slave_state(struct slave *slave) { return slave->backup; -- cgit v0.10.2 From e5fe0cd442e50f156bb54f5385b19eda50a13ccd Mon Sep 17 00:00:00 2001 From: Freddy Xin Date: Wed, 26 Feb 2014 16:54:31 +0800 Subject: AX88179_178A: Add VID:DID for Lenovo OneLinkDock Gigabit LAN Add VID:DID for Lenovo OneLinkDock Gigabit LAN Signed-off-by: Freddy Xin Signed-off-by: David S. Miller diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 955df81..460e823 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1421,6 +1421,19 @@ static const struct driver_info samsung_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info lenovo_info = { + .description = "Lenovo OneLinkDock Gigabit LAN", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct usb_device_id products[] = { { /* ASIX AX88179 10/100/1000 */ @@ -1438,6 +1451,10 @@ static const struct usb_device_id products[] = { /* Samsung USB Ethernet Adapter */ USB_DEVICE(0x04e8, 0xa100), .driver_info = (unsigned long)&samsung_info, +}, { + /* Lenovo OneLinkDock Gigabit LAN */ + USB_DEVICE(0x17ef, 0x304b), + .driver_info = (unsigned long)&lenovo_info, }, { }, }; -- cgit v0.10.2 From ee6154e11eeccd4ae32c4881415dbd902a869592 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 26 Feb 2014 14:20:30 +0100 Subject: bonding: fix a div error caused by the slave release path There's a bug in the slave release function which leads the transmit functions which use the bond->slave_cnt to a div by 0 because we might just have released our last slave and made slave_cnt == 0 but at the same time we may have a transmitter after the check for an empty list which will fetch it and use it in the slave id calculation. Fix it by moving the slave_cnt after synchronize_rcu so if this was our last slave any new transmitters will see an empty slave list which is checked after rcu lock but before calling the mode transmit functions which rely on bond->slave_cnt. Fixes: 278b208375 ("bonding: initial RCU conversion") CC: Veaceslav Falico CC: Andy Gospodarek CC: Jay Vosburgh CC: David S. Miller Signed-off-by: Nikolay Aleksandrov Acked-by: Veaceslav Falico Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 82b70ff..b47fa04 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1659,9 +1659,6 @@ static int __bond_release_one(struct net_device *bond_dev, return -EINVAL; } - /* release the slave from its bond */ - bond->slave_cnt--; - bond_sysfs_slave_del(slave); bond_upper_dev_unlink(bond_dev, slave_dev); @@ -1743,6 +1740,7 @@ static int __bond_release_one(struct net_device *bond_dev, unblock_netpoll_tx(); synchronize_rcu(); + bond->slave_cnt--; if (!bond_has_slaves(bond)) { call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev); -- cgit v0.10.2 From f3ca4164529b875374c410193bbbac0ee960895f Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 26 Feb 2014 21:03:05 +0800 Subject: ACPI / processor: Rework processor throttling with work_on_cpu() acpi_processor_set_throttling() uses set_cpus_allowed_ptr() to make sure that the (struct acpi_processor)->acpi_processor_set_throttling() callback will run on the right CPU. However, the function may be called from a worker thread already bound to a different CPU in which case that won't work. Make acpi_processor_set_throttling() use work_on_cpu() as appropriate instead of abusing set_cpus_allowed_ptr(). Reported-and-tested-by: Jiri Olsa Signed-off-by: Lan Tianyu Cc: All applicable [rjw: Changelog] Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 28baa05..84243c3 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -56,6 +56,12 @@ struct throttling_tstate { int target_state; /* target T-state */ }; +struct acpi_processor_throttling_arg { + struct acpi_processor *pr; + int target_state; + bool force; +}; + #define THROTTLING_PRECHANGE (1) #define THROTTLING_POSTCHANGE (2) @@ -1060,16 +1066,24 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, return 0; } +static long acpi_processor_throttling_fn(void *data) +{ + struct acpi_processor_throttling_arg *arg = data; + struct acpi_processor *pr = arg->pr; + + return pr->throttling.acpi_processor_set_throttling(pr, + arg->target_state, arg->force); +} + int acpi_processor_set_throttling(struct acpi_processor *pr, int state, bool force) { - cpumask_var_t saved_mask; int ret = 0; unsigned int i; struct acpi_processor *match_pr; struct acpi_processor_throttling *p_throttling; + struct acpi_processor_throttling_arg arg; struct throttling_tstate t_state; - cpumask_var_t online_throttling_cpus; if (!pr) return -EINVAL; @@ -1080,14 +1094,6 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, if ((state < 0) || (state > (pr->throttling.state_count - 1))) return -EINVAL; - if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL)) - return -ENOMEM; - - if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) { - free_cpumask_var(saved_mask); - return -ENOMEM; - } - if (cpu_is_offline(pr->id)) { /* * the cpu pointed by pr->id is offline. Unnecessary to change @@ -1096,17 +1102,15 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, return -ENODEV; } - cpumask_copy(saved_mask, ¤t->cpus_allowed); t_state.target_state = state; p_throttling = &(pr->throttling); - cpumask_and(online_throttling_cpus, cpu_online_mask, - p_throttling->shared_cpu_map); + /* * The throttling notifier will be called for every * affected cpu in order to get one proper T-state. * The notifier event is THROTTLING_PRECHANGE. */ - for_each_cpu(i, online_throttling_cpus) { + for_each_cpu_and(i, cpu_online_mask, p_throttling->shared_cpu_map) { t_state.cpu = i; acpi_processor_throttling_notifier(THROTTLING_PRECHANGE, &t_state); @@ -1118,21 +1122,18 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, * it can be called only for the cpu pointed by pr. */ if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) { - /* FIXME: use work_on_cpu() */ - if (set_cpus_allowed_ptr(current, cpumask_of(pr->id))) { - /* Can't migrate to the pr->id CPU. Exit */ - ret = -ENODEV; - goto exit; - } - ret = p_throttling->acpi_processor_set_throttling(pr, - t_state.target_state, force); + arg.pr = pr; + arg.target_state = state; + arg.force = force; + ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, &arg); } else { /* * When the T-state coordination is SW_ALL or HW_ALL, * it is necessary to set T-state for every affected * cpus. */ - for_each_cpu(i, online_throttling_cpus) { + for_each_cpu_and(i, cpu_online_mask, + p_throttling->shared_cpu_map) { match_pr = per_cpu(processors, i); /* * If the pointer is invalid, we will report the @@ -1153,13 +1154,12 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, "on CPU %d\n", i)); continue; } - t_state.cpu = i; - /* FIXME: use work_on_cpu() */ - if (set_cpus_allowed_ptr(current, cpumask_of(i))) - continue; - ret = match_pr->throttling. - acpi_processor_set_throttling( - match_pr, t_state.target_state, force); + + arg.pr = match_pr; + arg.target_state = state; + arg.force = force; + ret = work_on_cpu(pr->id, acpi_processor_throttling_fn, + &arg); } } /* @@ -1168,17 +1168,12 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, * affected cpu to update the T-states. * The notifier event is THROTTLING_POSTCHANGE */ - for_each_cpu(i, online_throttling_cpus) { + for_each_cpu_and(i, cpu_online_mask, p_throttling->shared_cpu_map) { t_state.cpu = i; acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE, &t_state); } - /* restore the previous state */ - /* FIXME: use work_on_cpu() */ - set_cpus_allowed_ptr(current, saved_mask); -exit: - free_cpumask_var(online_throttling_cpus); - free_cpumask_var(saved_mask); + return ret; } -- cgit v0.10.2 From 6dbd46c849e071e6afc1e0cad489b0175bca9318 Mon Sep 17 00:00:00 2001 From: Joerg Dorchain Date: Fri, 21 Feb 2014 20:29:33 +0100 Subject: USB: ftdi_sio: add Cressi Leonardo PID Hello, the following patch adds an entry for the PID of a Cressi Leonardo diving computer interface to kernel 3.13.0. It is detected as FT232RL. Works with subsurface. Signed-off-by: Joerg Dorchain Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index ee1f00f..44ab129 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -907,6 +907,8 @@ static const struct usb_device_id id_table_combined[] = { /* Crucible Devices */ { USB_DEVICE(FTDI_VID, FTDI_CT_COMET_PID) }, { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) }, + /* Cressi Devices */ + { USB_DEVICE(FTDI_VID, FTDI_CRESSI_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 1e2d369..e599fbf 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1320,3 +1320,9 @@ * Manufacturer: Smart GSM Team */ #define FTDI_Z3X_PID 0x0011 + +/* + * Product: Cressi PC Interface + * Manufacturer: Cressi + */ +#define FTDI_CRESSI_PID 0x87d0 -- cgit v0.10.2 From a1227f3c1030e96ebc51d677d2f636268845c5fb Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 19 Feb 2014 10:29:01 +0100 Subject: usb: ehci: fix deadlock when threadirqs option is used ehci_irq() and ehci_hrtimer_func() can deadlock on ehci->lock when threadirqs option is used. To prevent the deadlock use spin_lock_irqsave() in ehci_irq(). This change can be reverted when hrtimer callbacks become threaded. Signed-off-by: Stanislaw Gruszka Cc: stable Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 4711427..81cda09 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -685,8 +685,15 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd) struct ehci_hcd *ehci = hcd_to_ehci (hcd); u32 status, masked_status, pcd_status = 0, cmd; int bh; + unsigned long flags; - spin_lock (&ehci->lock); + /* + * For threadirqs option we use spin_lock_irqsave() variant to prevent + * deadlock with ehci hrtimer callback, because hrtimer callbacks run + * in interrupt context even when threadirqs is specified. We can go + * back to spin_lock() variant when hrtimer callbacks become threaded. + */ + spin_lock_irqsave(&ehci->lock, flags); status = ehci_readl(ehci, &ehci->regs->status); @@ -704,7 +711,7 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd) /* Shared IRQ? */ if (!masked_status || unlikely(ehci->rh_state == EHCI_RH_HALTED)) { - spin_unlock(&ehci->lock); + spin_unlock_irqrestore(&ehci->lock, flags); return IRQ_NONE; } @@ -815,7 +822,7 @@ dead: if (bh) ehci_work (ehci); - spin_unlock (&ehci->lock); + spin_unlock_irqrestore(&ehci->lock, flags); if (pcd_status) usb_hcd_poll_rh_status(hcd); return IRQ_HANDLED; -- cgit v0.10.2 From f63fcc90a379a269a07a1111f5b7ba28ebcb1eb4 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 11 Feb 2014 22:15:07 +0900 Subject: clk:at91: Fix memory leak in of_at91_clk_master_setup() cppcheck detected following error [clk-master.c:245]: (error) Memory leak: characteristics The original code forgot to free characteristics when irq_of_parse_and_map() failed. Signed-off-by: Masanari Iida Acked-by Boris BREZILLON Acked-by: Alexandre Belloni Signed-off-by: Mike Turquette diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index bd313f7..c1af80b 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -242,7 +242,7 @@ of_at91_clk_master_setup(struct device_node *np, struct at91_pmc *pmc, irq = irq_of_parse_and_map(np, 0); if (!irq) - return; + goto out_free_characteristics; clk = at91_clk_register_master(pmc, irq, name, num_parents, parent_names, layout, -- cgit v0.10.2 From 367d896dafe1b5f49bee75d3a419b9eb9936ae26 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Fri, 31 Jan 2014 15:30:54 -0800 Subject: tools/liblockdep: Fix initialization code path This makes initialization actually happen. Without it, initialization is always skipped due to an incorrect conditional statement. Signed-off-by: Ira W. Snyder Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index f8465a8..23bd69c 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -418,7 +418,7 @@ int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) __attribute__((constructor)) static void init_preload(void) { - if (__init_state != done) + if (__init_state == done) return; #ifndef __GLIBC__ -- cgit v0.10.2 From 75759827bbe78e53330de2c5e061ecec7fa6f86e Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Fri, 31 Jan 2014 13:35:31 -0800 Subject: tools/liblockdep: Fix include of asm/hash.h Commit 71ae8aac ("lib: introduce arch optimized hash library") added an include to for setting up an architecture specific fast hash. This patch mirrors the fix used for perf, titled "tools: perf: util: fix include for non x86 architectures". Signed-off-by: Ira W. Snyder Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/uinclude/asm/hash.h b/tools/lib/lockdep/uinclude/asm/hash.h new file mode 100644 index 0000000..d82b170b --- /dev/null +++ b/tools/lib/lockdep/uinclude/asm/hash.h @@ -0,0 +1,6 @@ +#ifndef __ASM_GENERIC_HASH_H +#define __ASM_GENERIC_HASH_H + +/* Stub */ + +#endif /* __ASM_GENERIC_HASH_H */ -- cgit v0.10.2 From aef5976fc52a497ac8161f54164c786c771d3ae8 Mon Sep 17 00:00:00 2001 From: "Ira W. Snyder" Date: Fri, 31 Jan 2014 13:35:32 -0800 Subject: tools/liblockdep: Add include directory to allow tests to compile All of the programs in the tests directory require the liblockdep/mutex.h header in order to compile. Add the include directory to the compiler options so that the tests can be built with the provided Makefile. Signed-off-by: Ira W. Snyder Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index da8b7aa..e8dd749 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -112,7 +112,7 @@ export Q VERBOSE LIBLOCKDEP_VERSION = $(LL_VERSION).$(LL_PATCHLEVEL).$(LL_EXTRAVERSION) -INCLUDES = -I. -I/usr/local/include -I./uinclude $(CONFIG_INCLUDES) +INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g -- cgit v0.10.2 From 1ddc1ffa2f8219e0c55c7f800048533e1ce8dee5 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 5 Feb 2014 17:53:13 -0500 Subject: tools/liblockdep: Mark runtests.sh as executable runtests.sh is used to run the sanity tests for liblockdep and should be set +x. Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh old mode 100644 new mode 100755 -- cgit v0.10.2 From 9e3513b7af9ef3b6225f539181c2d94328bd18f7 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 5 Feb 2014 17:58:37 -0500 Subject: tools/liblockdep: Add a stub for new rcu_is_watching Stub out rcu_is_watching(), prevents build error with the updated tree. Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/uinclude/linux/rcu.h b/tools/lib/lockdep/uinclude/linux/rcu.h index 4c99fcb..042ee8e 100644 --- a/tools/lib/lockdep/uinclude/linux/rcu.h +++ b/tools/lib/lockdep/uinclude/linux/rcu.h @@ -13,4 +13,9 @@ static inline int rcu_is_cpu_idle(void) return 1; } +static inline bool rcu_is_watching(void) +{ + return false; +} + #endif -- cgit v0.10.2 From 7b8853419d3344b06cff64b4dc926805698eeba5 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sat, 8 Feb 2014 09:43:40 +0800 Subject: tools/liblockdep: Use realpath for srctree and objtree If BUILD_SRC or CURDIR contains tailing '/', the file names passed to gcc will contain '//'. It will be contained .o's in debuginfo, then confuse debugedit: https://bugzilla.redhat.com/show_bug.cgi?id=304121 This patch uses realpath command to makesure potential tailing '/'s are removed. Signed-off-by: Wang Nan Cc: Ingo Molnar Cc: Andrew Morton Cc: Geng Hui Signed-off-by: Sasha Levin diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index e8dd749..07b0b75 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -87,8 +87,8 @@ endif # BUILD_SRC # We process the rest of the Makefile if this is the final invocation of make ifeq ($(skip-makefile),) -srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) -objtree := $(CURDIR) +srctree := $(realpath $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))) +objtree := $(realpath $(CURDIR)) src := $(srctree) obj := $(objtree) -- cgit v0.10.2 From 09a89c219baf0f116387efc928e325cf23630f20 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Wed, 26 Feb 2014 18:20:13 +0100 Subject: bonding: disallow enslaving a bond to itself Enslaving a bond to itself leads to an endless loop and hangs the kernel. Signed-off-by: Jiri Bohac Tested-by: Ding Tianhong Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b47fa04..e5628fc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1197,6 +1197,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) return -EBUSY; } + if (bond_dev == slave_dev) { + pr_err("%s: cannot enslave bond to itself.\n", bond_dev->name); + return -EPERM; + } + /* vlan challenged mutual exclusion */ /* no need to lock since we're protected by rtnl_lock */ if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) { -- cgit v0.10.2 From 7995d74ab297e4f5526c8df70007e1e39f0d1f5c Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 26 Feb 2014 16:31:19 +0100 Subject: spi-topcliff-pch: Fix probing when DMA mode is used If during registering SPI master due to SPI device probing a SPI transfer is issued the DMA buffers are not allocated yet. This fixes the following oops: pch_spi 0000:02:0c.1: enabling device (0000 -> 0002) pch_spi 0000:02:0c.1: master is unqueued, this is deprecated BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] pch_spi_handle_dma+0x15c/0x6f4 [...] Signed-off-by: Alexander Stein Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 3383008c..88eb57e 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -1452,6 +1452,11 @@ static int pch_spi_pd_probe(struct platform_device *plat_dev) pch_spi_set_master_mode(master); + if (use_dma) { + dev_info(&plat_dev->dev, "Use DMA for data transfers\n"); + pch_alloc_dma_buf(board_dat, data); + } + ret = spi_register_master(master); if (ret != 0) { dev_err(&plat_dev->dev, @@ -1459,14 +1464,10 @@ static int pch_spi_pd_probe(struct platform_device *plat_dev) goto err_spi_register_master; } - if (use_dma) { - dev_info(&plat_dev->dev, "Use DMA for data transfers\n"); - pch_alloc_dma_buf(board_dat, data); - } - return 0; err_spi_register_master: + pch_free_dma_buf(board_dat, data); free_irq(board_dat->pdev->irq, data); err_request_irq: pch_spi_free_resources(board_dat, data); -- cgit v0.10.2 From 566f59394d7b4808f0fa9b64c4c9db1ac2d10cbd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2014 10:03:43 -0500 Subject: MAINTAINERS: add entry for drm radeon driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an entry for radeon. Signed-off-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Dave Airlie diff --git a/MAINTAINERS b/MAINTAINERS index fb08dce..e03da03 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2851,6 +2851,16 @@ F: drivers/gpu/drm/ F: include/drm/ F: include/uapi/drm/ +RADEON DRM DRIVERS +M: Alex Deucher +M: Christian König +L: dri-devel@lists.freedesktop.org +T: git git://people.freedesktop.org/~agd5f/linux +S: Supported +F: drivers/gpu/drm/radeon/ +F: include/drm/radeon* +F: include/uapi/drm/radeon* + INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets) M: Daniel Vetter M: Jani Nikula -- cgit v0.10.2 From b0447888dc29f4fb91c3b3b02e3f1f0bfc6e1222 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2014 10:03:44 -0500 Subject: MAINTAINERS: update drm git tree entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix Dave's git tree. Signed-off-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Dave Airlie diff --git a/MAINTAINERS b/MAINTAINERS index e03da03..623ba85 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2845,7 +2845,7 @@ F: lib/kobj* DRM DRIVERS M: David Airlie L: dri-devel@lists.freedesktop.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6.git +T: git git://people.freedesktop.org/~airlied/linux S: Maintained F: drivers/gpu/drm/ F: include/drm/ -- cgit v0.10.2 From e7eb65cac0720df8b3946af7f7a9dc363cf0a814 Mon Sep 17 00:00:00 2001 From: Max Stepanov Date: Sun, 16 Feb 2014 16:36:57 +0200 Subject: iwlwifi: mvm: change of listen interval from 70 to 10 Some APs reject STA association request if a listen interval value exceeds a threshold of 10. Thus, for example, Cisco APs may deny STA associations returning status code 12 (Association denied due to reason outside the scope of 802.11 standard) in the association response frame. Fixing the issue by setting the default IWL_CONN_MAX_LISTEN_INTERVAL value from 70 to 10. Cc: [3.10+] Signed-off-by: Max Stepanov Reviewed-by: Alexander Bondar Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h index e4ead86..2b0ba1f 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h @@ -152,7 +152,7 @@ enum iwl_power_scheme { IWL_POWER_SCHEME_LP }; -#define IWL_CONN_MAX_LISTEN_INTERVAL 70 +#define IWL_CONN_MAX_LISTEN_INTERVAL 10 #define IWL_UAPSD_AC_INFO (IEEE80211_WMM_IE_STA_QOSINFO_AC_VO |\ IEEE80211_WMM_IE_STA_QOSINFO_AC_VI |\ IEEE80211_WMM_IE_STA_QOSINFO_AC_BK |\ -- cgit v0.10.2 From b3619b288b621e63f66908045f48495869a996a6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 27 Feb 2014 07:41:32 +0100 Subject: ASoC: sta32x: Fix wrong enum for limiter2 release rate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a typo in the Limiter2 Release Rate control, a wrong enum for Limiter1 is assigned. It must point to Limiter2. Spotted by a compile warning: In file included from sound/soc/codecs/sta32x.c:34:0: sound/soc/codecs/sta32x.c:223:29: warning: ‘sta32x_limiter2_release_rate_enum’ defined but not used [-Wunused-variable] static SOC_ENUM_SINGLE_DECL(sta32x_limiter2_release_rate_enum, ^ include/sound/soc.h:275:18: note: in definition of macro ‘SOC_ENUM_DOUBLE_DECL’ struct soc_enum name = SOC_ENUM_DOUBLE(xreg, xshift_l, xshift_r, \ ^ sound/soc/codecs/sta32x.c:223:8: note: in expansion of macro ‘SOC_ENUM_SINGLE_DECL’ static SOC_ENUM_SINGLE_DECL(sta32x_limiter2_release_rate_enum, ^ Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Cc: diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c index ea78c17..2735361 100644 --- a/sound/soc/codecs/sta32x.c +++ b/sound/soc/codecs/sta32x.c @@ -434,7 +434,7 @@ SOC_SINGLE_TLV("Treble Tone Control", STA32X_TONE, STA32X_TONE_TTC_SHIFT, 15, 0, SOC_ENUM("Limiter1 Attack Rate (dB/ms)", sta32x_limiter1_attack_rate_enum), SOC_ENUM("Limiter2 Attack Rate (dB/ms)", sta32x_limiter2_attack_rate_enum), SOC_ENUM("Limiter1 Release Rate (dB/ms)", sta32x_limiter1_release_rate_enum), -SOC_ENUM("Limiter2 Release Rate (dB/ms)", sta32x_limiter1_release_rate_enum), +SOC_ENUM("Limiter2 Release Rate (dB/ms)", sta32x_limiter2_release_rate_enum), /* depending on mode, the attack/release thresholds have * two different enum definitions; provide both -- cgit v0.10.2 From 143582c6847cb285b361804c613127c25de60ca4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 25 Feb 2014 10:37:15 +0100 Subject: iwlwifi: fix TX status for aggregated packets Only the first packet is currently handled correctly, but then all others are assumed to have failed which is problematic. Fix this, marking them all successful instead (since if they're not then the firmware will have transmitted them as single frames.) This fixes the lost packet reporting. Also do a tiny variable scoping cleanup. Cc: Signed-off-by: Johannes Berg [Add the dvm part] Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/dvm/tx.c b/drivers/net/wireless/iwlwifi/dvm/tx.c index a6839df..398dd09 100644 --- a/drivers/net/wireless/iwlwifi/dvm/tx.c +++ b/drivers/net/wireless/iwlwifi/dvm/tx.c @@ -1291,8 +1291,6 @@ int iwlagn_rx_reply_compressed_ba(struct iwl_priv *priv, struct iwl_compressed_ba_resp *ba_resp = (void *)pkt->data; struct iwl_ht_agg *agg; struct sk_buff_head reclaimed_skbs; - struct ieee80211_tx_info *info; - struct ieee80211_hdr *hdr; struct sk_buff *skb; int sta_id; int tid; @@ -1379,22 +1377,28 @@ int iwlagn_rx_reply_compressed_ba(struct iwl_priv *priv, freed = 0; skb_queue_walk(&reclaimed_skbs, skb) { - hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (ieee80211_is_data_qos(hdr->frame_control)) freed++; else WARN_ON_ONCE(1); - info = IEEE80211_SKB_CB(skb); iwl_trans_free_tx_cmd(priv->trans, info->driver_data[1]); + memset(&info->status, 0, sizeof(info->status)); + /* Packet was transmitted successfully, failures come as single + * frames because before failing a frame the firmware transmits + * it without aggregation at least once. + */ + info->flags |= IEEE80211_TX_STAT_ACK; + if (freed == 1) { /* this is the first skb we deliver in this batch */ /* put the rate scaling data there */ info = IEEE80211_SKB_CB(skb); memset(&info->status, 0, sizeof(info->status)); - info->flags |= IEEE80211_TX_STAT_ACK; info->flags |= IEEE80211_TX_STAT_AMPDU; info->status.ampdu_ack_len = ba_resp->txed_2_done; info->status.ampdu_len = ba_resp->txed; diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c index 4df12fa..76ee486 100644 --- a/drivers/net/wireless/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/iwlwifi/mvm/tx.c @@ -822,16 +822,12 @@ int iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, struct iwl_mvm_ba_notif *ba_notif = (void *)pkt->data; struct sk_buff_head reclaimed_skbs; struct iwl_mvm_tid_data *tid_data; - struct ieee80211_tx_info *info; struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; - struct ieee80211_hdr *hdr; struct sk_buff *skb; int sta_id, tid, freed; - /* "flow" corresponds to Tx queue */ u16 scd_flow = le16_to_cpu(ba_notif->scd_flow); - /* "ssn" is start of block-ack Tx window, corresponds to index * (in Tx queue's circular buffer) of first TFD/frame in window */ u16 ba_resp_scd_ssn = le16_to_cpu(ba_notif->scd_ssn); @@ -888,22 +884,26 @@ int iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb, freed = 0; skb_queue_walk(&reclaimed_skbs, skb) { - hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_hdr *hdr = (void *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); if (ieee80211_is_data_qos(hdr->frame_control)) freed++; else WARN_ON_ONCE(1); - info = IEEE80211_SKB_CB(skb); iwl_trans_free_tx_cmd(mvm->trans, info->driver_data[1]); + memset(&info->status, 0, sizeof(info->status)); + /* Packet was transmitted successfully, failures come as single + * frames because before failing a frame the firmware transmits + * it without aggregation at least once. + */ + info->flags |= IEEE80211_TX_STAT_ACK; + if (freed == 1) { /* this is the first skb we deliver in this batch */ /* put the rate scaling data there */ - info = IEEE80211_SKB_CB(skb); - memset(&info->status, 0, sizeof(info->status)); - info->flags |= IEEE80211_TX_STAT_ACK; info->flags |= IEEE80211_TX_STAT_AMPDU; info->status.ampdu_ack_len = ba_notif->txed_2_done; info->status.ampdu_len = ba_notif->txed; -- cgit v0.10.2 From c685689fd24d310343ac33942e9a54a974ae9c43 Mon Sep 17 00:00:00 2001 From: Chuansheng Liu Date: Mon, 24 Feb 2014 11:29:50 +0800 Subject: genirq: Remove racy waitqueue_active check We hit one rare case below: T1 calling disable_irq(), but hanging at synchronize_irq() always; The corresponding irq thread is in sleeping state; And all CPUs are in idle state; After analysis, we found there is one possible scenerio which causes T1 is waiting there forever: CPU0 CPU1 synchronize_irq() wait_event() spin_lock() atomic_dec_and_test(&threads_active) insert the __wait into queue spin_unlock() if(waitqueue_active) atomic_read(&threads_active) wake_up() Here after inserted the __wait into queue on CPU0, and before test if queue is empty on CPU1, there is no barrier, it maybe cause it is not visible for CPU1 immediately, although CPU0 has updated the queue list. It is similar for CPU0 atomic_read() threads_active also. So we'd need one smp_mb() before waitqueue_active.that, but removing the waitqueue_active() check solves it as wel l and it makes things simple and clear. Signed-off-by: Chuansheng Liu Cc: Xiaoming Wang Link: http://lkml.kernel.org/r/1393212590-32543-1-git-send-email-chuansheng.liu@intel.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 481a13c..d3bf660 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -802,8 +802,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, static void wake_threads_waitq(struct irq_desc *desc) { - if (atomic_dec_and_test(&desc->threads_active) && - waitqueue_active(&desc->wait_for_threads)) + if (atomic_dec_and_test(&desc->threads_active)) wake_up(&desc->wait_for_threads); } -- cgit v0.10.2 From f207dbe63c61158c234e2e8929a3725a7f6b2b9b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 27 Feb 2014 12:20:31 +0100 Subject: Revert "sched/wait: Suppress Sparse 'variable shadowing' warning" This reverts commit 980f88e414418bf65569a3b62b08b07e6fc2f4c6. This warning is actually useful, don't suppress it. We actually rely on the shadowing for ___wait_cond_timeout(). We further used the __ret variable in __wait_event_timeout()'s cmd argument: __ret = schedule_timeout(__ret). That now explicitly uses the wrong __ret. Reported-by: Gregory CLEMENT Requested-by: Andrew Morton Requested-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-Q5blhuqqzwgVwvjf1gszrdol@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/include/linux/wait.h b/include/linux/wait.h index c55ea5c..559044c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -195,7 +195,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); ({ \ __label__ __out; \ wait_queue_t __wait; \ - long ___ret = ret; \ + long __ret = ret; \ \ INIT_LIST_HEAD(&__wait.task_list); \ if (exclusive) \ @@ -210,7 +210,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); break; \ \ if (___wait_is_interruptible(state) && __int) { \ - ___ret = __int; \ + __ret = __int; \ if (exclusive) { \ abort_exclusive_wait(&wq, &__wait, \ state, NULL); \ @@ -222,7 +222,7 @@ wait_queue_head_t *bit_waitqueue(void *, int); cmd; \ } \ finish_wait(&wq, &__wait); \ -__out: ___ret; \ +__out: __ret; \ }) #define __wait_event(wq, condition) \ -- cgit v0.10.2 From 791c9e0292671a3bfa95286bb5c08129d8605618 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Tue, 18 Feb 2014 17:56:51 -0600 Subject: sched: Fix double normalization of vruntime dequeue_entity() is called when p->on_rq and sets se->on_rq = 0 which appears to guarentee that the !se->on_rq condition is met. If the task has done set_current_state(TASK_INTERRUPTIBLE) without schedule() the second condition will be met and vruntime will be incorrectly adjusted twice. In certain cases this can result in the task's vruntime never increasing past the vruntime of other tasks on the CFS' run queue, starving them of CPU time. This patch changes switched_from_fair() to use !p->on_rq instead of !se->on_rq. I'm able to cause a task with a priority of 120 to starve all other tasks with the same priority on an ARM platform running 3.2.51-rt72 PREEMPT RT by writing one character at time to a serial tty (16550 UART) in a tight loop. I'm also able to verify making this change corrects the problem on that platform and kernel version. Signed-off-by: George McCollister Signed-off-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1392767811-28916-1-git-send-email-george.mccollister@gmail.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7815709..9b4c4f3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7001,15 +7001,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p) struct cfs_rq *cfs_rq = cfs_rq_of(se); /* - * Ensure the task's vruntime is normalized, so that when its + * Ensure the task's vruntime is normalized, so that when it's * switched back to the fair class the enqueue_entity(.flags=0) will * do the right thing. * - * If it was on_rq, then the dequeue_entity(.flags=0) will already - * have normalized the vruntime, if it was !on_rq, then only when + * If it's on_rq, then the dequeue_entity(.flags=0) will already + * have normalized the vruntime, if it's !on_rq, then only when * the task is sleeping will it still have non-normalized vruntime. */ - if (!se->on_rq && p->state != TASK_RUNNING) { + if (!p->on_rq && p->state != TASK_RUNNING) { /* * Fix up our vruntime so that the current sleep doesn't * cause 'unlimited' sleep bonus. -- cgit v0.10.2 From 3908ac13b550c93f97d8db136ff572be5495bc06 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 25 Feb 2014 19:52:23 +0400 Subject: sched/deadline: Cleanup RT leftovers from {inc/dec}_dl_migration In deadline class we do not have group scheduling. So, let's remove unnecessary X = X; equations. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra Cc: Juri Lelli Link: http://lkml.kernel.org/r/1393343543.4089.5.camel@tkhai Signed-off-by: Ingo Molnar diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 15cbc17..aecf930 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -135,7 +135,6 @@ static void update_dl_migration(struct dl_rq *dl_rq) static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - dl_rq = &rq_of_dl_rq(dl_rq)->dl; if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory++; @@ -146,7 +145,6 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { struct task_struct *p = dl_task_of(dl_se); - dl_rq = &rq_of_dl_rq(dl_rq)->dl; if (p->nr_cpus_allowed > 1) dl_rq->dl_nr_migratory--; -- cgit v0.10.2 From eec751ed41a0ae7e92a43c33a458d7bd1b941631 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 24 Feb 2014 11:47:12 +0100 Subject: sched/deadline: Switch CPU's presence test order Commit 82b9580 ("sched/deadline: Test for CPU's presence explicitly") changed how we check if a CPU returned by cpudeadline machinery is valid. But, we don't want to call cpu_present() if best_cpu is equal to -1. So, switch the order of tests inside WARN_ON(). Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra Cc: boris.ostrovsky@oracle.com Cc: konrad.wilk@oracle.com Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/1393238832-9100-1-git-send-email-juri.lelli@gmail.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 5b8838b..5b9bb42 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -70,7 +70,7 @@ static void cpudl_heapify(struct cpudl *cp, int idx) static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl) { - WARN_ON(!cpu_present(idx) || idx == IDX_INVALID); + WARN_ON(idx == IDX_INVALID || !cpu_present(idx)); if (dl_time_before(new_dl, cp->elements[idx].dl)) { cp->elements[idx].dl = new_dl; @@ -117,7 +117,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, } out: - WARN_ON(!cpu_present(best_cpu) && best_cpu != -1); + WARN_ON(best_cpu != -1 && !cpu_present(best_cpu)); return best_cpu; } -- cgit v0.10.2 From faa5993736d9b44b508cab4f1f3a77d66641c6f4 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Fri, 21 Feb 2014 11:37:15 +0100 Subject: sched/deadline: Prevent rt_time growth to infinity Kirill Tkhai noted: Since deadline tasks share rt bandwidth, we must care about bandwidth timer set. Otherwise rt_time may grow up to infinity in update_curr_dl(), if there are no other available RT tasks on top level bandwidth. RT task were in fact throttled right after they got enqueued, and never executed again (rt_time never again went below rt_runtime). Peter then proposed to accrue DL execution on rt_time only when rt timer is active, and proposed a patch (this patch is a slight modification of that) to implement that behavior. While this solves Kirill problem, it has a drawback. Indeed, Kirill noted again: It looks we may get into a situation, when all CPU time is shared between RT and DL tasks: rt_runtime = n rt_period = 2n | RT working, DL sleeping | DL working, RT sleeping | ----------------------------------------------------------- | (1) duration = n | (2) duration = n | (repeat) |--------------------------|------------------------------| | (rt_bw timer is running) | (rt_bw timer is not running) | No time for fair tasks at all. While this can happen during the first period, if rq is always backlogged, RT tasks won't have the opportunity to execute anymore: rt_time reached rt_runtime during (1), suppose after (2) RT is enqueued back, it gets throttled since rt timer didn't fire, replenishment is from now on eaten up by DL tasks that accrue their execution on rt_time (while rt timer is active - we have an RT task waiting for replenishment). FAIR tasks are not touched after this first period. Ok, this is not ideal, and the situation is even worse! What above (the nice case), practically never happens in reality, where your rt timer is not aligned to tasks periods, tasks are in general not periodic, etc.. Long story short, you always risk to overload your system. This patch is based on Peter's idea, but exploits an additional fact: if you don't have RT tasks enqueued, it makes little sense to continue incrementing rt_time once you reached the upper limit (DL tasks have their own mechanism for throttling). This cures both problems: - no matter how many DL instances in the past, you'll have an rt_time slightly above rt_runtime when an RT task is enqueued, and from that point on (after the first replenishment), the task will normally execute; - you can still eat up all bandwidth during the first period, but not anymore after that, remember that DL execution will increment rt_time till the upper limit is reached. The situation is still not perfect! But, we have a simple solution for now, that limits how much you can jeopardize your system, as we keep working towards the right answer: RT groups scheduled using deadline servers. Reported-by: Kirill Tkhai Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20140225151515.617714e2f2cd6c558531ba61@gmail.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index aecf930..6e79b3f 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -562,6 +562,8 @@ int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) return 1; } +extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); + /* * Update the current task's runtime statistics (provided it is still * a -deadline task and has not been removed from the dl_rq). @@ -625,11 +627,13 @@ static void update_curr_dl(struct rq *rq) struct rt_rq *rt_rq = &rq->rt; raw_spin_lock(&rt_rq->rt_runtime_lock); - rt_rq->rt_time += delta_exec; /* * We'll let actual RT tasks worry about the overflow here, we - * have our own CBS to keep us inline -- see above. + * have our own CBS to keep us inline; only account when RT + * bandwidth is relevant. */ + if (sched_rt_bandwidth_account(rt_rq)) + rt_rq->rt_time += delta_exec; raw_spin_unlock(&rt_rq->rt_runtime_lock); } } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a2740b7..1999021 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -538,6 +538,14 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) #endif /* CONFIG_RT_GROUP_SCHED */ +bool sched_rt_bandwidth_account(struct rt_rq *rt_rq) +{ + struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + + return (hrtimer_active(&rt_b->rt_period_timer) || + rt_rq->rt_time < rt_b->rt_runtime); +} + #ifdef CONFIG_SMP /* * We ran out of runtime, see if we can borrow some from our neighbours. -- cgit v0.10.2 From 26e61e8939b1fe8729572dabe9a9e97d930dd4f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 21 Feb 2014 16:03:12 +0100 Subject: perf/x86: Fix event scheduling Vince "Super Tester" Weaver reported a new round of syscall fuzzing (Trinity) failures, with perf WARN_ON()s triggering. He also provided traces of the failures. This is I think the relevant bit: > pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_disable: x86_pmu_disable > pec_1076_warn-2804 [000] d... 147.926153: x86_pmu_state: Events: { > pec_1076_warn-2804 [000] d... 147.926156: x86_pmu_state: 0: state: .R config: ffffffffffffffff ( (null)) > pec_1076_warn-2804 [000] d... 147.926158: x86_pmu_state: 33: state: AR config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926159: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926160: x86_pmu_state: n_events: 1, n_added: 0, n_txn: 1 > pec_1076_warn-2804 [000] d... 147.926161: x86_pmu_state: Assignment: { > pec_1076_warn-2804 [000] d... 147.926162: x86_pmu_state: 0->33 tag: 1 config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926163: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926166: collect_events: Adding event: 1 (ffff880119ec8800) So we add the insn:p event (fd[23]). At this point we should have: n_events = 2, n_added = 1, n_txn = 1 > pec_1076_warn-2804 [000] d... 147.926170: collect_events: Adding event: 0 (ffff8800c9e01800) > pec_1076_warn-2804 [000] d... 147.926172: collect_events: Adding event: 4 (ffff8800cbab2c00) We try and add the {BP,cycles,br_insn} group (fd[3], fd[4], fd[15]). These events are 0:cycles and 4:br_insn, the BP event isn't x86_pmu so that's not visible. group_sched_in() pmu->start_txn() /* nop - BP pmu */ event_sched_in() event->pmu->add() So here we should end up with: 0: n_events = 3, n_added = 2, n_txn = 2 4: n_events = 4, n_added = 3, n_txn = 3 But seeing the below state on x86_pmu_enable(), the must have failed, because the 0 and 4 events aren't there anymore. Looking at group_sched_in(), since the BP is the leader, its event_sched_in() must have succeeded, for otherwise we would not have seen the sibling adds. But since neither 0 or 4 are in the below state; their event_sched_in() must have failed; but I don't see why, the complete state: 0,0,1:p,4 fits perfectly fine on a core2. However, since we try and schedule 4 it means the 0 event must have succeeded! Therefore the 4 event must have failed, its failure will have put group_sched_in() into the fail path, which will call: event_sched_out() event->pmu->del() on 0 and the BP event. Now x86_pmu_del() will reduce n_events; but it will not reduce n_added; giving what we see below: n_event = 2, n_added = 2, n_txn = 2 > pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_enable: x86_pmu_enable > pec_1076_warn-2804 [000] d... 147.926177: x86_pmu_state: Events: { > pec_1076_warn-2804 [000] d... 147.926179: x86_pmu_state: 0: state: .R config: ffffffffffffffff ( (null)) > pec_1076_warn-2804 [000] d... 147.926181: x86_pmu_state: 33: state: AR config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926182: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: n_events: 2, n_added: 2, n_txn: 2 > pec_1076_warn-2804 [000] d... 147.926184: x86_pmu_state: Assignment: { > pec_1076_warn-2804 [000] d... 147.926186: x86_pmu_state: 0->33 tag: 1 config: 0 (ffff88011ac99800) > pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: 1->0 tag: 1 config: 1 (ffff880119ec8800) > pec_1076_warn-2804 [000] d... 147.926188: x86_pmu_state: } > pec_1076_warn-2804 [000] d... 147.926190: x86_pmu_enable: S0: hwc->idx: 33, hwc->last_cpu: 0, hwc->last_tag: 1 hwc->state: 0 So the problem is that x86_pmu_del(), when called from a group_sched_in() that fails (for whatever reason), and without x86_pmu TXN support (because the leader is !x86_pmu), will corrupt the n_added state. Reported-and-Tested-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Stephane Eranian Cc: Dave Jones Cc: Link: http://lkml.kernel.org/r/20140221150312.GF3104@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 895604f..79f9f84 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1192,6 +1192,9 @@ static void x86_pmu_del(struct perf_event *event, int flags) for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { + if (i >= cpuc->n_events - cpuc->n_added) + --cpuc->n_added; + if (x86_pmu.put_event_constraints) x86_pmu.put_event_constraints(cpuc, event); -- cgit v0.10.2 From e3703f8cdfcf39c25c4338c3ad8e68891cca3731 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 24 Feb 2014 12:06:12 +0100 Subject: perf: Fix hotplug splat Drew Richardson reported that he could make the kernel go *boom* when hotplugging while having perf events active. It turned out that when you have a group event, the code in __perf_event_exit_context() fails to remove the group siblings from the context. We then proceed with destroying and freeing the event, and when you re-plug the CPU and try and add another event to that CPU, things go *boom* because you've still got dead entries there. Reported-by: Drew Richardson Signed-off-by: Peter Zijlstra Cc: Will Deacon Cc: Link: http://lkml.kernel.org/n/tip-k6v5wundvusvcseqj1si0oz0@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 56003c6..fa0b2d4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7856,14 +7856,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu) static void __perf_event_exit_context(void *__info) { struct perf_event_context *ctx = __info; - struct perf_event *event, *tmp; + struct perf_event *event; perf_pmu_rotate_stop(ctx->pmu); - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) - __perf_remove_from_context(event); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) + rcu_read_lock(); + list_for_each_entry_rcu(event, &ctx->event_list, event_entry) __perf_remove_from_context(event); + rcu_read_unlock(); } static void perf_event_exit_cpu_context(int cpu) @@ -7887,11 +7887,11 @@ static void perf_event_exit_cpu(int cpu) { struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); + perf_event_exit_cpu_context(cpu); + mutex_lock(&swhash->hlist_mutex); swevent_hlist_release(swhash); mutex_unlock(&swhash->hlist_mutex); - - perf_event_exit_cpu_context(cpu); } #else static inline void perf_event_exit_cpu(int cpu) { } -- cgit v0.10.2 From f5f9739d7a0ccbdcf913a0b3604b134129d14f7e Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Wed, 26 Feb 2014 11:19:33 +0000 Subject: sched: Put rq's sched_avg under CONFIG_FAIR_GROUP_SCHED The struct sched_avg of struct rq is only used in case group scheduling is enabled inside __update_tg_runnable_avg() to update per-cpu representation of a task group. I.e. that there is no need to maintain the runnable avg of a rq in the !CONFIG_FAIR_GROUP_SCHED case. This patch guards struct sched_avg of struct rq and update_rq_runnable_avg() with CONFIG_FAIR_GROUP_SCHED. There is an extra empty definition for update_rq_runnable_avg() necessary for the !CONFIG_FAIR_GROUP_SCHED && CONFIG_SMP case. The function print_cfs_group_stats() which prints out struct sched_avg of struct rq is already guarded with CONFIG_FAIR_GROUP_SCHED. Reviewed-by: Ben Segall Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/530DCDC5.1060406@arm.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a3a41c61..be4f7d9 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2374,12 +2374,19 @@ static inline void __update_group_entity_contrib(struct sched_entity *se) se->avg.load_avg_contrib >>= NICE_0_SHIFT; } } + +static inline void update_rq_runnable_avg(struct rq *rq, int runnable) +{ + __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable); + __update_tg_runnable_avg(&rq->avg, &rq->cfs); +} #else /* CONFIG_FAIR_GROUP_SCHED */ static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, int force_update) {} static inline void __update_tg_runnable_avg(struct sched_avg *sa, struct cfs_rq *cfs_rq) {} static inline void __update_group_entity_contrib(struct sched_entity *se) {} +static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {} #endif /* CONFIG_FAIR_GROUP_SCHED */ static inline void __update_task_entity_contrib(struct sched_entity *se) @@ -2478,12 +2485,6 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) __update_cfs_rq_tg_load_contrib(cfs_rq, force_update); } -static inline void update_rq_runnable_avg(struct rq *rq, int runnable) -{ - __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable); - __update_tg_runnable_avg(&rq->avg, &rq->cfs); -} - /* Add the load generated by se into cfs_rq's child load-average */ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d608125..046084e 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -541,6 +541,8 @@ struct rq { #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this cpu: */ struct list_head leaf_cfs_rq_list; + + struct sched_avg avg; #endif /* CONFIG_FAIR_GROUP_SCHED */ /* @@ -630,8 +632,6 @@ struct rq { #ifdef CONFIG_SMP struct llist_head wake_list; #endif - - struct sched_avg avg; }; static inline int cpu_of(struct rq *rq) -- cgit v0.10.2 From 06d50c65b1043b166d102accc081093f79d8f7e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 24 Feb 2014 18:22:07 +0100 Subject: sched/idle: Remove stale old file Commit cf37b6b48428d ("sched/idle: Move cpu/idle.c to sched/idle.c") said to simply move a file; somehow it got mangled and created an old version of the file and forgot to remove the old file. Fix this fail; add the lost change and remove the now identical old file. Signed-off-by: Peter Zijlstra Cc: rjw@rjwysocki.net Cc: nicolas.pitre@linaro.org Cc: preeti@linux.vnet.ibm.com Cc: Daniel Lezcano Link: http://lkml.kernel.org/r/20140224172207.GC9987@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c deleted file mode 100644 index b7976a1..0000000 --- a/kernel/cpu/idle.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Generic entry point for the idle threads - */ -#include -#include -#include -#include -#include -#include - -#include - -#include - -static int __read_mostly cpu_idle_force_poll; - -void cpu_idle_poll_ctrl(bool enable) -{ - if (enable) { - cpu_idle_force_poll++; - } else { - cpu_idle_force_poll--; - WARN_ON_ONCE(cpu_idle_force_poll < 0); - } -} - -#ifdef CONFIG_GENERIC_IDLE_POLL_SETUP -static int __init cpu_idle_poll_setup(char *__unused) -{ - cpu_idle_force_poll = 1; - return 1; -} -__setup("nohlt", cpu_idle_poll_setup); - -static int __init cpu_idle_nopoll_setup(char *__unused) -{ - cpu_idle_force_poll = 0; - return 1; -} -__setup("hlt", cpu_idle_nopoll_setup); -#endif - -static inline int cpu_idle_poll(void) -{ - rcu_idle_enter(); - trace_cpu_idle_rcuidle(0, smp_processor_id()); - local_irq_enable(); - while (!tif_need_resched()) - cpu_relax(); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); - rcu_idle_exit(); - return 1; -} - -/* Weak implementations for optional arch specific functions */ -void __weak arch_cpu_idle_prepare(void) { } -void __weak arch_cpu_idle_enter(void) { } -void __weak arch_cpu_idle_exit(void) { } -void __weak arch_cpu_idle_dead(void) { } -void __weak arch_cpu_idle(void) -{ - cpu_idle_force_poll = 1; - local_irq_enable(); -} - -/* - * Generic idle loop implementation - */ -static void cpu_idle_loop(void) -{ - while (1) { - tick_nohz_idle_enter(); - - while (!need_resched()) { - check_pgt_cache(); - rmb(); - - if (cpu_is_offline(smp_processor_id())) - arch_cpu_idle_dead(); - - local_irq_disable(); - arch_cpu_idle_enter(); - - /* - * In poll mode we reenable interrupts and spin. - * - * Also if we detected in the wakeup from idle - * path that the tick broadcast device expired - * for us, we don't want to go deep idle as we - * know that the IPI is going to arrive right - * away - */ - if (cpu_idle_force_poll || tick_check_broadcast_expired()) { - cpu_idle_poll(); - } else { - if (!current_clr_polling_and_test()) { - stop_critical_timings(); - rcu_idle_enter(); - if (cpuidle_idle_call()) - arch_cpu_idle(); - if (WARN_ON_ONCE(irqs_disabled())) - local_irq_enable(); - rcu_idle_exit(); - start_critical_timings(); - } else { - local_irq_enable(); - } - __current_set_polling(); - } - arch_cpu_idle_exit(); - } - - /* - * Since we fell out of the loop above, we know - * TIF_NEED_RESCHED must be set, propagate it into - * PREEMPT_NEED_RESCHED. - * - * This is required because for polling idle loops we will - * not have had an IPI to fold the state for us. - */ - preempt_set_need_resched(); - tick_nohz_idle_exit(); - schedule_preempt_disabled(); - } -} - -void cpu_startup_entry(enum cpuhp_state state) -{ - /* - * This #ifdef needs to die, but it's too late in the cycle to - * make this generic (arm and sh have never invoked the canary - * init for the non boot cpus!). Will be fixed in 3.11 - */ -#ifdef CONFIG_X86 - /* - * If we're the non-boot CPU, nothing set the stack canary up - * for us. The boot CPU already has it initialized but no harm - * in doing it again. This is a good place for updating it, as - * we wont ever return from this function (so the invalid - * canaries already on the stack wont ever trigger). - */ - boot_init_stack_canary(); -#endif - __current_set_polling(); - arch_cpu_idle_prepare(); - cpu_idle_loop(); -} diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 14ca434..b7976a1 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -108,14 +108,17 @@ static void cpu_idle_loop(void) __current_set_polling(); } arch_cpu_idle_exit(); - /* - * We need to test and propagate the TIF_NEED_RESCHED - * bit here because we might not have send the - * reschedule IPI to idle tasks. - */ - if (tif_need_resched()) - set_preempt_need_resched(); } + + /* + * Since we fell out of the loop above, we know + * TIF_NEED_RESCHED must be set, propagate it into + * PREEMPT_NEED_RESCHED. + * + * This is required because for polling idle loops we will + * not have had an IPI to fold the state for us. + */ + preempt_set_need_resched(); tick_nohz_idle_exit(); schedule_preempt_disabled(); } -- cgit v0.10.2 From 37e117c07b89194aae7062bc63bde1104c03db02 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 14 Feb 2014 12:25:08 +0100 Subject: sched: Guarantee task priority in pick_next_task() Michael spotted that the idle_balance() push down created a task priority problem. Previously, when we called idle_balance() before pick_next_task() it wasn't a problem when -- because of the rq->lock droppage -- an rt/dl task slipped in. Similarly for pre_schedule(), rt pre-schedule could have a dl task slip in. But by pulling it into the pick_next_task() loop, we'll not try a higher task priority again. Cure this by creating a re-start condition in pick_next_task(); and triggering this from pick_next_task_{rt,fair}(). It also fixes a live-lock where we get stuck in pick_next_task_fair() due to idle_balance() seeing !0 nr_running but there not actually being any fair tasks about. Reported-by: Michael Wang Fixes: 38033c37faab ("sched: Push down pre_schedule() and idle_balance()") Tested-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: Juri Lelli Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20140224121218.GR15586@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a8a73b8..cde573d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2586,24 +2586,28 @@ static inline void schedule_debug(struct task_struct *prev) static inline struct task_struct * pick_next_task(struct rq *rq, struct task_struct *prev) { - const struct sched_class *class; + const struct sched_class *class = &fair_sched_class; struct task_struct *p; /* * Optimization: we know that if all tasks are in * the fair class we can call that function directly: */ - if (likely(prev->sched_class == &fair_sched_class && + if (likely(prev->sched_class == class && rq->nr_running == rq->cfs.h_nr_running)) { p = fair_sched_class.pick_next_task(rq, prev); - if (likely(p)) + if (likely(p && p != RETRY_TASK)) return p; } +again: for_each_class(class) { p = class->pick_next_task(rq, prev); - if (p) + if (p) { + if (unlikely(p == RETRY_TASK)) + goto again; return p; + } } BUG(); /* the idle class will always have a runnable task */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index be4f7d9..16042b5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4686,6 +4686,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev) struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; struct task_struct *p; + int new_tasks; again: #ifdef CONFIG_FAIR_GROUP_SCHED @@ -4784,7 +4785,17 @@ simple: return p; idle: - if (idle_balance(rq)) /* drops rq->lock */ + /* + * Because idle_balance() releases (and re-acquires) rq->lock, it is + * possible for any higher priority task to appear. In that case we + * must re-start the pick_next_entity() loop. + */ + new_tasks = idle_balance(rq); + + if (rq->nr_running != rq->cfs.h_nr_running) + return RETRY_TASK; + + if (new_tasks) goto again; return NULL; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 4d4b386..398b3f9 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1360,8 +1360,16 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) struct task_struct *p; struct rt_rq *rt_rq = &rq->rt; - if (need_pull_rt_task(rq, prev)) + if (need_pull_rt_task(rq, prev)) { pull_rt_task(rq); + /* + * pull_rt_task() can drop (and re-acquire) rq->lock; this + * means a dl task can slip in, in which case we need to + * re-start task selection. + */ + if (unlikely(rq->dl.dl_nr_running)) + return RETRY_TASK; + } if (!rt_rq->rt_nr_running) return NULL; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 046084e..1929deb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1091,6 +1091,8 @@ static const u32 prio_to_wmult[40] = { #define DEQUEUE_SLEEP 1 +#define RETRY_TASK ((void *)-1UL) + struct sched_class { const struct sched_class *next; @@ -1105,6 +1107,9 @@ struct sched_class { * It is the responsibility of the pick_next_task() method that will * return the next task to call put_prev_task() on the @prev task or * something equivalent. + * + * May return RETRY_TASK when it finds a higher prio class has runnable + * tasks. */ struct task_struct * (*pick_next_task) (struct rq *rq, struct task_struct *prev); -- cgit v0.10.2 From 2b3942e4bb20ef8ef26515bd958c2df83d9a6210 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Mon, 24 Feb 2014 22:12:01 +0800 Subject: trace: Replace hardcoding of 19 with MAX_NICE Use MAX_NICE instead of the value 19 for ring_buffer_benchmark. Signed-off-by: Dongsheng Yang Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1393251121-25534-1-git-send-email-yangds.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index a5457d5..0434ff1 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -40,8 +40,8 @@ static int write_iteration = 50; module_param(write_iteration, uint, 0644); MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); -static int producer_nice = 19; -static int consumer_nice = 19; +static int producer_nice = MAX_NICE; +static int consumer_nice = MAX_NICE; static int producer_fifo = -1; static int consumer_fifo = -1; @@ -308,7 +308,7 @@ static void ring_buffer_producer(void) /* Let the user know that the test is running at low priority */ if (producer_fifo < 0 && consumer_fifo < 0 && - producer_nice == 19 && consumer_nice == 19) + producer_nice == MAX_NICE && consumer_nice == MAX_NICE) trace_printk("WARNING!!! This test is running at lowest priority.\n"); trace_printk("Time: %lld (usecs)\n", time); -- cgit v0.10.2 From 9e3170411ed171a126f4dca1672012a33efe59e5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2014 17:44:18 +0000 Subject: perf: Fix prototype of find_pmu_context() For some reason find_pmu_context() is defined as returning void * rather than a __percpu struct perf_cpu_context *. As all the requisite types are defined in advance there's no reason to keep it that way. This patch modifies the prototype of pmu_find_context to return a __percpu struct perf_cpu_context *. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra Reviewed-by: Dave Martin Acked-by: Will Deacon Link: http://lkml.kernel.org/r/1392054264-23570-2-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index fa99006..4251598 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6313,7 +6313,7 @@ static int perf_event_idx_default(struct perf_event *event) * Ensures all contexts with the same task_ctx_nr have the same * pmu_cpu_context too. */ -static void *find_pmu_context(int ctxn) +static struct perf_cpu_context __percpu *find_pmu_context(int ctxn) { struct pmu *pmu; -- cgit v0.10.2 From fdded676c3ef680bf1abc415d307d7e69a6768d1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 10 Feb 2014 17:44:19 +0000 Subject: perf: Remove redundant PMU assignment Currently perf_branch_stack_sched_in iterates over the set of pmus, checks that each pmu has a flush_branch_stack callback, then overwrites the pmu before calling the callback. This is either redundant or broken. In systems with a single hw pmu, pmu == cpuctx->ctx.pmu, and thus the assignment is redundant. In systems with multiple hw pmus (i.e. multiple pmus with task_ctx_nr == perf_hw_context) the pmus share the same perf_cpu_context. Thus the assignment can cause one of the pmus to flush its branch stack repeatedly rather than causing each of the pmus to flush their branch stacks. Worse still, if only some pmus have the callback the assignment can result in a branch to NULL. This patch removes the redundant assignment. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1392054264-23570-3-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 4251598..823a53d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2582,8 +2582,6 @@ static void perf_branch_stack_sched_in(struct task_struct *prev, if (cpuctx->ctx.nr_branch_stack > 0 && pmu->flush_branch_stack) { - pmu = cpuctx->ctx.pmu; - perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(pmu); -- cgit v0.10.2 From c347a2f1793e285b0812343e715bb7e953dbdf68 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 24 Feb 2014 12:26:21 +0100 Subject: perf/x86: Add a few more comments Add a few comments on the ->add(), ->del() and ->*_txn() implementation. Requested-by: Vince Weaver Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-he3819318c245j7t5e1e22tr@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 79f9f84..ae407f7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *pmu) * hw_perf_group_sched_in() or x86_pmu_enable() * * step1: save events moving to new counters - * step2: reprogram moved events into new counters */ for (i = 0; i < n_running; i++) { event = cpuc->event_list[i]; @@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *pmu) x86_pmu_stop(event, PERF_EF_UPDATE); } + /* + * step2: reprogram moved events into new counters + */ for (i = 0; i < cpuc->n_events; i++) { event = cpuc->event_list[i]; hwc = &event->hw; @@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be performed - * at commit time (->commit_txn) as a whole + * at commit time (->commit_txn) as a whole. */ if (cpuc->group_flag & PERF_EVENT_TXN) goto done_collect; @@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event *event, int flags) memcpy(cpuc->assign, assign, n*sizeof(int)); done_collect: + /* + * Commit the collect_events() state. See x86_pmu_del() and + * x86_pmu_*_txn(). + */ cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; @@ -1183,28 +1189,38 @@ static void x86_pmu_del(struct perf_event *event, int flags) * If we're called during a txn, we don't need to do anything. * The events never got scheduled and ->cancel_txn will truncate * the event_list. + * + * XXX assumes any ->del() called during a TXN will only be on + * an event added during that same TXN. */ if (cpuc->group_flag & PERF_EVENT_TXN) return; + /* + * Not a TXN, therefore cleanup properly. + */ x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { - if (event == cpuc->event_list[i]) { + if (event == cpuc->event_list[i]) + break; + } - if (i >= cpuc->n_events - cpuc->n_added) - --cpuc->n_added; + if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */ + return; - if (x86_pmu.put_event_constraints) - x86_pmu.put_event_constraints(cpuc, event); + /* If we have a newly added event; make sure to decrease n_added. */ + if (i >= cpuc->n_events - cpuc->n_added) + --cpuc->n_added; - while (++i < cpuc->n_events) - cpuc->event_list[i-1] = cpuc->event_list[i]; + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, event); + + /* Delete the array entry. */ + while (++i < cpuc->n_events) + cpuc->event_list[i-1] = cpuc->event_list[i]; + --cpuc->n_events; - --cpuc->n_events; - break; - } - } perf_event_update_userpage(event); } @@ -1598,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) { __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); /* - * Truncate the collected events. + * Truncate collected array by the number of events added in this + * transaction. See x86_pmu_add() and x86_pmu_*_txn(). */ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); @@ -1609,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) * Commit group events scheduling transaction * Perform the group schedulability test as a whole * Return 0 if success + * + * Does not cancel the transaction on failure; expects the caller to do this. */ static int x86_pmu_commit_txn(struct pmu *pmu) { diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 4972c24..3b2f9bd 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -130,9 +130,11 @@ struct cpu_hw_events { unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; - int n_events; - int n_added; - int n_txn; + int n_events; /* the # of events in the below arrays */ + int n_added; /* the # last events in the below arrays; + they've never been enabled yet */ + int n_txn; /* the # last events in the below arrays; + added in the current transaction */ int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ -- cgit v0.10.2 From 4a2345937c17722bd2979f662ae909846b4a052a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 24 Feb 2014 12:43:31 +0100 Subject: perf: Optimize group_sched_in() Use the ctx pmu instead of the event pmu. When a group leader is a software event but the group contains hardware events, the entire group is on the hardware PMU. Using the hardware PMU for the transaction makes most sense since that's the most expensive one to programm (and software PMUs generally don't have TXN support anyway). Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-sctoo9t2f3nn2c9g568928q3@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 823a53d..661951a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1733,7 +1733,7 @@ group_sched_in(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event, *partial_group = NULL; - struct pmu *pmu = group_event->pmu; + struct pmu *pmu = ctx->pmu; u64 now = ctx->time; bool simulate = false; -- cgit v0.10.2 From 19e7640d1f2302c20df2733e3e3df49acb17189e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 27 Feb 2014 12:09:22 +0000 Subject: arm64: Replace ZONE_DMA32 with ZONE_DMA On arm64 we do not have two DMA zones, so it does not make sense to implement ZONE_DMA32. This patch changes ZONE_DMA32 with ZONE_DMA, the latter covering 32-bit dma address space to honour GFP_DMA allocations. Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0cb33a2..33e8f84 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -86,7 +86,7 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y -config ZONE_DMA32 +config ZONE_DMA def_bool y config ARCH_DMA_ADDR_T_64BIT diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 985fc07..26b2512c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -39,9 +39,9 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, return NULL; } - if (IS_ENABLED(CONFIG_ZONE_DMA32) && + if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) - flags |= GFP_DMA32; + flags |= GFP_DMA; if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d0b4c2e..a61a4d5 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -59,22 +60,22 @@ static int __init early_initrd(char *p) early_param("initrd", early_initrd); #endif -#define MAX_DMA32_PFN ((4UL * 1024 * 1024 * 1024) >> PAGE_SHIFT) - static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma32 = min; + unsigned long max_dma = min; memset(zone_size, 0, sizeof(zone_size)); -#ifdef CONFIG_ZONE_DMA32 /* 4GB maximum for 32-bit only capable devices */ - max_dma32 = max(min, min(max, MAX_DMA32_PFN)); - zone_size[ZONE_DMA32] = max_dma32 - min; -#endif - zone_size[ZONE_NORMAL] = max - max_dma32; + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + unsigned long max_dma_phys = + (unsigned long)dma_to_phys(NULL, DMA_BIT_MASK(32) + 1); + max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)); + zone_size[ZONE_DMA] = max_dma - min; + } + zone_size[ZONE_NORMAL] = max - max_dma; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -84,15 +85,15 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; -#ifdef CONFIG_ZONE_DMA32 - if (start < max_dma32) { - unsigned long dma_end = min(end, max_dma32); - zhole_size[ZONE_DMA32] -= dma_end - start; + + if (IS_ENABLED(CONFIG_ZONE_DMA) && start < max_dma) { + unsigned long dma_end = min(end, max_dma); + zhole_size[ZONE_DMA] -= dma_end - start; } -#endif - if (end > max_dma32) { + + if (end > max_dma) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma32); + unsigned long normal_start = max(start, max_dma); zhole_size[ZONE_NORMAL] -= normal_end - normal_start; } } -- cgit v0.10.2 From 64be38ab03e9b238a1299857fef8b3707c0ed045 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Thu, 27 Feb 2014 17:10:12 +0530 Subject: genirq: Include missing header file in irqdomain.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include appropriate header file include/linux/of_irq.h in kernel/irq/irqdomain.c because it contains prototype definition of function define in kernel/irq/irqdomain.c. This eliminates the following warning in kernel/irq/irqdomain.c: kernel/irq/irqdomain.c:468:14: warning: no previous prototype for ‘irq_create_of_mapping’ [-Wmissing-prototypes] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Cc: Benjamin Herrenschmidt Link: http://lkml.kernel.org/r/eb89aebea7ff1a46122918ac389ebecf8248be9a.1393493276.git.rashika.kheria@gmail.com Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cf68bb3..f140337 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 3690951fc6d42f3a0903987677d0e592c49dd8db Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 27 Feb 2014 12:24:57 +0000 Subject: arm64: Use swiotlb late initialisation Since arm64 does not support ISA, there is no need for early swiotlb initialisation. This patch switches the DMA mapping code to swiotlb_tlb_late_init_with_default_size(). A side effect of this is that GFP_DMA is used for the swiotlb buffer and devices with a 32-bit coherent mask are correctly supported. Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 26b2512c..afa91a2 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -93,11 +93,17 @@ static struct dma_map_ops coherent_swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, }; -void __init arm64_swiotlb_init(void) +extern int swiotlb_late_init_with_default_size(size_t default_size); + +static int __init swiotlb_late_init(void) { + size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); + dma_ops = &coherent_swiotlb_dma_ops; - swiotlb_init(1); + + return swiotlb_late_init_with_default_size(swiotlb_size); } +subsys_initcall(swiotlb_late_init); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index a61a4d5..88627c4 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -262,8 +262,6 @@ static void __init free_unused_memmap(void) */ void __init mem_init(void) { - arm64_swiotlb_init(); - max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map; #ifndef CONFIG_SPARSEMEM_VMEMMAP -- cgit v0.10.2 From 4729583006772b9530404bc1bb7c3aa4a10ffd4d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 27 Feb 2014 18:19:03 +0800 Subject: cpuset: fix a locking issue in cpuset_migrate_mm() I can trigger a lockdep warning: # mount -t cgroup -o cpuset xxx /cgroup # mkdir /cgroup/cpuset # mkdir /cgroup/tmp # echo 0 > /cgroup/tmp/cpuset.cpus # echo 0 > /cgroup/tmp/cpuset.mems # echo 1 > /cgroup/tmp/cpuset.memory_migrate # echo $$ > /cgroup/tmp/tasks # echo 1 > /cgruop/tmp/cpuset.mems =============================== [ INFO: suspicious RCU usage. ] 3.14.0-rc1-0.1-default+ #32 Not tainted ------------------------------- include/linux/cgroup.h:682 suspicious rcu_dereference_check() usage! ... [] dump_stack+0x72/0x86 [] lockdep_rcu_suspicious+0x101/0x140 [] cpuset_migrate_mm+0xb1/0xe0 ... We used to hold cgroup_mutex when calling cpuset_migrate_mm(), but now we hold cpuset_mutex, which causes task_css() to complain. This is not a false-positive but a real issue. Holding cpuset_mutex won't prevent a task from migrating to another cpuset, and it won't prevent the original task->cgroup from destroying during this change. Fixes: 5d21cc2db040 (cpuset: replace cgroup_mutex locking with cpuset internal locking) Cc: # 3.9+ Signed-off-by: Li Zefan Sigend-off-by: Tejun Heo diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4410ac6..dba9e4a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -974,12 +974,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Temporarilly set tasks mems_allowed to target nodes of migration, * so that the migration code can allocate pages on these nodes. * - * Call holding cpuset_mutex, so current's cpuset won't change - * during this call, as manage_mutex holds off any cpuset_attach() - * calls. Therefore we don't need to take task_lock around the - * call to guarantee_online_mems(), as we know no one is changing - * our task's cpuset. - * * While the mm_struct we are migrating is typically from some * other task, the task_struct mems_allowed that we are hacking * is for our current task, which must allocate new pages for that @@ -996,8 +990,10 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); + rcu_read_lock(); mems_cs = effective_nodemask_cpuset(task_cs(tsk)); guarantee_online_mems(mems_cs, &tsk->mems_allowed); + rcu_read_unlock(); } /* -- cgit v0.10.2 From 99afb0fd5f05aac467ffa85c36778fec4396209b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 27 Feb 2014 18:19:36 +0800 Subject: cpuset: fix a race condition in __cpuset_node_allowed_softwall() It's not safe to access task's cpuset after releasing task_lock(). Holding callback_mutex won't help. Cc: Signed-off-by: Li Zefan Signed-off-by: Tejun Heo diff --git a/kernel/cpuset.c b/kernel/cpuset.c index dba9e4a..e6b1b66 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2482,9 +2482,9 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) task_lock(current); cs = nearest_hardwall_ancestor(task_cs(current)); + allowed = node_isset(node, cs->mems_allowed); task_unlock(current); - allowed = node_isset(node, cs->mems_allowed); mutex_unlock(&callback_mutex); return allowed; } -- cgit v0.10.2 From 171699f7630c92d0a928f83e5fb3aeabe35398c0 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 26 Feb 2014 12:06:49 -0700 Subject: x86: Add support for the clflushopt instruction Add support for the new clflushopt instruction. This instruction was announced in the document "Intel Architecture Instruction Set Extensions Programming Reference" with Ref # 319433-018. http://download-software.intel.com/sites/default/files/managed/50/1a/319433-018.pdf [ hpa: changed the feature flag to simply X86_FEATURE_CLFLUSHOPT - if that is what we want to report in /proc/cpuinfo anyway... ] Signed-off-by: Ross Zwisler Link: http://lkml.kernel.org/r/1393441612-19729-2-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 5f12968..bc507d7 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -221,6 +221,7 @@ #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ #define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ +#define X86_FEATURE_CLFLUSHOPT (9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_AVX512PF (9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER (9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD (9*32+28) /* AVX-512 Conflict Detection */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 645cad2..e820c08 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -191,6 +191,14 @@ static inline void clflush(volatile void *__p) asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); } +static inline void clflushopt(volatile void *__p) +{ + alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0", + ".byte 0x66; clflush %P0", + X86_FEATURE_CLFLUSHOPT, + "+m" (*(volatile char __force *)__p)); +} + #define nop() asm volatile ("nop") -- cgit v0.10.2 From 8b80fd8b45458a7d0b726d454c51c5219e0fb2ee Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 26 Feb 2014 12:06:50 -0700 Subject: x86: Use clflushopt in clflush_cache_range If clflushopt is available on the system, use it instead of clflush in clflush_cache_range. Signed-off-by: Ross Zwisler Link: http://lkml.kernel.org/r/1393441612-19729-3-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b3b19f4..b1c4672 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -126,8 +126,8 @@ within(unsigned long addr, unsigned long start, unsigned long end) * @vaddr: virtual start address * @size: number of bytes to flush * - * clflush is an unordered instruction which needs fencing with mfence - * to avoid ordering issues. + * clflushopt is an unordered instruction which needs fencing with mfence or + * sfence to avoid ordering issues. */ void clflush_cache_range(void *vaddr, unsigned int size) { @@ -136,11 +136,11 @@ void clflush_cache_range(void *vaddr, unsigned int size) mb(); for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) - clflush(vaddr); + clflushopt(vaddr); /* * Flush any possible final partial cacheline: */ - clflush(vend); + clflushopt(vend); mb(); } -- cgit v0.10.2 From 2a0c772f1967565d5b71a98012e74d5a7bddc1c4 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 26 Feb 2014 12:06:51 -0700 Subject: x86: Use clflushopt in drm_clflush_page If clflushopt is available on the system, use it instead of clflush in drm_clflush_page. Signed-off-by: Ross Zwisler Link: http://lkml.kernel.org/r/1393441612-19729-4-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index bb8f580..c518fb6 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -32,6 +32,12 @@ #include #if defined(CONFIG_X86) + +/* + * clflushopt is an unordered instruction which needs fencing with mfence or + * sfence to avoid ordering issues. For drm_clflush_page this fencing happens + * in the caller. + */ static void drm_clflush_page(struct page *page) { @@ -44,7 +50,7 @@ drm_clflush_page(struct page *page) page_virtual = kmap_atomic(page); for (i = 0; i < PAGE_SIZE; i += size) - clflush(page_virtual + i); + clflushopt(page_virtual + i); kunmap_atomic(page_virtual); } -- cgit v0.10.2 From 2a0788dc9bc46789ec98aea0f30c6fb420196b12 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 26 Feb 2014 12:06:52 -0700 Subject: x86: Use clflushopt in drm_clflush_virt_range If clflushopt is available on the system, use it instead of clflush in drm_clflush_virt_range. Signed-off-by: Ross Zwisler Link: http://lkml.kernel.org/r/1393441612-19729-5-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index c518fb6..534cb89 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -139,7 +139,7 @@ drm_clflush_virt_range(char *addr, unsigned long length) mb(); for (; addr < end; addr += boot_cpu_data.x86_clflush_size) clflush(addr); - clflush(end - 1); + clflushopt(end - 1); mb(); return; } -- cgit v0.10.2 From 840d2830e6e56b8fdacc7ff12915dd91bf91566b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 27 Feb 2014 08:31:30 -0800 Subject: x86, cpufeature: Rename X86_FEATURE_CLFLSH to X86_FEATURE_CLFLUSH We call this "clflush" in /proc/cpuinfo, and have cpu_has_clflush()... let's be consistent and just call it that. Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Alan Cox Link: http://lkml.kernel.org/n/tip-mlytfzjkvuf739okyn40p8a5@git.kernel.org diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bc507d7..63211ef 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -37,7 +37,7 @@ #define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" CLFLUSH instruction */ +#define X86_FEATURE_CLFLUSH (0*32+19) /* CLFLUSH instruction */ #define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ @@ -318,7 +318,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) #define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) -#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) +#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) #define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8e28bf2..2c6ac6f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1025,7 +1025,7 @@ __setup("show_msr=", setup_show_msr); static __init int setup_noclflush(char *arg) { - setup_clear_cpu_cap(X86_FEATURE_CLFLSH); + setup_clear_cpu_cap(X86_FEATURE_CLFLUSH); return 1; } __setup("noclflush", setup_noclflush); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a32da80..ffc78c3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1379,7 +1379,7 @@ static inline void mwait_play_dead(void) if (!this_cpu_has(X86_FEATURE_MWAIT)) return; - if (!this_cpu_has(X86_FEATURE_CLFLSH)) + if (!this_cpu_has(X86_FEATURE_CLFLUSH)) return; if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) return; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c697625..e5503d8 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -263,7 +263,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, F(TSC) | F(MSR) | F(PAE) | F(MCE) | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | - F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | + F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) | 0 /* Reserved, DS, ACPI */ | F(MMX) | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | 0 /* HTT, TM, Reserved, PBE */; diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c index 49bac41..c3e67ba 100644 --- a/drivers/gpu/drm/gma500/mmu.c +++ b/drivers/gpu/drm/gma500/mmu.c @@ -520,7 +520,7 @@ struct psb_mmu_driver *psb_mmu_driver_init(uint8_t __iomem * registers, driver->has_clflush = 0; - if (boot_cpu_has(X86_FEATURE_CLFLSH)) { + if (boot_cpu_has(X86_FEATURE_CLFLUSH)) { uint32_t tfms, misc, cap0, cap4, clflush_size; /* -- cgit v0.10.2 From da4aaa7d860c63a1bfd3c73cf8309afe2840c5b9 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 27 Feb 2014 08:36:31 -0800 Subject: x86, cpufeature: If we disable CLFLUSH, we should disable CLFLUSHOPT If we explicitly disable the use of CLFLUSH, we should disable the use of CLFLUSHOPT as well. Cc: Ross Zwisler Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-jtdv7btppr4jgzxm3sxx1e74@git.kernel.org diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2c6ac6f..cca53d8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1026,6 +1026,7 @@ __setup("show_msr=", setup_show_msr); static __init int setup_noclflush(char *arg) { setup_clear_cpu_cap(X86_FEATURE_CLFLUSH); + setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT); return 1; } __setup("noclflush", setup_noclflush); -- cgit v0.10.2 From 7d48935eff401bb7970e73e822871a10e3643df1 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 12 Feb 2014 23:58:15 -0500 Subject: dm thin: allow metadata space larger than supported to go unused It was always intended that a user could provide a thin metadata device that is larger than the max supported by the on-disk format. The extra space would just go unused. Unfortunately that never worked. If the user attempted to use a larger metadata device on creation they would get an error like the following: device-mapper: space map common: space map too large device-mapper: transaction manager: couldn't create metadata space map device-mapper: thin metadata: tm_create_with_sm failed device-mapper: table: 252:17: thin-pool: Error creating metadata object device-mapper: ioctl: error adding target to table Fix this by allowing the initial metadata space map creation to cap its size at the max number of blocks supported (DM_SM_METADATA_MAX_BLOCKS). get_metadata_dev_size() must also impose DM_SM_METADATA_MAX_BLOCKS (via THIN_METADATA_MAX_SECTORS), otherwise extending metadata would cap at THIN_METADATA_MAX_SECTORS_WARNING (which is larger than supported). Also, the calculation for THIN_METADATA_MAX_SECTORS didn't account for the sizeof the disk_bitmap_header. So the supported maximum metadata size is a bit smaller (reduced from 33423360 to 33292800 sectors). Lastly, remove the "excess space will not be used" warning message from get_metadata_dev_size(); it resulted in printing the warning multiple times. Factor out warn_if_metadata_device_too_big(), call it from pool_ctr() and maybe_resize_metadata_dev(). Signed-off-by: Mike Snitzer Acked-by: Joe Thornber diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 3bb4506..baa87ff 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -483,7 +483,7 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) disk_super->data_mapping_root = cpu_to_le64(pmd->root); disk_super->device_details_root = cpu_to_le64(pmd->details_root); - disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE); disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); @@ -651,7 +651,7 @@ static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool f { int r; - pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, + pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, THIN_METADATA_CACHE_SIZE, THIN_MAX_CONCURRENT_LOCKS); if (IS_ERR(pmd->bm)) { diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index dd6088a..82ea384 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -9,16 +9,14 @@ #include "persistent-data/dm-block-manager.h" #include "persistent-data/dm-space-map.h" +#include "persistent-data/dm-space-map-metadata.h" -#define THIN_METADATA_BLOCK_SIZE 4096 +#define THIN_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE /* * The metadata device is currently limited in size. - * - * We have one block of index, which can hold 255 index entries. Each - * index entry contains allocation info about 16k metadata blocks. */ -#define THIN_METADATA_MAX_SECTORS (255 * (1 << 14) * (THIN_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) +#define THIN_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS /* * A metadata device larger than 16GB triggers a warning. diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 4cf4b19..7e84bac 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2000,16 +2000,27 @@ static void metadata_low_callback(void *context) dm_table_event(pool->ti->table); } -static sector_t get_metadata_dev_size(struct block_device *bdev) +static sector_t get_dev_size(struct block_device *bdev) +{ + return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; +} + +static void warn_if_metadata_device_too_big(struct block_device *bdev) { - sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; + sector_t metadata_dev_size = get_dev_size(bdev); char buffer[BDEVNAME_SIZE]; - if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) { + if (metadata_dev_size > THIN_METADATA_MAX_SECTORS_WARNING) DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.", bdevname(bdev, buffer), THIN_METADATA_MAX_SECTORS); - metadata_dev_size = THIN_METADATA_MAX_SECTORS_WARNING; - } +} + +static sector_t get_metadata_dev_size(struct block_device *bdev) +{ + sector_t metadata_dev_size = get_dev_size(bdev); + + if (metadata_dev_size > THIN_METADATA_MAX_SECTORS) + metadata_dev_size = THIN_METADATA_MAX_SECTORS; return metadata_dev_size; } @@ -2018,7 +2029,7 @@ static dm_block_t get_metadata_dev_size_in_blocks(struct block_device *bdev) { sector_t metadata_dev_size = get_metadata_dev_size(bdev); - sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); + sector_div(metadata_dev_size, THIN_METADATA_BLOCK_SIZE); return metadata_dev_size; } @@ -2096,12 +2107,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->error = "Error opening metadata block device"; goto out_unlock; } - - /* - * Run for the side-effect of possibly issuing a warning if the - * device is too big. - */ - (void) get_metadata_dev_size(metadata_dev->bdev); + warn_if_metadata_device_too_big(metadata_dev->bdev); r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); if (r) { @@ -2288,6 +2294,7 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) return -EINVAL; } else if (metadata_dev_size > sb_metadata_dev_size) { + warn_if_metadata_device_too_big(pool->md_dev); DMINFO("%s: growing the metadata device from %llu to %llu blocks", dm_device_name(pool->pool_md), sb_metadata_dev_size, metadata_dev_size); diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 536782e..e9bdd46 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -680,6 +680,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm, if (r) return r; + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; r = sm_ll_extend(&smm->ll, nr_blocks); if (r) return r; diff --git a/drivers/md/persistent-data/dm-space-map-metadata.h b/drivers/md/persistent-data/dm-space-map-metadata.h index 39bba08..64df923 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.h +++ b/drivers/md/persistent-data/dm-space-map-metadata.h @@ -9,6 +9,17 @@ #include "dm-transaction-manager.h" +#define DM_SM_METADATA_BLOCK_SIZE (4096 >> SECTOR_SHIFT) + +/* + * The metadata device is currently limited in size. + * + * We have one block of index, which can hold 255 index entries. Each + * index entry contains allocation info about ~16k metadata blocks. + */ +#define DM_SM_METADATA_MAX_BLOCKS (255 * ((1 << 14) - 64)) +#define DM_SM_METADATA_MAX_SECTORS (DM_SM_METADATA_MAX_BLOCKS * DM_SM_METADATA_BLOCK_SIZE) + /* * Unfortunately we have to use two-phase construction due to the cycle * between the tm and sm. -- cgit v0.10.2 From 7363590d2c4691593fd280f94b3deaeb5e83dbbd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 21 May 2013 17:35:19 +0100 Subject: arm64: Implement coherent DMA API based on swiotlb This patch adds support for DMA API cache maintenance on SoCs without hardware device cache coherency. Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 88932498..4c60e64 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -85,6 +85,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma, } /* + * Cache maintenance functions used by the DMA API. No to be used directly. + */ +extern void __dma_map_area(const void *, size_t, int); +extern void __dma_unmap_area(const void *, size_t, int); +extern void __dma_flush_range(const void *, const void *); + +/* * Copy user data from/to a page which is mapped into a different * processes address space. Really, we want to allow our "user * space" model to handle this. diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index fd0c0c0..3a4572e 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -30,6 +30,8 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; +extern struct dma_map_ops coherent_swiotlb_dma_ops; +extern struct dma_map_ops noncoherent_swiotlb_dma_ops; static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { @@ -47,6 +49,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return __generic_dma_ops(dev); } +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +{ + dev->archdata.dma_ops = ops; +} + #include static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 1ea9f26..97fcef5 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -166,3 +166,81 @@ ENTRY(__flush_dcache_area) dsb sy ret ENDPROC(__flush_dcache_area) + +/* + * __dma_inv_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +__dma_inv_range: + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x0, x0, x3 + bic x1, x1, x3 +1: dc ivac, x0 // invalidate D / U line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__dma_inv_range) + +/* + * __dma_clean_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +__dma_clean_range: + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x0, x0, x3 +1: dc cvac, x0 // clean D / U line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__dma_clean_range) + +/* + * __dma_flush_range(start, end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(__dma_flush_range) + dcache_line_size x2, x3 + sub x3, x2, #1 + bic x0, x0, x3 +1: dc civac, x0 // clean & invalidate D / U line + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__dma_flush_range) + +/* + * __dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(__dma_map_area) + add x1, x1, x0 + cmp w2, #DMA_FROM_DEVICE + b.eq __dma_inv_range + b __dma_clean_range +ENDPROC(__dma_map_area) + +/* + * __dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(__dma_unmap_area) + add x1, x1, x0 + cmp w2, #DMA_TO_DEVICE + b.ne __dma_inv_range + ret +ENDPROC(__dma_unmap_area) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index afa91a2..88fbc5e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -78,7 +78,166 @@ static void __dma_free_coherent(struct device *dev, size_t size, } } -static struct dma_map_ops coherent_swiotlb_dma_ops = { +static void *__dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) +{ + struct page *page, **map; + void *ptr, *coherent_ptr; + int order, i; + + size = PAGE_ALIGN(size); + order = get_order(size); + + ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs); + if (!ptr) + goto no_mem; + map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA); + if (!map) + goto no_map; + + /* remove any dirty cache lines on the kernel alias */ + __dma_flush_range(ptr, ptr + size); + + /* create a coherent mapping */ + page = virt_to_page(ptr); + for (i = 0; i < (size >> PAGE_SHIFT); i++) + map[i] = page + i; + coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP, + pgprot_dmacoherent(pgprot_default)); + kfree(map); + if (!coherent_ptr) + goto no_map; + + return coherent_ptr; + +no_map: + __dma_free_coherent(dev, size, ptr, *dma_handle, attrs); +no_mem: + *dma_handle = ~0; + return NULL; +} + +static void __dma_free_noncoherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); + + vunmap(vaddr); + __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs); +} + +static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + dma_addr_t dev_addr; + + dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + + return dev_addr; +} + + +static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); +} + +static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i, ret; + + ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); + for_each_sg(sgl, sg, ret, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + + return ret; +} + +static void __swiotlb_unmap_sg_attrs(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); +} + +static void __swiotlb_sync_single_for_cpu(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); +} + +static void __swiotlb_sync_single_for_device(struct device *dev, + dma_addr_t dev_addr, size_t size, + enum dma_data_direction dir) +{ + swiotlb_sync_single_for_device(dev, dev_addr, size, dir); + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); +} + +static void __swiotlb_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); + swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); +} + +static void __swiotlb_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); + for_each_sg(sgl, sg, nelems, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); +} + +struct dma_map_ops noncoherent_swiotlb_dma_ops = { + .alloc = __dma_alloc_noncoherent, + .free = __dma_free_noncoherent, + .map_page = __swiotlb_map_page, + .unmap_page = __swiotlb_unmap_page, + .map_sg = __swiotlb_map_sg_attrs, + .unmap_sg = __swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = __swiotlb_sync_single_for_cpu, + .sync_single_for_device = __swiotlb_sync_single_for_device, + .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = __swiotlb_sync_sg_for_device, + .dma_supported = swiotlb_dma_supported, + .mapping_error = swiotlb_dma_mapping_error, +}; +EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops); + +struct dma_map_ops coherent_swiotlb_dma_ops = { .alloc = __dma_alloc_coherent, .free = __dma_free_coherent, .map_page = swiotlb_map_page, @@ -92,6 +251,7 @@ static struct dma_map_ops coherent_swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; +EXPORT_SYMBOL(coherent_swiotlb_dma_ops); extern int swiotlb_late_init_with_default_size(size_t default_size); -- cgit v0.10.2 From 724b9e1d75ab3401aaa081bd4efb440c1b3509db Mon Sep 17 00:00:00 2001 From: Hiroaki SHIMODA Date: Wed, 26 Feb 2014 21:43:42 +0900 Subject: sch_tbf: Fix potential memory leak in tbf_change(). The allocated child qdisc is not freed in error conditions. Defer the allocation after user configuration turns out to be valid and acceptable. Fixes: cc106e441a63b ("net: sched: tbf: fix the calculation of max_size") Signed-off-by: Hiroaki SHIMODA Cc: Yang Yingliang Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 1cb413f..4f505a0 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -334,18 +334,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB])); - if (q->qdisc != &noop_qdisc) { - err = fifo_set_limit(q->qdisc, qopt->limit); - if (err) - goto done; - } else if (qopt->limit > 0) { - child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); - if (IS_ERR(child)) { - err = PTR_ERR(child); - goto done; - } - } - buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); @@ -390,6 +378,18 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) goto done; } + if (q->qdisc != &noop_qdisc) { + err = fifo_set_limit(q->qdisc, qopt->limit); + if (err) + goto done; + } else if (qopt->limit > 0) { + child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit); + if (IS_ERR(child)) { + err = PTR_ERR(child); + goto done; + } + } + sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); -- cgit v0.10.2 From b20c9f29c5c25921c6ad18b50d4b61e6d181c3cc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 26 Feb 2014 18:47:36 +0000 Subject: arm/arm64: KVM: detect CPU reset on CPU_PM_EXIT Commit 1fcf7ce0c602 (arm: kvm: implement CPU PM notifier) added support for CPU power-management, using a cpu_notifier to re-init KVM on a CPU that entered CPU idle. The code assumed that a CPU entering idle would actually be powered off, loosing its state entierely, and would then need to be reinitialized. It turns out that this is not always the case, and some HW performs CPU PM without actually killing the core. In this case, we try to reinitialize KVM while it is still live. It ends up badly, as reported by Andre Przywara (using a Calxeda Midway): [ 3.663897] Kernel panic - not syncing: unexpected prefetch abort in Hyp mode at: 0x685760 [ 3.663897] unexpected data abort in Hyp mode at: 0xc067d150 [ 3.663897] unexpected HVC/SVC trap in Hyp mode at: 0xc0901dd0 The trick here is to detect if we've been through a full re-init or not by looking at HVBAR (VBAR_EL2 on arm64). This involves implementing the backend for __hyp_get_vectors in the main KVM HYP code (rather small), and checking the return value against the default one when the CPU notifier is called on CPU_PM_EXIT. Reported-by: Andre Przywara Tested-by: Andre Przywara Cc: Lorenzo Pieralisi Cc: Rob Herring Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 1d8248e..bd18bb8 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -878,7 +878,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { - if (cmd == CPU_PM_EXIT) { + if (cmd == CPU_PM_EXIT && + __hyp_get_vectors() == hyp_default_vectors) { cpu_init_hyp_mode(NULL); return NOTIFY_OK; } diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index ddc1553..0d68d40 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -220,6 +220,10 @@ after_vfp_restore: * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are * passed in r0 and r1. * + * A function pointer with a value of 0xffffffff has a special meaning, + * and is used to implement __hyp_get_vectors in the same way as in + * arch/arm/kernel/hyp_stub.S. + * * The calling convention follows the standard AAPCS: * r0 - r3: caller save * r12: caller save @@ -363,6 +367,11 @@ hyp_hvc: host_switch_to_hyp: pop {r0, r1, r2} + /* Check for __hyp_get_vectors */ + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + beq 1f + push {lr} mrs lr, SPSR push {lr} @@ -378,7 +387,7 @@ THUMB( orr lr, #1) pop {lr} msr SPSR_csxf, lr pop {lr} - eret +1: eret guest_trap: load_vcpu @ Load VCPU pointer to r0 diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 3b47c36..2c56012 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -694,6 +694,24 @@ __hyp_panic_str: .align 2 +/* + * u64 kvm_call_hyp(void *hypfn, ...); + * + * This is not really a variadic function in the classic C-way and care must + * be taken when calling this to ensure parameters are passed in registers + * only, since the stack will change between the caller and the callee. + * + * Call the function with the first argument containing a pointer to the + * function you wish to call in Hyp mode, and subsequent arguments will be + * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the + * function pointer can be passed). The function being called must be mapped + * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are + * passed in r0 and r1. + * + * A function pointer with a value of 0 has a special meaning, and is + * used to implement __hyp_get_vectors in the same way as in + * arch/arm64/kernel/hyp_stub.S. + */ ENTRY(kvm_call_hyp) hvc #0 ret @@ -737,7 +755,12 @@ el1_sync: // Guest trapped into EL2 pop x2, x3 pop x0, x1 - push lr, xzr + /* Check for __hyp_get_vectors */ + cbnz x0, 1f + mrs x0, vbar_el2 + b 2f + +1: push lr, xzr /* * Compute the function address in EL2, and shuffle the parameters. @@ -750,7 +773,7 @@ el1_sync: // Guest trapped into EL2 blr lr pop lr, xzr - eret +2: eret el1_trap: /* -- cgit v0.10.2 From a08d3b3b99efd509133946056531cdf8f3a0c09b Mon Sep 17 00:00:00 2001 From: Andrew Honig Date: Thu, 27 Feb 2014 19:35:14 +0100 Subject: kvm: x86: fix emulator buffer overflow (CVE-2014-0049) The problem occurs when the guest performs a pusha with the stack address pointing to an mmio address (or an invalid guest physical address) to start with, but then extending into an ordinary guest physical address. When doing repeated emulated pushes emulator_read_write sets mmio_needed to 1 on the first one. On a later push when the stack points to regular memory, mmio_nr_fragments is set to 0, but mmio_is_needed is not set to 0. As a result, KVM exits to userspace, and then returns to complete_emulated_mmio. In complete_emulated_mmio vcpu->mmio_cur_fragment is incremented. The termination condition of vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments is never achieved. The code bounces back and fourth to userspace incrementing mmio_cur_fragment past it's buffer. If the guest does nothing else it eventually leads to a a crash on a memcpy from invalid memory address. However if a guest code can cause the vm to be destroyed in another vcpu with excellent timing, then kvm_clear_async_pf_completion_queue can be used by the guest to control the data that's pointed to by the call to cancel_work_item, which can be used to gain execution. Fixes: f78146b0f9230765c6315b2e14f56112513389ad Signed-off-by: Andrew Honig Cc: stable@vger.kernel.org (3.5+) Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 39c28f09..2b85784 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6186,7 +6186,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) frag->len -= len; } - if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { + if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { vcpu->mmio_needed = 0; /* FIXME: return into emulator if single-stepping. */ -- cgit v0.10.2 From 9f050c7f9738ffa746c63415136645ad231b1348 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 Feb 2014 09:16:01 -0500 Subject: drm/radeon: print the supported atpx function mask Print the supported functions mask in addition to the version. This is useful in debugging PX problems since we can see what functions are available. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 485848f..fa9a9c0 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -219,7 +219,8 @@ static int radeon_atpx_verify_interface(struct radeon_atpx *atpx) memcpy(&output, info->buffer.pointer, size); /* TODO: check version? */ - printk("ATPX version %u\n", output.version); + printk("ATPX version %u, functions 0x%08x\n", + output.version, output.function_bits); radeon_atpx_parse_functions(&atpx->functions, output.function_bits); -- cgit v0.10.2 From 5e386b574cf7e1593e1296e5b0feea4108ed6ad8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 20 Feb 2014 18:47:14 +0100 Subject: drm/radeon: fix missing bo reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we might get a crash here. Signed-off-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 114d167..2aecd6d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -537,6 +537,10 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) radeon_vm_init(rdev, &fpriv->vm); + r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); + if (r) + return r; + /* map the ib pool buffer read only into * virtual address space */ bo_va = radeon_vm_bo_add(rdev, &fpriv->vm, @@ -544,6 +548,8 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); + + radeon_bo_unreserve(rdev->ring_tmp_bo.bo); if (r) { radeon_vm_fini(rdev, &fpriv->vm); kfree(fpriv); -- cgit v0.10.2 From 9ef4e1d000a5b335fcebfcf8aef3405e59574c89 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Feb 2014 10:21:43 -0500 Subject: drm/radeon: disable pll sharing for DP on DCE4.1 Causes display problems. We had already disabled sharing for non-DP displays. Based on a patch from: Niels Ole Salscheider bug: https://bugzilla.kernel.org/show_bug.cgi?id=58121 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 0d19f4f..daa4dd3 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1774,6 +1774,20 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) return ATOM_PPLL1; DRM_ERROR("unable to allocate a PPLL\n"); return ATOM_PPLL_INVALID; + } else if (ASIC_IS_DCE41(rdev)) { + /* Don't share PLLs on DCE4.1 chips */ + if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(radeon_crtc->encoder))) { + if (rdev->clock.dp_extclk) + /* skip PPLL programming if using ext clock */ + return ATOM_PPLL_INVALID; + } + pll_in_use = radeon_get_pll_use_mask(crtc); + if (!(pll_in_use & (1 << ATOM_PPLL1))) + return ATOM_PPLL1; + if (!(pll_in_use & (1 << ATOM_PPLL2))) + return ATOM_PPLL2; + DRM_ERROR("unable to allocate a PPLL\n"); + return ATOM_PPLL_INVALID; } else if (ASIC_IS_DCE4(rdev)) { /* in DP mode, the DP ref clock can come from PPLL, DCPLL, or ext clock, * depending on the asic: @@ -1801,7 +1815,7 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) if (pll != ATOM_PPLL_INVALID) return pll; } - } else if (!ASIC_IS_DCE41(rdev)) { /* Don't share PLLs on DCE4.1 chips */ + } else { /* use the same PPLL for all monitors with the same clock */ pll = radeon_get_shared_nondp_ppll(crtc); if (pll != ATOM_PPLL_INVALID) -- cgit v0.10.2 From d965441342f3b7d63db784cad852328d17d47942 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Wed, 26 Feb 2014 19:22:47 -0500 Subject: drm/radeon: free uvd ring on unload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to free the uvd ring. Also reshuffle gart tear down to happen after uvd tear down. Signed-off-by: Jérôme Glisse Cc: stable@vger.kernel.org Reviewed-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 5623e75..8a2c010 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5475,9 +5475,9 @@ void evergreen_fini(struct radeon_device *rdev) radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); - evergreen_pcie_gart_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + evergreen_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 6781fee..3e6804b 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -171,6 +171,8 @@ void radeon_uvd_fini(struct radeon_device *rdev) radeon_bo_unref(&rdev->uvd.vcpu_bo); + radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX]); + release_firmware(rdev->uvd_fw); } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 6c772e5..4e37a42 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1955,9 +1955,9 @@ void rv770_fini(struct radeon_device *rdev) radeon_wb_fini(rdev); radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); - rv770_pcie_gart_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + rv770_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); radeon_fence_driver_fini(rdev); -- cgit v0.10.2 From d7eb0a0940618f36e5937d81c06ad7bf438a99e2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2014 10:25:39 -0500 Subject: drm/radeon: fix audio disable on dce6+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properly clear the enable bit when audio disable is requested. Signed-off-by: Alex Deucher Reviewed-by: Christian König Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 713a5d3..36e8f10 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -283,7 +283,7 @@ static void dce6_audio_enable(struct radeon_device *rdev, bool enable) { WREG32_ENDPOINT(pin->offset, AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL, - AUDIO_ENABLED); + enable ? AUDIO_ENABLED : 0); DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id); } -- cgit v0.10.2 From 832eafaf34ff7d0348fe701e417900c6cf1f5656 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2014 11:07:55 -0500 Subject: drm/radeon: change audio enable logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable audio around audio hw setup. This may avoid hangs on certain asics. Signed-off-by: Alex Deucher Reviewed-by: Christian König diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 36e8f10..94e8587 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -278,13 +278,15 @@ static int dce6_audio_chipset_supported(struct radeon_device *rdev) return !ASIC_IS_NODCE(rdev); } -static void dce6_audio_enable(struct radeon_device *rdev, - struct r600_audio_pin *pin, - bool enable) +void dce6_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, + bool enable) { + if (!pin) + return; + WREG32_ENDPOINT(pin->offset, AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL, enable ? AUDIO_ENABLED : 0); - DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id); } static const u32 pin_offsets[7] = @@ -323,7 +325,8 @@ int dce6_audio_init(struct radeon_device *rdev) rdev->audio.pin[i].connected = false; rdev->audio.pin[i].offset = pin_offsets[i]; rdev->audio.pin[i].id = i; - dce6_audio_enable(rdev, &rdev->audio.pin[i], true); + /* disable audio. it will be set up later */ + dce6_audio_enable(rdev, &rdev->audio.pin[i], false); } return 0; diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 0c6d5ce..05b0c95 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -306,6 +306,15 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode return; offset = dig->afmt->offset; + /* disable audio prior to setting up hw */ + if (ASIC_IS_DCE6(rdev)) { + dig->afmt->pin = dce6_audio_get_pin(rdev); + dce6_audio_enable(rdev, dig->afmt->pin, false); + } else { + dig->afmt->pin = r600_audio_get_pin(rdev); + r600_audio_enable(rdev, dig->afmt->pin, false); + } + evergreen_audio_set_dto(encoder, mode->clock); WREG32(HDMI_VBI_PACKET_CONTROL + offset, @@ -409,12 +418,16 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode WREG32(AFMT_RAMP_CONTROL1 + offset, 0x007FFFFF); WREG32(AFMT_RAMP_CONTROL2 + offset, 0x00000001); WREG32(AFMT_RAMP_CONTROL3 + offset, 0x00000001); + + /* enable audio after to setting up hw */ + if (ASIC_IS_DCE6(rdev)) + dce6_audio_enable(rdev, dig->afmt->pin, true); + else + r600_audio_enable(rdev, dig->afmt->pin, true); } void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) { - struct drm_device *dev = encoder->dev; - struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; @@ -427,15 +440,6 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable) if (!enable && !dig->afmt->enabled) return; - if (enable) { - if (ASIC_IS_DCE6(rdev)) - dig->afmt->pin = dce6_audio_get_pin(rdev); - else - dig->afmt->pin = r600_audio_get_pin(rdev); - } else { - dig->afmt->pin = NULL; - } - dig->afmt->enabled = enable; DRM_DEBUG("%sabling HDMI interface @ 0x%04X for encoder 0x%x\n", diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c index 47fc2b8..bffac10 100644 --- a/drivers/gpu/drm/radeon/r600_audio.c +++ b/drivers/gpu/drm/radeon/r600_audio.c @@ -142,12 +142,15 @@ void r600_audio_update_hdmi(struct work_struct *work) } /* enable the audio stream */ -static void r600_audio_enable(struct radeon_device *rdev, - struct r600_audio_pin *pin, - bool enable) +void r600_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, + bool enable) { u32 value = 0; + if (!pin) + return; + if (ASIC_IS_DCE4(rdev)) { if (enable) { value |= 0x81000000; /* Required to enable audio */ @@ -158,7 +161,6 @@ static void r600_audio_enable(struct radeon_device *rdev, WREG32_P(R600_AUDIO_ENABLE, enable ? 0x81000000 : 0x0, ~0x81000000); } - DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id); } /* @@ -178,8 +180,8 @@ int r600_audio_init(struct radeon_device *rdev) rdev->audio.pin[0].status_bits = 0; rdev->audio.pin[0].category_code = 0; rdev->audio.pin[0].id = 0; - - r600_audio_enable(rdev, &rdev->audio.pin[0], true); + /* disable audio. it will be set up later */ + r600_audio_enable(rdev, &rdev->audio.pin[0], false); return 0; } diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 3016fc1..ec810cd 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -460,6 +460,10 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod return; offset = dig->afmt->offset; + /* disable audio prior to setting up hw */ + dig->afmt->pin = r600_audio_get_pin(rdev); + r600_audio_enable(rdev, dig->afmt->pin, false); + r600_audio_set_dto(encoder, mode->clock); WREG32(HDMI0_VBI_PACKET_CONTROL + offset, @@ -531,6 +535,9 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod WREG32(HDMI0_RAMP_CONTROL3 + offset, 0x00000001); r600_hdmi_audio_workaround(encoder); + + /* enable audio after to setting up hw */ + r600_audio_enable(rdev, dig->afmt->pin, true); } /* @@ -651,11 +658,6 @@ void r600_hdmi_enable(struct drm_encoder *encoder, bool enable) if (!enable && !dig->afmt->enabled) return; - if (enable) - dig->afmt->pin = r600_audio_get_pin(rdev); - else - dig->afmt->pin = NULL; - /* Older chipsets require setting HDMI and routing manually */ if (!ASIC_IS_DCE3(rdev)) { if (enable) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 024db37..e887d02 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -2747,6 +2747,12 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, void r600_audio_update_hdmi(struct work_struct *work); struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev); struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev); +void r600_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, + bool enable); +void dce6_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, + bool enable); /* * R600 vram scratch functions -- cgit v0.10.2 From 3803c8e5b50946dd6bc18972d9190757d05648f0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Feb 2014 11:12:11 -0500 Subject: drm/radeon: enable speaker allocation setup on dce3.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we disable audio while setting up the audio hw, we should be able to set this up without hangs. Signed-off-by: Alex Deucher Reviewed-by: Christian König diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index ec810cd..85a2bb2 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -329,9 +329,6 @@ static void dce3_2_afmt_write_speaker_allocation(struct drm_encoder *encoder) u8 *sadb; int sad_count; - /* XXX: setting this register causes hangs on some asics */ - return; - list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { if (connector->encoder == encoder) { radeon_connector = to_radeon_connector(connector); -- cgit v0.10.2 From cb664981607a6b5b3d670ad57bbda893b2528d96 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Feb 2014 20:47:53 +0100 Subject: mac80211: fix association to 20/40 MHz VHT networks When a VHT network uses 20 or 40 MHz as per the HT operation information, the channel center frequency segment 0 field in the VHT operation information is reserved, so ignore it. This fixes association with such networks when the AP puts 0 into the field, previously we'd disconnect due to an invalid channel with the message wlan0: AP VHT information is invalid, disable VHT Cc: stable@vger.kernel.org Fixes: f2d9d270c15ae ("mac80211: support VHT association") Reported-by: Tim Nelson Signed-off-by: Johannes Berg diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c415f00..245dce9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -222,6 +222,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, switch (vht_oper->chan_width) { case IEEE80211_VHT_CHANWIDTH_USE_HT: vht_chandef.width = chandef->width; + vht_chandef.center_freq1 = chandef->center_freq1; break; case IEEE80211_VHT_CHANWIDTH_80MHZ: vht_chandef.width = NL80211_CHAN_WIDTH_80; -- cgit v0.10.2 From bf439b3154ce49d81a79b14f9fab18af99018ae2 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti Date: Thu, 27 Feb 2014 13:38:26 +0900 Subject: net: ipv6: ping: Use socket mark in routing lookup Signed-off-by: Lorenzo Colitti Signed-off-by: David S. Miller diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index fb9beb7..587bbdc 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -135,6 +135,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; + fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); -- cgit v0.10.2 From 280e7c48c3b873e4987a63da276ecab25383f494 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 11 Jan 2014 11:42:51 -0800 Subject: perf tools: fix BFD detection on opensuse opensuse libbfd requires -lz -liberty to build. Add those to the BFD feature detection. Signed-off-by: Andi Kleen Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1389469379-13340-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index c48d449..0331ea2 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -478,7 +478,7 @@ else endif ifeq ($(feature-libbfd), 1) - EXTLIBS += -lbfd + EXTLIBS += -lbfd -lz -liberty endif ifdef NO_DEMANGLE diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile index 12e5513..523b7bc 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/perf/config/feature-checks/Makefile @@ -121,7 +121,7 @@ test-libpython-version.bin: $(BUILD) $(FLAGS_PYTHON_EMBED) test-libbfd.bin: - $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl + $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl test-liberty.bin: $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -- cgit v0.10.2 From 1b385cbdd74aa803e966e01e5fe49490d6044e30 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Feb 2014 22:54:11 +0100 Subject: kvm, vmx: Really fix lazy FPU on nested guest Commit e504c9098ed6 (kvm, vmx: Fix lazy FPU on nested guest, 2013-11-13) highlighted a real problem, but the fix was subtly wrong. nested_read_cr0 is the CR0 as read by L2, but here we want to look at the CR0 value reflecting L1's setup. In other words, L2 might think that TS=0 (so nested_read_cr0 has the bit clear); but if L1 is actually running it with TS=1, we should inject the fault into L1. The effective value of CR0 in L2 is contained in vmcs12->guest_cr0, use it. Fixes: e504c9098ed6acd9e1079c5e10e4910724ad429f Reported-by: Kashyap Chamarty Reported-by: Stefan Bader Tested-by: Kashyap Chamarty Tested-by: Anthoine Bourgeois Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a06f101..3927528 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6688,7 +6688,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) else if (is_page_fault(intr_info)) return enable_ept; else if (is_no_device(intr_info) && - !(nested_read_cr0(vmcs12) & X86_CR0_TS)) + !(vmcs12->guest_cr0 & X86_CR0_TS)) return 0; return vmcs12->exception_bitmap & (1u << (intr_info & INTR_INFO_VECTOR_MASK)); -- cgit v0.10.2 From babe723d6d733a1045bb44ab82fbe27bdfa2bff8 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 27 Feb 2014 15:42:26 +0200 Subject: bnx2x: Add missing bit in default Tx switching Commit c14db2025 "bnx2x: Correct default Tx switching behaviour" supposedly changed the default Tx switching behaviour, but was missing the fastpath change required for FW to pass packets from PFs to VFs. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 66c0df7..dbcff50 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3875,7 +3875,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) xmit_type); } - /* Add the macs to the parsing BD this is a vf */ + /* Add the macs to the parsing BD if this is a vf or if + * Tx Switching is enabled. + */ if (IS_VF(bp)) { /* override GRE parameters in BD */ bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.src_hi, @@ -3887,6 +3889,11 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) &pbd_e2->data.mac_addr.dst_mid, &pbd_e2->data.mac_addr.dst_lo, eth->h_dest); + } else if (bp->flags & TX_SWITCHING) { + bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.dst_hi, + &pbd_e2->data.mac_addr.dst_mid, + &pbd_e2->data.mac_addr.dst_lo, + eth->h_dest); } SET_FLAG(pbd_e2_parsing_data, -- cgit v0.10.2 From feff9ab2e7fa773b6a3965f77375fe89f7fd85cf Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Thu, 27 Feb 2014 17:14:41 +0800 Subject: neigh: recompute reachabletime before returning from neigh_periodic_work() If the neigh table's entries is less than gc_thresh1, the function will return directly, and the reachabletime will not be recompute, so the reachabletime can be guessed. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e1aa0f3..e161290 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -766,9 +766,6 @@ static void neigh_periodic_work(struct work_struct *work) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) < tbl->gc_thresh1) - goto out; - /* * periodically recompute ReachableTime from random function */ @@ -781,6 +778,9 @@ static void neigh_periodic_work(struct work_struct *work) neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME)); } + if (atomic_read(&tbl->entries) < tbl->gc_thresh1) + goto out; + for (i = 0 ; i < (1 << nht->hash_shift); i++) { np = &nht->hash_buckets[i]; -- cgit v0.10.2 From accfe0e356327da5bd53da8852b93fc22de9b5fc Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 27 Feb 2014 12:57:58 +0100 Subject: ipv6: ipv6_find_hdr restore prev functionality The commit 9195bb8e381d81d5a315f911904cdf0cfcc919b8 ("ipv6: improve ipv6_find_hdr() to skip empty routing headers") broke ipv6_find_hdr(). When a target is specified like IPPROTO_ICMPV6 ipv6_find_hdr() returns -ENOENT when it's found, not the header as expected. A part of IPVS is broken and possible also nft_exthdr_eval(). When target is -1 which it is most cases, it works. This patch exits the do while loop if the specific header is found so the nexthdr could be returned as expected. Reported-by: Art -kwaak- van Breemen Signed-off-by: Hans Schillstrom CC:Ansis Atteka Signed-off-by: David S. Miller diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 140748d..8af3eb5 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { - if (target < 0) + if (target < 0 || found) break; return -ENOENT; } -- cgit v0.10.2 From 4b41636878ee32d4c45a49de7749abca9721bd6a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 27 Feb 2014 15:35:48 -0800 Subject: ARM: OMAP3: Fix pinctrl interrupts for core2 After splitting padconf core into two parts to avoid exposing unaccessable registers, the new padconf core2 domain was left without a wake-up interrupt. Fix the issue by passing the shared wake-up interrupt in platform data like we do for padconf core and wkup domains already. Fixes: 3d49538364 (ARM: dts: Split omap3 pinmux core device) Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 0cc710d..c33e07e 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -257,6 +257,7 @@ struct of_dev_auxdata omap_auxdata_lookup[] __initdata = { #endif #ifdef CONFIG_ARCH_OMAP3 OF_DEV_AUXDATA("ti,omap3-padconf", 0x48002030, "48002030.pinmux", &pcs_pdata), + OF_DEV_AUXDATA("ti,omap3-padconf", 0x480025a0, "480025a0.pinmux", &pcs_pdata), OF_DEV_AUXDATA("ti,omap3-padconf", 0x48002a00, "48002a00.pinmux", &pcs_pdata), /* Only on am3517 */ OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL), -- cgit v0.10.2 From fd40dccb1a170ba689664481a3de83617b7194d2 Mon Sep 17 00:00:00 2001 From: Philippe De Muyter Date: Thu, 27 Feb 2014 10:16:15 +0100 Subject: spi: spi-imx: spi_imx_remove: do not disable disabled clocks Currently, at module removal, one gets the following warnings: ------------[ cut here ]------------ WARNING: at drivers/clk/clk.c:780 clk_disable+0x18/0x24() Modules linked in: spi_imx(-) [last unloaded: ev76c560] CPU: 1 PID: 16337 Comm: rmmod Tainted: G W 3.10.17-80548-g90191eb-dirty #33 [<80013b4c>] (unwind_backtrace+0x0/0xf8) from [<800115dc>] (show_stack+0x10/0x14) [<800115dc>] (show_stack+0x10/0x14) from [<800257b8>] (warn_slowpath_common+0x4c/0x68) [<800257b8>] (warn_slowpath_common+0x4c/0x68) from [<800257f0>] (warn_slowpath_null+0x1c/0x24) [<800257f0>] (warn_slowpath_null+0x1c/0x24) from [<803f60ec>] (clk_disable+0x18/0x24) [<803f60ec>] (clk_disable+0x18/0x24) from [<7f02c9cc>] (spi_imx_remove+0x54/0x9c [spi_imx]) [<7f02c9cc>] (spi_imx_remove+0x54/0x9c [spi_imx]) from [<8025868c>] (platform_drv_remove+0x18/0x1c) [<8025868c>] (platform_drv_remove+0x18/0x1c) from [<80256f60>] (__device_release_driver+0x70/0xcc) [<80256f60>] (__device_release_driver+0x70/0xcc) from [<80257770>] (driver_detach+0xcc/0xd0) [<80257770>] (driver_detach+0xcc/0xd0) from [<80256d90>] (bus_remove_driver+0x7c/0xc0) [<80256d90>] (bus_remove_driver+0x7c/0xc0) from [<80068668>] (SyS_delete_module+0x144/0x1f8) [<80068668>] (SyS_delete_module+0x144/0x1f8) from [<8000e080>] (ret_fast_syscall+0x0/0x30) ---[ end trace 1f5df9ad54996300 ]--- ------------[ cut here ]------------ WARNING: at drivers/clk/clk.c:780 clk_disable+0x18/0x24() Modules linked in: spi_imx(-) [last unloaded: ev76c560] CPU: 1 PID: 16337 Comm: rmmod Tainted: G W 3.10.17-80548-g90191eb-dirty #33 [<80013b4c>] (unwind_backtrace+0x0/0xf8) from [<800115dc>] (show_stack+0x10/0x14) [<800115dc>] (show_stack+0x10/0x14) from [<800257b8>] (warn_slowpath_common+0x4c/0x68) [<800257b8>] (warn_slowpath_common+0x4c/0x68) from [<800257f0>] (warn_slowpath_null+0x1c/0x24) [<800257f0>] (warn_slowpath_null+0x1c/0x24) from [<803f60ec>] (clk_disable+0x18/0x24) [<803f60ec>] (clk_disable+0x18/0x24) from [<7f02c9e8>] (spi_imx_remove+0x70/0x9c [spi_imx]) [<7f02c9e8>] (spi_imx_remove+0x70/0x9c [spi_imx]) from [<8025868c>] (platform_drv_remove+0x18/0x1c) [<8025868c>] (platform_drv_remove+0x18/0x1c) from [<80256f60>] (__device_release_driver+0x70/0xcc) [<80256f60>] (__device_release_driver+0x70/0xcc) from [<80257770>] (driver_detach+0xcc/0xd0) [<80257770>] (driver_detach+0xcc/0xd0) from [<80256d90>] (bus_remove_driver+0x7c/0xc0) [<80256d90>] (bus_remove_driver+0x7c/0xc0) from [<80068668>] (SyS_delete_module+0x144/0x1f8) [<80068668>] (SyS_delete_module+0x144/0x1f8) from [<8000e080>] (ret_fast_syscall+0x0/0x30) ---[ end trace 1f5df9ad54996301 ]--- Since commit 9e556dcc55774c9a1032f32baa0e5cfafede8b70, "spi: spi-imx: only enable the clocks when we start to transfer a message", clocks are always disabled except when transmitting messages. There is thus no need to disable them at module removal. Fixes: 9e556dcc55774 (spi: spi-imx: only enable the clocks when we start to transfer a message) Signed-off-by: Philippe De Muyter Acked-by: Huang Shijie Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index a5474ef..47f15d9 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -948,8 +948,8 @@ static int spi_imx_remove(struct platform_device *pdev) spi_bitbang_stop(&spi_imx->bitbang); writel(0, spi_imx->base + MXC_CSPICTRL); - clk_disable_unprepare(spi_imx->clk_ipg); - clk_disable_unprepare(spi_imx->clk_per); + clk_unprepare(spi_imx->clk_ipg); + clk_unprepare(spi_imx->clk_per); spi_master_put(master); return 0; -- cgit v0.10.2 From 41dd03a94c7d408d2ef32530545097f7d1befe5c Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Thu, 20 Feb 2014 21:13:52 +1100 Subject: powerpc/le: Ensure that the 'stop-self' RTAS token is handled correctly Currently we're storing a host endian RTAS token in rtas_stop_self_args.token. We then pass that directly to rtas. This is fine on big endian however on little endian the token is not what we expect. This will typically result in hitting: panic("Alas, I survived.\n"); To fix this we always use the stop-self token in host order and always convert it to be32 before passing this to rtas. Signed-off-by: Tony Breeds Cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 82789e7..0ea99e3 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -35,12 +35,7 @@ #include "offline_states.h" /* This version can't take the spinlock, because it never returns */ -static struct rtas_args rtas_stop_self_args = { - .token = RTAS_UNKNOWN_SERVICE, - .nargs = 0, - .nret = 1, - .rets = &rtas_stop_self_args.args[0], -}; +static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) = CPU_STATE_OFFLINE; @@ -93,15 +88,20 @@ void set_default_offline_state(int cpu) static void rtas_stop_self(void) { - struct rtas_args *args = &rtas_stop_self_args; + struct rtas_args args = { + .token = cpu_to_be32(rtas_stop_self_token), + .nargs = 0, + .nret = 1, + .rets = &args.args[0], + }; local_irq_disable(); - BUG_ON(args->token == RTAS_UNKNOWN_SERVICE); + BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE); printk("cpu %u (hwid %u) Ready to die...\n", smp_processor_id(), hard_smp_processor_id()); - enter_rtas(__pa(args)); + enter_rtas(__pa(&args)); panic("Alas, I survived.\n"); } @@ -392,10 +392,10 @@ static int __init pseries_cpu_hotplug_init(void) } } - rtas_stop_self_args.token = rtas_token("stop-self"); + rtas_stop_self_token = rtas_token("stop-self"); qcss_tok = rtas_token("query-cpu-stopped-state"); - if (rtas_stop_self_args.token == RTAS_UNKNOWN_SERVICE || + if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE || qcss_tok == RTAS_UNKNOWN_SERVICE) { printk(KERN_INFO "CPU Hotplug not supported by firmware " "- disabling.\n"); -- cgit v0.10.2 From f5295bd8ea8a65dc5eac608b151386314cb978f1 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Mon, 24 Feb 2014 17:30:55 +0100 Subject: powerpc/crashdump : Fix page frame number check in copy_oldmem_page In copy_oldmem_page, the current check using max_pfn and min_low_pfn to decide if the page is backed or not, is not valid when the memory layout is not continuous. This happens when running as a QEMU/KVM guest, where RTAS is mapped higher in the memory. In that case max_pfn points to the end of RTAS, and a hole between the end of the kdump kernel and RTAS is not backed by PTEs. As a consequence, the kdump kernel is crashing in copy_oldmem_page when accessing in a direct way the pages in that hole. This fix relies on the memblock's service memblock_is_region_memory to check if the read page is part or not of the directly accessible memory. Signed-off-by: Laurent Dufour Tested-by: Mahesh Salgaonkar CC: Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 11c1d06..7a13f37 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -98,17 +98,19 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, unsigned long offset, int userbuf) { void *vaddr; + phys_addr_t paddr; if (!csize) return 0; csize = min_t(size_t, csize, PAGE_SIZE); + paddr = pfn << PAGE_SHIFT; - if ((min_low_pfn < pfn) && (pfn < max_pfn)) { - vaddr = __va(pfn << PAGE_SHIFT); + if (memblock_is_region_memory(paddr, csize)) { + vaddr = __va(paddr); csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); } else { - vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0); + vaddr = __ioremap(paddr, PAGE_SIZE, 0); csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); iounmap(vaddr); } -- cgit v0.10.2 From a95fc58549e8f462e560868a16b1fa97b12d5db6 Mon Sep 17 00:00:00 2001 From: Liu Ping Fan Date: Wed, 26 Feb 2014 10:23:01 +0800 Subject: powerpc/ftrace: bugfix for test_24bit_addr The branch target should be the func addr, not the addr of func_descr_t. So using ppc_function_entry() to generate the right target addr. Signed-off-by: Liu Ping Fan Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index 9b27b29..b0ded97 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -74,6 +74,7 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new) */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { + addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ return create_branch((unsigned int *)ip, addr, 0); -- cgit v0.10.2 From 573ebfa6601fa58b439e7f15828762839ccd306a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 26 Feb 2014 17:07:38 +1100 Subject: powerpc: Increase stack redzone for 64-bit userspace to 512 bytes The new ELFv2 little-endian ABI increases the stack redzone -- the area below the stack pointer that can be used for storing data -- from 288 bytes to 512 bytes. This means that we need to allow more space on the user stack when delivering a signal to a 64-bit process. To make the code a bit clearer, we define new USER_REDZONE_SIZE and KERNEL_REDZONE_SIZE symbols in ptrace.h. For now, we leave the kernel redzone size at 288 bytes, since increasing it to 512 bytes would increase the size of interrupt stack frames correspondingly. Gcc currently only makes use of 288 bytes of redzone even when compiling for the new little-endian ABI, and the kernel cannot currently be compiled with the new ABI anyway. In the future, hopefully gcc will provide an option to control the amount of redzone used, and then we could reduce it even more. This also changes the code in arch_compat_alloc_user_space() to preserve the expanded redzone. It is not clear why this function would ever be used on a 64-bit process, though. Signed-off-by: Paul Mackerras CC: [v3.13] Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 84fdf68..a613d2c 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -200,10 +200,11 @@ static inline void __user *arch_compat_alloc_user_space(long len) /* * We can't access below the stack pointer in the 32bit ABI and - * can access 288 bytes in the 64bit ABI + * can access 288 bytes in the 64bit big-endian ABI, + * or 512 bytes with the new ELFv2 little-endian ABI. */ if (!is_32bit_task()) - usp -= 288; + usp -= USER_REDZONE_SIZE; return (void __user *) (usp - len); } diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index becc08e..279b80f 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -28,11 +28,23 @@ #ifdef __powerpc64__ +/* + * Size of redzone that userspace is allowed to use below the stack + * pointer. This is 288 in the 64-bit big-endian ELF ABI, and 512 in + * the new ELFv2 little-endian ABI, so we allow the larger amount. + * + * For kernel code we allow a 288-byte redzone, in order to conserve + * kernel stack space; gcc currently only uses 288 bytes, and will + * hopefully allow explicit control of the redzone size in future. + */ +#define USER_REDZONE_SIZE 512 +#define KERNEL_REDZONE_SIZE 288 + #define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */ #define STACK_FRAME_REGS_MARKER ASM_CONST(0x7265677368657265) #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \ - STACK_FRAME_OVERHEAD + 288) + STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE) #define STACK_FRAME_MARKER 12 /* Size of dummy stack frame allocated when calling signal handler. */ @@ -41,6 +53,8 @@ #else /* __powerpc64__ */ +#define USER_REDZONE_SIZE 0 +#define KERNEL_REDZONE_SIZE 0 #define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */ #define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */ #define STACK_FRAME_REGS_MARKER ASM_CONST(0x72656773) diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index e35bf77..8d253c2 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -65,8 +65,8 @@ struct rt_sigframe { struct siginfo __user *pinfo; void __user *puc; struct siginfo info; - /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */ - char abigap[288]; + /* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */ + char abigap[USER_REDZONE_SIZE]; } __attribute__ ((aligned (16))); static const char fmt32[] = KERN_INFO \ -- cgit v0.10.2 From 947166043732b69878123bf31f51933ad0316080 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 25 Feb 2014 15:28:37 +0800 Subject: powerpc/powernv: Dump PHB diag-data immediately The PHB diag-data is important to help locating the root cause for EEH errors such as frozen PE or fenced PHB. However, the EEH core enables IO path by clearing part of HW registers before collecting this data causing it to be corrupted. This patch fixes this by dumping the PHB diag-data immediately when frozen/fenced state on PE or PHB is detected for the first time in eeh_ops::get_state() or next_error() backend. Signed-off-by: Gavin Shan CC: Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index f514743..253fefe 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c @@ -114,6 +114,7 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get, ioda_eeh_inbB_dbgfs_set, "0x%llx\n"); #endif /* CONFIG_DEBUG_FS */ + /** * ioda_eeh_post_init - Chip dependent post initialization * @hose: PCI controller @@ -221,6 +222,22 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option) return ret; } +static void ioda_eeh_phb_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + long rc; + + rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, + PNV_PCI_DIAG_BUF_SIZE); + if (rc != OPAL_SUCCESS) { + pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", + __func__, hose->global_number, rc); + return; + } + + pnv_pci_dump_phb_diag_data(hose, phb->diag.blob); +} + /** * ioda_eeh_get_state - Retrieve the state of PE * @pe: EEH PE @@ -272,6 +289,9 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) result |= EEH_STATE_DMA_ACTIVE; result |= EEH_STATE_MMIO_ENABLED; result |= EEH_STATE_DMA_ENABLED; + } else if (!(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + ioda_eeh_phb_diag(hose); } return result; @@ -315,6 +335,15 @@ static int ioda_eeh_get_state(struct eeh_pe *pe) __func__, fstate, hose->global_number, pe_no); } + /* Dump PHB diag-data for frozen PE */ + if (result != EEH_STATE_NOT_SUPPORT && + (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) != + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) && + !(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + ioda_eeh_phb_diag(hose); + } + return result; } @@ -530,42 +559,6 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) } /** - * ioda_eeh_get_log - Retrieve error log - * @pe: EEH PE - * @severity: Severity level of the log - * @drv_log: buffer to store the log - * @len: space of the log buffer - * - * The function is used to retrieve error log from P7IOC. - */ -static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) -{ - s64 ret; - unsigned long flags; - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - - spin_lock_irqsave(&phb->lock, flags); - - ret = opal_pci_get_phb_diag_data2(phb->opal_id, - phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); - if (ret) { - spin_unlock_irqrestore(&phb->lock, flags); - pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n", - __func__, hose->global_number, pe->addr, ret); - return -EIO; - } - - /* The PHB diag-data is always indicative */ - pnv_pci_dump_phb_diag_data(hose, phb->diag.blob); - - spin_unlock_irqrestore(&phb->lock, flags); - - return 0; -} - -/** * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE * @pe: EEH PE * @@ -646,22 +639,6 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose) } } -static void ioda_eeh_phb_diag(struct pci_controller *hose) -{ - struct pnv_phb *phb = hose->private_data; - long rc; - - rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob, - PNV_PCI_DIAG_BUF_SIZE); - if (rc != OPAL_SUCCESS) { - pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n", - __func__, hose->global_number, rc); - return; - } - - pnv_pci_dump_phb_diag_data(hose, phb->diag.blob); -} - static int ioda_eeh_get_phb_pe(struct pci_controller *hose, struct eeh_pe **pe) { @@ -835,6 +812,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) } /* + * EEH core will try recover from fenced PHB or + * frozen PE. In the time for frozen PE, EEH core + * enable IO path for that before collecting logs, + * but it ruins the site. So we have to dump the + * log in advance here. + */ + if ((ret == EEH_NEXT_ERR_FROZEN_PE || + ret == EEH_NEXT_ERR_FENCED_PHB) && + !((*pe)->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + ioda_eeh_phb_diag(hose); + } + + /* * If we have no errors on the specific PHB or only * informative error there, we continue poking it. * Otherwise, we need actions to be taken by upper @@ -852,7 +843,6 @@ struct pnv_eeh_ops ioda_eeh_ops = { .set_option = ioda_eeh_set_option, .get_state = ioda_eeh_get_state, .reset = ioda_eeh_reset, - .get_log = ioda_eeh_get_log, .configure_bridge = ioda_eeh_configure_bridge, .next_error = ioda_eeh_next_error }; -- cgit v0.10.2 From af87d2fe95444d107e0c0cf0ba7e20e6716a7bfd Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 25 Feb 2014 15:28:38 +0800 Subject: powerpc/powernv: Refactor PHB diag-data dump As Ben suggested, the patch prints PHB diag-data with multiple fields in one line and omits the line if the fields of that line are all zero. With the patch applied, the PHB3 diag-data dump looks like: PHB3 PHB#3 Diag-data (Version: 1) brdgCtl: 00000002 RootSts: 0000000f 00400000 b0830008 00100147 00002000 nFir: 0000000000000000 0030006e00000000 0000000000000000 PhbSts: 0000001c00000000 0000000000000000 Lem: 0000000000100000 42498e327f502eae 0000000000000000 InAErr: 8000000000000000 8000000000000000 0402030000000000 0000000000000000 PE[ 8] A/B: 8480002b00000000 8000000000000000 [ The current diag data is so big that it overflows the printk buffer pretty quickly in cases when we get a handful of errors at once which can happen. --BenH ] Signed-off-by: Gavin Shan CC: Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 95633d7..8518817 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -134,57 +134,72 @@ static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n", hose->global_number, common->version); - pr_info(" brdgCtl: %08x\n", data->brdgCtl); - - pr_info(" portStatusReg: %08x\n", data->portStatusReg); - pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); - pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); - - pr_info(" deviceStatus: %08x\n", data->deviceStatus); - pr_info(" slotStatus: %08x\n", data->slotStatus); - pr_info(" linkStatus: %08x\n", data->linkStatus); - pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); - pr_info(" devSecStatus: %08x\n", data->devSecStatus); - - pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); - pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); - pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); - pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); - pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); - pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); - pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); - pr_info(" sourceId: %08x\n", data->sourceId); - pr_info(" errorClass: %016llx\n", data->errorClass); - pr_info(" correlator: %016llx\n", data->correlator); - pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr); - pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr); - pr_info(" lemFir: %016llx\n", data->lemFir); - pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); - pr_info(" lemWOF: %016llx\n", data->lemWOF); - pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); - pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); - pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); - pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); - pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); - pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus); - pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); - pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); - pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); - pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus); - pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); - pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); - pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); - pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus); - pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); - pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); + if (data->brdgCtl) + pr_info(" brdgCtl: %08x\n", + data->brdgCtl); + if (data->portStatusReg || data->rootCmplxStatus || + data->busAgentStatus) + pr_info(" UtlSts: %08x %08x %08x\n", + data->portStatusReg, data->rootCmplxStatus, + data->busAgentStatus); + if (data->deviceStatus || data->slotStatus || + data->linkStatus || data->devCmdStatus || + data->devSecStatus) + pr_info(" RootSts: %08x %08x %08x %08x %08x\n", + data->deviceStatus, data->slotStatus, + data->linkStatus, data->devCmdStatus, + data->devSecStatus); + if (data->rootErrorStatus || data->uncorrErrorStatus || + data->corrErrorStatus) + pr_info(" RootErrSts: %08x %08x %08x\n", + data->rootErrorStatus, data->uncorrErrorStatus, + data->corrErrorStatus); + if (data->tlpHdr1 || data->tlpHdr2 || + data->tlpHdr3 || data->tlpHdr4) + pr_info(" RootErrLog: %08x %08x %08x %08x\n", + data->tlpHdr1, data->tlpHdr2, + data->tlpHdr3, data->tlpHdr4); + if (data->sourceId || data->errorClass || + data->correlator) + pr_info(" RootErrLog1: %08x %016llx %016llx\n", + data->sourceId, data->errorClass, + data->correlator); + if (data->p7iocPlssr || data->p7iocCsr) + pr_info(" PhbSts: %016llx %016llx\n", + data->p7iocPlssr, data->p7iocCsr); + if (data->lemFir || data->lemErrorMask || + data->lemWOF) + pr_info(" Lem: %016llx %016llx %016llx\n", + data->lemFir, data->lemErrorMask, + data->lemWOF); + if (data->phbErrorStatus || data->phbFirstErrorStatus || + data->phbErrorLog0 || data->phbErrorLog1) + pr_info(" PhbErr: %016llx %016llx %016llx %016llx\n", + data->phbErrorStatus, data->phbFirstErrorStatus, + data->phbErrorLog0, data->phbErrorLog1); + if (data->mmioErrorStatus || data->mmioFirstErrorStatus || + data->mmioErrorLog0 || data->mmioErrorLog1) + pr_info(" OutErr: %016llx %016llx %016llx %016llx\n", + data->mmioErrorStatus, data->mmioFirstErrorStatus, + data->mmioErrorLog0, data->mmioErrorLog1); + if (data->dma0ErrorStatus || data->dma0FirstErrorStatus || + data->dma0ErrorLog0 || data->dma0ErrorLog1) + pr_info(" InAErr: %016llx %016llx %016llx %016llx\n", + data->dma0ErrorStatus, data->dma0FirstErrorStatus, + data->dma0ErrorLog0, data->dma0ErrorLog1); + if (data->dma1ErrorStatus || data->dma1FirstErrorStatus || + data->dma1ErrorLog0 || data->dma1ErrorLog1) + pr_info(" InBErr: %016llx %016llx %016llx %016llx\n", + data->dma1ErrorStatus, data->dma1FirstErrorStatus, + data->dma1ErrorLog0, data->dma1ErrorLog1); for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { if ((data->pestA[i] >> 63) == 0 && (data->pestB[i] >> 63) == 0) continue; - pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); - pr_info(" PESTB: %016llx\n", data->pestB[i]); + pr_info(" PE[%3d] A/B: %016llx %016llx\n", + i, data->pestA[i], data->pestB[i]); } } @@ -197,62 +212,77 @@ static void pnv_pci_dump_phb3_diag_data(struct pci_controller *hose, data = (struct OpalIoPhb3ErrorData*)common; pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n", hose->global_number, common->version); - - pr_info(" brdgCtl: %08x\n", data->brdgCtl); - - pr_info(" portStatusReg: %08x\n", data->portStatusReg); - pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); - pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); - - pr_info(" deviceStatus: %08x\n", data->deviceStatus); - pr_info(" slotStatus: %08x\n", data->slotStatus); - pr_info(" linkStatus: %08x\n", data->linkStatus); - pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); - pr_info(" devSecStatus: %08x\n", data->devSecStatus); - - pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); - pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); - pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); - pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); - pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); - pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); - pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); - pr_info(" sourceId: %08x\n", data->sourceId); - pr_info(" errorClass: %016llx\n", data->errorClass); - pr_info(" correlator: %016llx\n", data->correlator); - - pr_info(" nFir: %016llx\n", data->nFir); - pr_info(" nFirMask: %016llx\n", data->nFirMask); - pr_info(" nFirWOF: %016llx\n", data->nFirWOF); - pr_info(" PhbPlssr: %016llx\n", data->phbPlssr); - pr_info(" PhbCsr: %016llx\n", data->phbCsr); - pr_info(" lemFir: %016llx\n", data->lemFir); - pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); - pr_info(" lemWOF: %016llx\n", data->lemWOF); - pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); - pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); - pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); - pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); - pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); - pr_info(" mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus); - pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); - pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); - pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); - pr_info(" dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus); - pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); - pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); - pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); - pr_info(" dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus); - pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); - pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); + if (data->brdgCtl) + pr_info(" brdgCtl: %08x\n", + data->brdgCtl); + if (data->portStatusReg || data->rootCmplxStatus || + data->busAgentStatus) + pr_info(" UtlSts: %08x %08x %08x\n", + data->portStatusReg, data->rootCmplxStatus, + data->busAgentStatus); + if (data->deviceStatus || data->slotStatus || + data->linkStatus || data->devCmdStatus || + data->devSecStatus) + pr_info(" RootSts: %08x %08x %08x %08x %08x\n", + data->deviceStatus, data->slotStatus, + data->linkStatus, data->devCmdStatus, + data->devSecStatus); + if (data->rootErrorStatus || data->uncorrErrorStatus || + data->corrErrorStatus) + pr_info(" RootErrSts: %08x %08x %08x\n", + data->rootErrorStatus, data->uncorrErrorStatus, + data->corrErrorStatus); + if (data->tlpHdr1 || data->tlpHdr2 || + data->tlpHdr3 || data->tlpHdr4) + pr_info(" RootErrLog: %08x %08x %08x %08x\n", + data->tlpHdr1, data->tlpHdr2, + data->tlpHdr3, data->tlpHdr4); + if (data->sourceId || data->errorClass || + data->correlator) + pr_info(" RootErrLog1: %08x %016llx %016llx\n", + data->sourceId, data->errorClass, + data->correlator); + if (data->nFir || data->nFirMask || + data->nFirWOF) + pr_info(" nFir: %016llx %016llx %016llx\n", + data->nFir, data->nFirMask, + data->nFirWOF); + if (data->phbPlssr || data->phbCsr) + pr_info(" PhbSts: %016llx %016llx\n", + data->phbPlssr, data->phbCsr); + if (data->lemFir || data->lemErrorMask || + data->lemWOF) + pr_info(" Lem: %016llx %016llx %016llx\n", + data->lemFir, data->lemErrorMask, + data->lemWOF); + if (data->phbErrorStatus || data->phbFirstErrorStatus || + data->phbErrorLog0 || data->phbErrorLog1) + pr_info(" PhbErr: %016llx %016llx %016llx %016llx\n", + data->phbErrorStatus, data->phbFirstErrorStatus, + data->phbErrorLog0, data->phbErrorLog1); + if (data->mmioErrorStatus || data->mmioFirstErrorStatus || + data->mmioErrorLog0 || data->mmioErrorLog1) + pr_info(" OutErr: %016llx %016llx %016llx %016llx\n", + data->mmioErrorStatus, data->mmioFirstErrorStatus, + data->mmioErrorLog0, data->mmioErrorLog1); + if (data->dma0ErrorStatus || data->dma0FirstErrorStatus || + data->dma0ErrorLog0 || data->dma0ErrorLog1) + pr_info(" InAErr: %016llx %016llx %016llx %016llx\n", + data->dma0ErrorStatus, data->dma0FirstErrorStatus, + data->dma0ErrorLog0, data->dma0ErrorLog1); + if (data->dma1ErrorStatus || data->dma1FirstErrorStatus || + data->dma1ErrorLog0 || data->dma1ErrorLog1) + pr_info(" InBErr: %016llx %016llx %016llx %016llx\n", + data->dma1ErrorStatus, data->dma1FirstErrorStatus, + data->dma1ErrorLog0, data->dma1ErrorLog1); for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) { if ((data->pestA[i] >> 63) == 0 && (data->pestB[i] >> 63) == 0) continue; - pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); - pr_info(" PESTB: %016llx\n", data->pestB[i]); + pr_info(" PE[%3d] A/B: %016llx %016llx\n", + i, data->pestA[i], data->pestB[i]); } } -- cgit v0.10.2 From 2f3f38e4d3d03dd4125cc9a1f49ab3cc91d8d670 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Feb 2014 16:20:29 +1100 Subject: powerpc/powernv: Fix opal_xscom_{read,write} prototype The OPAL firmware functions opal_xscom_read and opal_xscom_write take a 64-bit argument for the XSCOM (PCB) address in order to support the indirect mode on P8. Signed-off-by: Benjamin Herrenschmidt CC: [v3.13] diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 40157e2..ed82142 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -816,8 +816,8 @@ int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe, int64_t opal_pci_poll(uint64_t phb_id); int64_t opal_return_cpu(void); -int64_t opal_xscom_read(uint32_t gcid, uint32_t pcb_addr, __be64 *val); -int64_t opal_xscom_write(uint32_t gcid, uint32_t pcb_addr, uint64_t val); +int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val); +int64_t opal_xscom_write(uint32_t gcid, uint64_t pcb_addr, uint64_t val); int64_t opal_lpc_write(uint32_t chip_id, enum OpalLPCAddressType addr_type, uint32_t addr, uint32_t data, uint32_t sz); -- cgit v0.10.2 From e0cf957614976896111e676e5134ac98ee227d3d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Feb 2014 16:20:38 +1100 Subject: powerpc/powernv: Fix indirect XSCOM unmangling We need to unmangle the full address, not just the register number, and we also need to support the real indirect bit being set for in-kernel uses. Signed-off-by: Benjamin Herrenschmidt CC: [v3.13] diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 4fbf276..4cd2ea6 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -71,11 +71,11 @@ static int opal_xscom_err_xlate(int64_t rc) } } -static u64 opal_scom_unmangle(u64 reg) +static u64 opal_scom_unmangle(u64 addr) { /* * XSCOM indirect addresses have the top bit set. Additionally - * the reset of the top 3 nibbles is always 0. + * the rest of the top 3 nibbles is always 0. * * Because the debugfs interface uses signed offsets and shifts * the address left by 3, we basically cannot use the top 4 bits @@ -86,10 +86,13 @@ static u64 opal_scom_unmangle(u64 reg) * conversion here. To leave room for further xscom address * expansion, we only clear out the top byte * + * For in-kernel use, we also support the real indirect bit, so + * we test for any of the top 5 bits + * */ - if (reg & (1ull << 59)) - reg = (reg & ~(0xffull << 56)) | (1ull << 63); - return reg; + if (addr & (0x1full << 59)) + addr = (addr & ~(0xffull << 56)) | (1ull << 63); + return addr; } static int opal_scom_read(scom_map_t map, u64 reg, u64 *value) @@ -98,8 +101,8 @@ static int opal_scom_read(scom_map_t map, u64 reg, u64 *value) int64_t rc; __be64 v; - reg = opal_scom_unmangle(reg); - rc = opal_xscom_read(m->chip, m->addr + reg, (__be64 *)__pa(&v)); + reg = opal_scom_unmangle(m->addr + reg); + rc = opal_xscom_read(m->chip, reg, (__be64 *)__pa(&v)); *value = be64_to_cpu(v); return opal_xscom_err_xlate(rc); } @@ -109,8 +112,8 @@ static int opal_scom_write(scom_map_t map, u64 reg, u64 value) struct opal_scom_map *m = map; int64_t rc; - reg = opal_scom_unmangle(reg); - rc = opal_xscom_write(m->chip, m->addr + reg, value); + reg = opal_scom_unmangle(m->addr + reg); + rc = opal_xscom_write(m->chip, reg, value); return opal_xscom_err_xlate(rc); } -- cgit v0.10.2 From 8987583366ae9e03c306c2b7d73bdb952df1d08d Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 18 Feb 2014 22:25:15 +0100 Subject: firewire: net: fix use after free Commit 8408dc1c14c1 "firewire: net: use dev_printk API" introduced a use-after-free in a failure path. fwnet_transmit_packet_failed(ptask) may free ptask, then the dev_err() call dereferenced it. The fix is straightforward; simply reorder the two calls. Reported-by: Dan Carpenter Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Stefan Richter diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 6b89598..4af0a7b 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -929,8 +929,6 @@ static void fwnet_write_complete(struct fw_card *card, int rcode, if (rcode == RCODE_COMPLETE) { fwnet_transmit_packet_done(ptask); } else { - fwnet_transmit_packet_failed(ptask); - if (printk_timed_ratelimit(&j, 1000) || rcode != last_rcode) { dev_err(&ptask->dev->netdev->dev, "fwnet_write_complete failed: %x (skipped %d)\n", @@ -938,8 +936,10 @@ static void fwnet_write_complete(struct fw_card *card, int rcode, errors_skipped = 0; last_rcode = rcode; - } else + } else { errors_skipped++; + } + fwnet_transmit_packet_failed(ptask); } } -- cgit v0.10.2 From 48095d991d85687569ac025b18a6c7ae1632c9f7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 Feb 2014 17:25:33 -0800 Subject: audit: Use struct net not pid_t to remember the network namespce to reply in In struct audit_netlink_list and audit_reply add a reference to the network namespace of the caller and remove the userspace pid of the caller. This cleanly remembers the callers network namespace, and removes a huge class of races and nasty failure modes that can occur when attempting to relook up the callers network namespace from a pid_t (including the caller's network namespace changing, pid wraparound, and the pid simply not being present). Signed-off-by: "Eric W. Biederman" diff --git a/kernel/audit.c b/kernel/audit.c index 34c5a23..1e5756f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -182,7 +182,7 @@ struct audit_buffer { struct audit_reply { __u32 portid; - pid_t pid; + struct net *net; struct sk_buff *skb; }; @@ -500,7 +500,7 @@ int audit_send_list(void *_dest) { struct audit_netlink_list *dest = _dest; struct sk_buff *skb; - struct net *net = get_net_ns_by_pid(dest->pid); + struct net *net = dest->net; struct audit_net *aunet = net_generic(net, audit_net_id); /* wait for parent to finish and send an ACK */ @@ -510,6 +510,7 @@ int audit_send_list(void *_dest) while ((skb = __skb_dequeue(&dest->q)) != NULL) netlink_unicast(aunet->nlsk, skb, dest->portid, 0); + put_net(net); kfree(dest); return 0; @@ -543,7 +544,7 @@ out_kfree_skb: static int audit_send_reply_thread(void *arg) { struct audit_reply *reply = (struct audit_reply *)arg; - struct net *net = get_net_ns_by_pid(reply->pid); + struct net *net = reply->net; struct audit_net *aunet = net_generic(net, audit_net_id); mutex_lock(&audit_cmd_mutex); @@ -552,6 +553,7 @@ static int audit_send_reply_thread(void *arg) /* Ignore failure. It'll only happen if the sender goes away, because our timeout is set to infinite. */ netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0); + put_net(net); kfree(reply); return 0; } @@ -583,8 +585,8 @@ static void audit_send_reply(__u32 portid, int seq, int type, int done, if (!skb) goto out; + reply->net = get_net(current->nsproxy->net_ns); reply->portid = portid; - reply->pid = task_pid_vnr(current); reply->skb = skb; tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); diff --git a/kernel/audit.h b/kernel/audit.h index 57cc64d..8df13221 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -247,7 +247,7 @@ extern void audit_panic(const char *message); struct audit_netlink_list { __u32 portid; - pid_t pid; + struct net *net; struct sk_buff_head q; }; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 14a78cc..a5e3d73d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "audit.h" /* @@ -1083,8 +1084,8 @@ int audit_list_rules_send(__u32 portid, int seq) dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL); if (!dest) return -ENOMEM; + dest->net = get_net(current->nsproxy->net_ns); dest->portid = portid; - dest->pid = task_pid_vnr(current); skb_queue_head_init(&dest->q); mutex_lock(&audit_filter_mutex); -- cgit v0.10.2 From b39c2a57a00a841f057a75b41df4c26173288b66 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Feb 2014 18:14:26 +0100 Subject: perf tools: Fix strict alias issue for find_first_bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When compiling perf tool code with gcc 4.4.7 I'm getting following error: CC util/session.o cc1: warnings being treated as errors util/session.c: In function ‘perf_session_deliver_event’: tools/perf/util/include/linux/bitops.h:109: error: dereferencing pointer ‘p’ does break strict-aliasing rules tools/perf/util/include/linux/bitops.h:101: error: dereferencing pointer ‘p’ does break strict-aliasing rules util/session.c:697: note: initialized from here tools/perf/util/include/linux/bitops.h:101: note: initialized from here make[1]: *** [util/session.o] Error 1 make: *** [util/session.o] Error 2 The aliased types here are u64 and unsigned long pointers, which is safe for the find_first_bit processing. This error shows up for me only for gcc 4.4 on 32bit x86, even for -Wstrict-aliasing=3, while newer gcc are quiet and scream here for -Wstrict-aliasing={2,1}. Looks like newer gcc changed the rules for strict alias warnings. The gcc documentation offers workaround for valid aliasing by using __may_alias__ attribute: http://gcc.gnu.org/onlinedocs/gcc-4.4.0/gcc/Type-Attributes.html Using this workaround for the find_first_bit function. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1393434867-20271-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index 45cf10a..dadfa7e 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -87,13 +87,15 @@ static __always_inline unsigned long __ffs(unsigned long word) return num; } +typedef const unsigned long __attribute__((__may_alias__)) long_alias_t; + /* * Find the first set bit in a memory region. */ static inline unsigned long find_first_bit(const unsigned long *addr, unsigned long size) { - const unsigned long *p = addr; + long_alias_t *p = (long_alias_t *) addr; unsigned long result = 0; unsigned long tmp; -- cgit v0.10.2 From 288ac26cc2334e5e6ecad6416e9bf750691afd84 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Fri, 28 Feb 2014 09:57:47 +0000 Subject: arm64: remove redundant "psci:" prefixes Since 652af899799354049b273af897b798b8f03fdd88 "arm64: factor out spin-table boot method" psci prefix's been introduced. We have a common pr_fmt, so clean them up. Signed-off-by: Vladimir Murzin Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 4f97db3..83ebee8 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -251,7 +251,7 @@ static int cpu_psci_cpu_boot(unsigned int cpu) { int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); if (err) - pr_err("psci: failed to boot CPU%d (%d)\n", cpu, err); + pr_err("failed to boot CPU%d (%d)\n", cpu, err); return err; } @@ -278,7 +278,7 @@ static void cpu_psci_cpu_die(unsigned int cpu) ret = psci_ops.cpu_off(state); - pr_crit("psci: unable to power off CPU%u (%d)\n", cpu, ret); + pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); } #endif -- cgit v0.10.2 From 64b4f60f497058f1c6ba118a0260249ee5c091a6 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Fri, 28 Feb 2014 09:57:33 +0000 Subject: arm64: remove return value form psci_init() psci_init() is written to return err code if something goes wrong. However, the single user, setup_arch(), doesn't care about it. Moreover, every error path is supplied with a clear message which is enough for pleasant debugging. Signed-off-by: Vladimir Murzin Acked-by: Mark Rutland Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h index e5312ea..d15ab8b4 100644 --- a/arch/arm64/include/asm/psci.h +++ b/arch/arm64/include/asm/psci.h @@ -14,6 +14,6 @@ #ifndef __ASM_PSCI_H #define __ASM_PSCI_H -int psci_init(void); +void psci_init(void); #endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 83ebee8..ea4828a 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -176,22 +176,20 @@ static const struct of_device_id psci_of_match[] __initconst = { {}, }; -int __init psci_init(void) +void __init psci_init(void) { struct device_node *np; const char *method; u32 id; - int err = 0; np = of_find_matching_node(NULL, psci_of_match); if (!np) - return -ENODEV; + return; pr_info("probing function IDs from device-tree\n"); if (of_property_read_string(np, "method", &method)) { pr_warning("missing \"method\" property\n"); - err = -ENXIO; goto out_put_node; } @@ -201,7 +199,6 @@ int __init psci_init(void) invoke_psci_fn = __invoke_psci_fn_smc; } else { pr_warning("invalid \"method\" property: %s\n", method); - err = -EINVAL; goto out_put_node; } @@ -227,7 +224,7 @@ int __init psci_init(void) out_put_node: of_node_put(np); - return err; + return; } #ifdef CONFIG_SMP -- cgit v0.10.2 From e0d849fad746cb36a6822e4595d8ba9bf0adf7fa Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 27 Feb 2014 22:46:48 +0100 Subject: dm cache: fix truncation bug when mapping I/O to >2TB fast device When remapping a block to the cache's fast device that is larger than 2TB we must not truncate the destination sector to 32bits. The 32bit temporary result of from_cblock() was being overflowed in remap_to_cache() due to the logical left shift. Use an intermediate 64bit type to store the 32bit from_cblock() result to fix the overflow. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index db09444..1af7014 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -671,15 +671,16 @@ static void remap_to_cache(struct cache *cache, struct bio *bio, dm_cblock_t cblock) { sector_t bi_sector = bio->bi_iter.bi_sector; + sector_t block = from_cblock(cblock); bio->bi_bdev = cache->cache_dev->bdev; if (!block_size_is_power_of_two(cache)) bio->bi_iter.bi_sector = - (from_cblock(cblock) * cache->sectors_per_block) + + (block * cache->sectors_per_block) + sector_div(bi_sector, cache->sectors_per_block); else bio->bi_iter.bi_sector = - (from_cblock(cblock) << cache->sectors_per_block_shift) | + (block << cache->sectors_per_block_shift) | (bi_sector & (cache->sectors_per_block - 1)); } -- cgit v0.10.2 From addea9ef055bf5a10f9ec967b1e0443ceb44bd83 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 24 Feb 2014 02:27:57 +0000 Subject: cpufreq: enable ARM drivers on arm64 Enable cpufreq and power kconfig menus on arm64 along with arm cpufreq drivers. The power menu is needed for OPP support. At least on Calxeda systems, the same cpufreq driver is used for arm and arm64 based systems. Signed-off-by: Rob Herring Acked-by: Viresh Kumar Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 33e8f84..c2056ca 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -304,6 +304,14 @@ source "drivers/cpuidle/Kconfig" endmenu +menu "Power management options" + +source "kernel/power/Kconfig" + +source "drivers/cpufreq/Kconfig" + +endmenu + source "net/Kconfig" source "drivers/Kconfig" diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 4b029c0..1fbe11f 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -200,7 +200,7 @@ source "drivers/cpufreq/Kconfig.x86" endmenu menu "ARM CPU frequency scaling drivers" -depends on ARM +depends on ARM || ARM64 source "drivers/cpufreq/Kconfig.arm" endmenu -- cgit v0.10.2 From 84fe6826c28f69d8708bd575faed7f75e6b6f57f Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 25 Feb 2014 11:38:53 +0000 Subject: arm64: mm: Add double logical invert to pte accessors Page table entries on ARM64 are 64 bits, and some pte functions such as pte_dirty return a bitwise-and of a flag with the pte value. If the flag to be tested resides in the upper 32 bits of the pte, then we run into the danger of the result being dropped if downcast. For example: gather_stats(page, md, pte_dirty(*pte), 1); where pte_dirty(*pte) is downcast to an int. This patch adds a double logical invert to all the pte_ accessors to ensure predictable downcasting. Signed-off-by: Steve Capper Cc: Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b524dcd..aa3917c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -136,11 +136,11 @@ extern struct page *empty_zero_page; /* * The following only work if pte_present(). Undefined behaviour otherwise. */ -#define pte_present(pte) (pte_val(pte) & (PTE_VALID | PTE_PROT_NONE)) -#define pte_dirty(pte) (pte_val(pte) & PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & PTE_AF) -#define pte_special(pte) (pte_val(pte) & PTE_SPECIAL) -#define pte_write(pte) (pte_val(pte) & PTE_WRITE) +#define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) +#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) +#define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) +#define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) +#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_valid_user(pte) \ -- cgit v0.10.2 From b57fc9e80692043e2a3a74e1d2c047eb700dcd0c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 28 Feb 2014 16:12:25 +0000 Subject: arm64: Fix !CONFIG_SMP kernel build Commit fb4a96029c8a (arm64: kernel: fix per-cpu offset restore on resume) uses per_cpu_offset() unconditionally during CPU wakeup, however, this is only defined for the SMP case. Signed-off-by: Catalin Marinas Reported-by: Dave P Martin diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 13fb0b3..453a179 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,6 +16,8 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H +#ifdef CONFIG_SMP + static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); @@ -36,6 +38,12 @@ static inline unsigned long __my_cpu_offset(void) } #define __my_cpu_offset __my_cpu_offset() +#else /* !CONFIG_SMP */ + +#define set_my_cpu_offset(x) do { } while (0) + +#endif /* CONFIG_SMP */ + #include #endif /* __ASM_PERCPU_H */ -- cgit v0.10.2 From 14f398ca2f26a2ed6236aec54395e0fa06ec8a82 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Fri, 28 Feb 2014 12:02:56 -0500 Subject: dm cache mq: fix memory allocation failure for large cache devices The memory allocated for the multiqueue policy's hash table doesn't need to be physically contiguous. Use vzalloc() instead of kzalloc(). Fedora has been carrying this fix since 10/10/2013. Failure seen during creation of a 10TB cached device with a 2048 sector block size and 411GB cache size: dmsetup: page allocation failure: order:9, mode:0x10c0d0 CPU: 11 PID: 29235 Comm: dmsetup Not tainted 3.10.4 #3 Hardware name: Supermicro X8DTL/X8DTL, BIOS 2.1a 12/30/2011 000000000010c0d0 ffff880090941898 ffffffff81387ab4 ffff880090941928 ffffffff810bb26f 0000000000000009 000000000010c0d0 ffff880090941928 ffffffff81385dbc ffffffff815f3840 ffffffff00000000 000002000010c0d0 Call Trace: [] dump_stack+0x19/0x1b [] warn_alloc_failed+0x110/0x124 [] ? __alloc_pages_direct_compact+0x17c/0x18e [] __alloc_pages_nodemask+0x6c7/0x75e [] __get_free_pages+0x12/0x3f [] kmalloc_order_trace+0x29/0x88 [] __kmalloc+0x36/0x11b [] ? mq_create+0x1dc/0x2cf [dm_cache_mq] [] mq_create+0x2af/0x2cf [dm_cache_mq] [] dm_cache_policy_create+0xa7/0xd2 [dm_cache] [] ? cache_ctr+0x245/0xa13 [dm_cache] [] cache_ctr+0x353/0xa13 [dm_cache] [] dm_table_add_target+0x227/0x2ce [dm_mod] [] table_load+0x286/0x2ac [dm_mod] [] ? dev_wait+0x8a/0x8a [dm_mod] [] ctl_ioctl+0x39a/0x3c2 [dm_mod] [] dm_ctl_ioctl+0xe/0x12 [dm_mod] [] vfs_ioctl+0x21/0x34 [] do_vfs_ioctl+0x3b1/0x3f4 [] ? ____fput+0x9/0xb [] ? task_work_run+0x7e/0x92 [] SyS_ioctl+0x52/0x82 [] system_call_fastpath+0x16/0x1b Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c index 1e018e9..0e385e4 100644 --- a/drivers/md/dm-cache-policy-mq.c +++ b/drivers/md/dm-cache-policy-mq.c @@ -872,7 +872,7 @@ static void mq_destroy(struct dm_cache_policy *p) { struct mq_policy *mq = to_mq_policy(p); - kfree(mq->table); + vfree(mq->table); epool_exit(&mq->cache_pool); epool_exit(&mq->pre_cache_pool); kfree(mq); @@ -1245,7 +1245,7 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); mq->hash_bits = ffs(mq->nr_buckets) - 1; - mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL); + mq->table = vzalloc(sizeof(*mq->table) * mq->nr_buckets); if (!mq->table) goto bad_alloc_table; -- cgit v0.10.2 From adb07df1e039e9fe43e66aeea8b4771f83659dbb Mon Sep 17 00:00:00 2001 From: Bing Zhao Date: Wed, 26 Feb 2014 20:11:22 -0800 Subject: mwifiex: do not advertise usb autosuspend support As many Surface Pro I & II users have found out, the mwifiex_usb doesn't support usb autosuspend, and it has caused some system stability issues. Bug 69661 - mwifiex_usb on MS Surface Pro 1 is unstable Bug 60815 - Interface hangs in mwifiex_usb Bug 64111 - mwifiex_usb USB8797 crash failed to get signal information USB autosuspend get triggered when Surface Pro's AC power is removed or powertop enables power saving on USB8797 device. Driver's suspend handler is called here, but resume handler won't be called until the AC power is put back on or powertop disables power saving for USB8797. We need to refactor the suspend/resume handlers to support usb autosuspend properly. For now let's just remove it. Cc: # 3.5+ Signed-off-by: Bing Zhao Signed-off-by: Amitkumar Karwar Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/usb.c b/drivers/net/wireless/mwifiex/usb.c index cb6b70a..2087488 100644 --- a/drivers/net/wireless/mwifiex/usb.c +++ b/drivers/net/wireless/mwifiex/usb.c @@ -525,13 +525,6 @@ static int mwifiex_usb_resume(struct usb_interface *intf) MWIFIEX_BSS_ROLE_ANY), MWIFIEX_ASYNC_CMD); -#ifdef CONFIG_PM - /* Resume handler may be called due to remote wakeup, - * force to exit suspend anyway - */ - usb_disable_autosuspend(card->udev); -#endif /* CONFIG_PM */ - return 0; } @@ -571,7 +564,6 @@ static struct usb_driver mwifiex_usb_driver = { .id_table = mwifiex_usb_table, .suspend = mwifiex_usb_suspend, .resume = mwifiex_usb_resume, - .supports_autosuspend = 1, }; static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter) -- cgit v0.10.2 From f3ffaaa8b727da6c442b5b8cb356c2196b6e2d59 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 28 Feb 2014 11:02:02 -0800 Subject: [IA64] sba_iommu: fix section mismatch To: linux-kernel@vger.kernel.org Fix the section mismatch warning by remove __init annotate for functions ioc_iova_init(), ioc_init() and acpi_sba_ioc_add() because they may be called at runtime. WARNING: vmlinux.o(.data+0x66ee0): Section mismatch in reference from the variable acpi_sba_ioc_handler to the function .init.text:acpi_sba_ioc_add() The variable acpi_sba_ioc_handler references the function __init acpi_sba_ioc_add() Signed-off-by: Jiang Liu Signed-off-by: Tony Luck diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 8e858b5..30c43d3 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1596,7 +1596,7 @@ static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, * ***************************************************************/ -static void __init +static void ioc_iova_init(struct ioc *ioc) { int tcnfg; @@ -1807,7 +1807,7 @@ static struct ioc_iommu ioc_iommu_info[] __initdata = { { SX2000_IOC_ID, "sx2000", NULL }, }; -static struct ioc * __init +static struct ioc * ioc_init(unsigned long hpa, void *handle) { struct ioc *ioc; @@ -2041,7 +2041,7 @@ sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) #define sba_map_ioc_to_node(ioc, handle) #endif -static int __init +static int acpi_sba_ioc_add(struct acpi_device *device, const struct acpi_device_id *not_used) { -- cgit v0.10.2 From d21114236d0e1232023cb8c84bd29e06810f6a2f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 17 Feb 2014 13:59:30 +0800 Subject: Fix warning in make defconfig Now the ACPI container and ACPI PCI hotplug driver has been converted as built-in modules, so reflect these changes in IA64 defconfig file. Signed-off-by: Jiang Liu Signed-off-by: Tony Luck diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index efbd292..6404acb 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -30,9 +30,9 @@ CONFIG_ACPI_BUTTON=m CONFIG_ACPI_FAN=m CONFIG_ACPI_DOCK=y CONFIG_ACPI_PROCESSOR=m -CONFIG_ACPI_CONTAINER=m +CONFIG_ACPI_CONTAINER=y CONFIG_HOTPLUG_PCI=y -CONFIG_HOTPLUG_PCI_ACPI=m +CONFIG_HOTPLUG_PCI_ACPI=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y -- cgit v0.10.2 From fb0cfecf67cc1cec4487efb6267317de432add2f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 22 Feb 2014 10:59:36 -0300 Subject: ARM: dts: omap3-igep: fix boot fail due wrong compatible match This patch is based on commit: 016c12d2 ("ARM: OMAP3: Fix hardware detection for omap3630 when booted with device tree") and fixes a boot hang due the IGEP board being wrongly initialized as an OMAP3430 platform instead of an OMAP3630. Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren diff --git a/arch/arm/boot/dts/omap3-igep0020.dts b/arch/arm/boot/dts/omap3-igep0020.dts index 25a2b5f..f2779ac 100644 --- a/arch/arm/boot/dts/omap3-igep0020.dts +++ b/arch/arm/boot/dts/omap3-igep0020.dts @@ -14,7 +14,7 @@ / { model = "IGEPv2 (TI OMAP AM/DM37x)"; - compatible = "isee,omap3-igep0020", "ti,omap3"; + compatible = "isee,omap3-igep0020", "ti,omap36xx", "ti,omap3"; leds { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/omap3-igep0030.dts b/arch/arm/boot/dts/omap3-igep0030.dts index 145c58c..2793749 100644 --- a/arch/arm/boot/dts/omap3-igep0030.dts +++ b/arch/arm/boot/dts/omap3-igep0030.dts @@ -13,7 +13,7 @@ / { model = "IGEP COM MODULE (TI OMAP AM/DM37x)"; - compatible = "isee,omap3-igep0030", "ti,omap3"; + compatible = "isee,omap3-igep0030", "ti,omap36xx", "ti,omap3"; leds { pinctrl-names = "default"; -- cgit v0.10.2 From dca1c8d17a2feae056f9e334ea75a462ae4cb52a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 22 Feb 2014 19:35:38 -0500 Subject: cifs: mask off top byte in get_rfc1002_length() The rfc1002 length actually includes a type byte, which we aren't masking off. In most cases, it's not a problem since the RFC1002_SESSION_MESSAGE type is 0, but when doing a RFC1002 session establishment, the type is non-zero and that throws off the returned length. Signed-off-by: Jeff Layton Tested-by: Sachin Prabhu Signed-off-by: Steve French diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cf32f03..c0f3718 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -513,7 +513,7 @@ struct cifs_mnt_data { static inline unsigned int get_rfc1002_length(void *buf) { - return be32_to_cpu(*((__be32 *)buf)); + return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } static inline void -- cgit v0.10.2 From 014325e933b43844f5d01fcef39bbc3301320e59 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 20 Feb 2014 14:37:20 -0700 Subject: ARM: tegra: add LED options back into tegra_defconfig The last time tegra_defconfig was rebuilt, various LEDs options were somehow selected by other options, and hence their entries in tegra_defconfig were removed by "make savedefconfig". However, for some reason this is no longer happening, so we need to add the entries back into tegra_defconfig so the they are enabled in .config. Fixes: db079b1811d1 ("ARM: tegra: rebuild tegra_defconfig to add DEBUG_FS)" Reported-by: Marc Dietrich Signed-off-by: Stephen Warren Signed-off-by: Arnd Bergmann diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 00fe9e9..27d69b5 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -204,7 +204,10 @@ CONFIG_MMC_BLOCK_MINORS=16 CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_TEGRA=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_ONESHOT=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y -- cgit v0.10.2 From d7b95315cc7f441418845a165ee56df723941487 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 28 Feb 2014 15:05:10 -0800 Subject: tg3: Don't check undefined error bits in RXBD Redefine the RXD_ERR_MASK to include only relevant error bits. This fixes a customer reported issue of randomly dropping packets on the 5719. Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 3167ed6..3b6d0ba 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6843,8 +6843,7 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget) work_mask |= opaque_key; - if ((desc->err_vlan & RXD_ERR_MASK) != 0 && - (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII)) { + if (desc->err_vlan & RXD_ERR_MASK) { drop_it: tg3_recycle_rx(tnapi, tpr, opaque_key, desc_idx, *post_ptr); diff --git a/drivers/net/ethernet/broadcom/tg3.h b/drivers/net/ethernet/broadcom/tg3.h index ef47238..04321e5 100644 --- a/drivers/net/ethernet/broadcom/tg3.h +++ b/drivers/net/ethernet/broadcom/tg3.h @@ -2608,7 +2608,11 @@ struct tg3_rx_buffer_desc { #define RXD_ERR_TOO_SMALL 0x00400000 #define RXD_ERR_NO_RESOURCES 0x00800000 #define RXD_ERR_HUGE_FRAME 0x01000000 -#define RXD_ERR_MASK 0xffff0000 + +#define RXD_ERR_MASK (RXD_ERR_BAD_CRC | RXD_ERR_COLLISION | \ + RXD_ERR_LINK_LOST | RXD_ERR_PHY_DECODE | \ + RXD_ERR_MAC_ABRT | RXD_ERR_TOO_SMALL | \ + RXD_ERR_NO_RESOURCES | RXD_ERR_HUGE_FRAME) u32 reserved; u32 opaque; -- cgit v0.10.2 From 877767dccf5c35d4ed245701956effb72af77d52 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 28 Feb 2014 14:14:03 +0100 Subject: bna: fix vlan tag stripping and implement its toggling The recent commit "fe1624c bna: RX Filter Enhancements" disables VLAN tag stripping if the NIC is in promiscuous mode. This causes __vlan_hwaccel_put_tag() is called when the stripping is disabled. Because of this VLAN over bna does not work and causes BUGs in conjunction with openvswitch like this: Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index cf64f3d..4ad1187 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -707,7 +707,8 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) else skb_checksum_none_assert(skb); - if (flags & BNA_CQ_EF_VLAN) + if ((flags & BNA_CQ_EF_VLAN) && + (bnad->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cmpl->vlan_tag)); if (BNAD_RXBUF_IS_SK_BUFF(unmap_q->type)) @@ -2094,7 +2095,9 @@ bnad_init_rx_config(struct bnad *bnad, struct bna_rx_config *rx_config) rx_config->q1_buf_size = BFI_SMALL_RXBUF_SIZE; } - rx_config->vlan_strip_status = BNA_STATUS_T_ENABLED; + rx_config->vlan_strip_status = + (bnad->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) ? + BNA_STATUS_T_ENABLED : BNA_STATUS_T_DISABLED; } static void @@ -3245,11 +3248,6 @@ bnad_set_rx_mode(struct net_device *netdev) BNA_RXMODE_ALLMULTI; bna_rx_mode_set(bnad->rx_info[0].rx, new_mode, mode_mask, NULL); - if (bnad->cfg_flags & BNAD_CF_PROMISC) - bna_rx_vlan_strip_disable(bnad->rx_info[0].rx); - else - bna_rx_vlan_strip_enable(bnad->rx_info[0].rx); - spin_unlock_irqrestore(&bnad->bna_lock, flags); } @@ -3374,6 +3372,27 @@ bnad_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) return 0; } +static int bnad_set_features(struct net_device *dev, netdev_features_t features) +{ + struct bnad *bnad = netdev_priv(dev); + netdev_features_t changed = features ^ dev->features; + + if ((changed & NETIF_F_HW_VLAN_CTAG_RX) && netif_running(dev)) { + unsigned long flags; + + spin_lock_irqsave(&bnad->bna_lock, flags); + + if (features & NETIF_F_HW_VLAN_CTAG_RX) + bna_rx_vlan_strip_enable(bnad->rx_info[0].rx); + else + bna_rx_vlan_strip_disable(bnad->rx_info[0].rx); + + spin_unlock_irqrestore(&bnad->bna_lock, flags); + } + + return 0; +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void bnad_netpoll(struct net_device *netdev) @@ -3421,6 +3440,7 @@ static const struct net_device_ops bnad_netdev_ops = { .ndo_change_mtu = bnad_change_mtu, .ndo_vlan_rx_add_vid = bnad_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bnad_vlan_rx_kill_vid, + .ndo_set_features = bnad_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = bnad_netpoll #endif @@ -3433,14 +3453,14 @@ bnad_netdev_init(struct bnad *bnad, bool using_dac) netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | - NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_TX; + NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; netdev->vlan_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6; - netdev->features |= netdev->hw_features | - NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER; + netdev->features |= netdev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; if (using_dac) netdev->features |= NETIF_F_HIGHDMA; -- cgit v0.10.2 From 8427defd087ae6c5eaea9d609dfe7f165568accd Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 28 Feb 2014 22:40:53 +0000 Subject: MAINTAINERS: add maintainer entry for Armada DRM driver Add a maintainers entry for the Armada DRM driver. Signed-off-by: Russell King Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index df8869d..4f42ace 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5501,6 +5501,11 @@ W: http://www.kernel.org/doc/man-pages L: linux-man@vger.kernel.org S: Maintained +MARVELL ARMADA DRM SUPPORT +M: Russell King +S: Maintained +F: drivers/gpu/drm/armada/ + MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) M: Mirko Lindner M: Stephen Hemminger -- cgit v0.10.2 From 6f285b19d09f72e801525f5eea1bdad22e559bf0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2014 19:44:55 -0800 Subject: audit: Send replies in the proper network namespace. In perverse cases of file descriptor passing the current network namespace of a process and the network namespace of a socket used by that socket may differ. Therefore use the network namespace of the appropiate socket to ensure replies always go to the appropiate socket. Signed-off-by: "Eric W. Biederman" diff --git a/include/linux/audit.h b/include/linux/audit.h index aa865a9..ec1464d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -43,6 +43,7 @@ struct mq_attr; struct mqstat; struct audit_watch; struct audit_tree; +struct sk_buff; struct audit_krule { int vers_ops; @@ -463,7 +464,7 @@ extern int audit_filter_user(int type); extern int audit_filter_type(int type); extern int audit_rule_change(int type, __u32 portid, int seq, void *data, size_t datasz); -extern int audit_list_rules_send(__u32 portid, int seq); +extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern u32 audit_enabled; #else /* CONFIG_AUDIT */ diff --git a/kernel/audit.c b/kernel/audit.c index 1e5756f..32086bf 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -570,9 +570,11 @@ static int audit_send_reply_thread(void *arg) * Allocates an skb, builds the netlink message, and sends it to the port id. * No failure notifications. */ -static void audit_send_reply(__u32 portid, int seq, int type, int done, +static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done, int multi, const void *payload, int size) { + u32 portid = NETLINK_CB(request_skb).portid; + struct net *net = sock_net(NETLINK_CB(request_skb).sk); struct sk_buff *skb; struct task_struct *tsk; struct audit_reply *reply = kmalloc(sizeof(struct audit_reply), @@ -585,7 +587,7 @@ static void audit_send_reply(__u32 portid, int seq, int type, int done, if (!skb) goto out; - reply->net = get_net(current->nsproxy->net_ns); + reply->net = get_net(net); reply->portid = portid; reply->skb = skb; @@ -675,8 +677,7 @@ static int audit_get_feature(struct sk_buff *skb) seq = nlmsg_hdr(skb)->nlmsg_seq; - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, - &af, sizeof(af)); + audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &af, sizeof(af)); return 0; } @@ -796,8 +797,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.backlog = skb_queue_len(&audit_skb_queue); s.version = AUDIT_VERSION_LATEST; s.backlog_wait_time = audit_backlog_wait_time; - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, - &s, sizeof(s)); + audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_SET: { @@ -907,7 +907,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq, data, nlmsg_len(nlh)); break; case AUDIT_LIST_RULES: - err = audit_list_rules_send(NETLINK_CB(skb).portid, seq); + err = audit_list_rules_send(skb, seq); break; case AUDIT_TRIM: audit_trim_trees(); @@ -972,8 +972,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memcpy(sig_data->ctx, ctx, len); security_release_secctx(ctx, len); } - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO, - 0, 0, sig_data, sizeof(*sig_data) + len); + audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0, + sig_data, sizeof(*sig_data) + len); kfree(sig_data); break; case AUDIT_TTY_GET: { @@ -985,8 +985,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.log_passwd = tsk->signal->audit_tty_log_passwd; spin_unlock(&tsk->sighand->siglock); - audit_send_reply(NETLINK_CB(skb).portid, seq, - AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); + audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_TTY_SET: { diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a5e3d73d..e8d1c7c 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "audit.h" /* @@ -1069,8 +1070,10 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data, * @portid: target portid for netlink audit messages * @seq: netlink audit message sequence (serial) number */ -int audit_list_rules_send(__u32 portid, int seq) +int audit_list_rules_send(struct sk_buff *request_skb, int seq) { + u32 portid = NETLINK_CB(request_skb).portid; + struct net *net = sock_net(NETLINK_CB(request_skb).sk); struct task_struct *tsk; struct audit_netlink_list *dest; int err = 0; @@ -1084,7 +1087,7 @@ int audit_list_rules_send(__u32 portid, int seq) dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL); if (!dest) return -ENOMEM; - dest->net = get_net(current->nsproxy->net_ns); + dest->net = get_net(net); dest->portid = portid; skb_queue_head_init(&dest->q); -- cgit v0.10.2 From b7e63a1079b266866a732cf699d8c4d61391bbda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Feb 2014 11:19:14 -0800 Subject: NFSv4: Fix another nfs4_sequence corruptor nfs4_release_lockowner needs to set the rpc_message reply to point to the nfs4_sequence_res in order to avoid another Oopsable situation in nfs41_assign_slot. Fixes: fbd4bfd1d9d21 (NFS: Add nfs4_sequence calls for RELEASE_LOCKOWNER) Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2da6a69..44e088d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5828,8 +5828,7 @@ struct nfs_release_lockowner_data { struct nfs4_lock_state *lsp; struct nfs_server *server; struct nfs_release_lockowner_args args; - struct nfs4_sequence_args seq_args; - struct nfs4_sequence_res seq_res; + struct nfs_release_lockowner_res res; unsigned long timestamp; }; @@ -5837,7 +5836,7 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata { struct nfs_release_lockowner_data *data = calldata; nfs40_setup_sequence(data->server, - &data->seq_args, &data->seq_res, task); + &data->args.seq_args, &data->res.seq_res, task); data->timestamp = jiffies; } @@ -5846,7 +5845,7 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) struct nfs_release_lockowner_data *data = calldata; struct nfs_server *server = data->server; - nfs40_sequence_done(task, &data->seq_res); + nfs40_sequence_done(task, &data->res.seq_res); switch (task->tk_status) { case 0: @@ -5887,7 +5886,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) return -ENOMEM; - nfs4_init_sequence(&data->seq_args, &data->seq_res, 0); data->lsp = lsp; data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; @@ -5895,6 +5893,8 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st data->args.lock_owner.s_dev = server->s_dev; msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); return 0; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b2fb167..5624e4e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -467,9 +467,14 @@ struct nfs_lockt_res { }; struct nfs_release_lockowner_args { + struct nfs4_sequence_args seq_args; struct nfs_lowner lock_owner; }; +struct nfs_release_lockowner_res { + struct nfs4_sequence_res seq_res; +}; + struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; -- cgit v0.10.2 From b355cee88e3b1a193f0e9a81db810f6f83ad728b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 27 Feb 2014 11:37:15 +0800 Subject: ACPI / resources: ignore invalid ACPI device resources ACPI table may export resource entry with 0 length. But the current code interprets this kind of resource in a wrong way. It will create a resource structure with res->end = acpi_resource->start + acpi_resource->len - 1; This patch fixes a problem on my machine that a platform device fails to be created because one of its ACPI IO resource entry (start = 0, end = 0, length = 0) is translated into a generic resource with start = 0, end = 0xffffffff. Signed-off-by: Zhang Rui Cc: All applicable Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index b7201fc..0bdacc5 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -77,18 +77,24 @@ bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res) switch (ares->type) { case ACPI_RESOURCE_TYPE_MEMORY24: memory24 = &ares->data.memory24; + if (!memory24->address_length) + return false; acpi_dev_get_memresource(res, memory24->minimum, memory24->address_length, memory24->write_protect); break; case ACPI_RESOURCE_TYPE_MEMORY32: memory32 = &ares->data.memory32; + if (!memory32->address_length) + return false; acpi_dev_get_memresource(res, memory32->minimum, memory32->address_length, memory32->write_protect); break; case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: fixed_memory32 = &ares->data.fixed_memory32; + if (!fixed_memory32->address_length) + return false; acpi_dev_get_memresource(res, fixed_memory32->address, fixed_memory32->address_length, fixed_memory32->write_protect); @@ -144,12 +150,16 @@ bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res) switch (ares->type) { case ACPI_RESOURCE_TYPE_IO: io = &ares->data.io; + if (!io->address_length) + return false; acpi_dev_get_ioresource(res, io->minimum, io->address_length, io->io_decode); break; case ACPI_RESOURCE_TYPE_FIXED_IO: fixed_io = &ares->data.fixed_io; + if (!fixed_io->address_length) + return false; acpi_dev_get_ioresource(res, fixed_io->address, fixed_io->address_length, ACPI_DECODE_10); -- cgit v0.10.2 From ae2045694b3353bf2b2980817303c9e9feb8cfff Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 28 Feb 2014 13:31:04 +0100 Subject: drm/vmwgfx: Remove some unused surface formats These formats are deprecated. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul diff --git a/drivers/gpu/drm/vmwgfx/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/svga3d_reg.h index bb594c1..f58dc7d 100644 --- a/drivers/gpu/drm/vmwgfx/svga3d_reg.h +++ b/drivers/gpu/drm/vmwgfx/svga3d_reg.h @@ -261,12 +261,7 @@ typedef enum SVGA3dSurfaceFormat { /* Planar video formats. */ SVGA3D_YV12 = 121, - /* Shader constant formats. */ - SVGA3D_SURFACE_SHADERCONST_FLOAT = 122, - SVGA3D_SURFACE_SHADERCONST_INT = 123, - SVGA3D_SURFACE_SHADERCONST_BOOL = 124, - - SVGA3D_FORMAT_MAX = 125, + SVGA3D_FORMAT_MAX = 122, } SVGA3dSurfaceFormat; typedef uint32 SVGA3dColor; /* a, r, g, b */ -- cgit v0.10.2 From a34417f6be521d1027b803f0b550ce622c971f41 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 28 Feb 2014 13:33:21 +0100 Subject: drm/vmwgfx: Make sure backing mobs are cleared when allocated. Update driver date. Backing mob contents is propagated to user-space, so make sure backing mobs are cleared when allocated. This also accidently fix rendering errors with celestia when emulating legacy mode. Also update driver date. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 9e4be17..0783155 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -40,7 +40,7 @@ #include #include "vmwgfx_fence.h" -#define VMWGFX_DRIVER_DATE "20121114" +#define VMWGFX_DRIVER_DATE "20140228" #define VMWGFX_DRIVER_MAJOR 2 #define VMWGFX_DRIVER_MINOR 5 #define VMWGFX_DRIVER_PATCHLEVEL 0 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 2aa4bc6..9757b57 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -427,8 +427,7 @@ int vmw_dmabuf_init(struct vmw_private *dev_priv, INIT_LIST_HEAD(&vmw_bo->res_list); ret = ttm_bo_init(bdev, &vmw_bo->base, size, - (user) ? ttm_bo_type_device : - ttm_bo_type_kernel, placement, + ttm_bo_type_device, placement, 0, interruptible, NULL, acc_size, NULL, bo_free); return ret; -- cgit v0.10.2 From 6950e23e54b1c94abf8c60c1ddfac79133d41de2 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 1 Mar 2014 01:20:18 +0400 Subject: drm/vmwgfx: avoid null pointer dereference at failure paths vmw_takedown_otable_base() and vmw_mob_unbind() check for potential vmw_fifo_reserve() failure and print error message, but then immediately dereference NULL pointer. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Reviewed-by: Thomas Hellstrom diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c index d4a5a19..04a64b8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c @@ -188,18 +188,20 @@ static void vmw_takedown_otable_base(struct vmw_private *dev_priv, bo = otable->page_table->pt_bo; cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); - if (unlikely(cmd == NULL)) - DRM_ERROR("Failed reserving FIFO space for OTable setup.\n"); - - memset(cmd, 0, sizeof(*cmd)); - cmd->header.id = SVGA_3D_CMD_SET_OTABLE_BASE; - cmd->header.size = sizeof(cmd->body); - cmd->body.type = type; - cmd->body.baseAddress = 0; - cmd->body.sizeInBytes = 0; - cmd->body.validSizeInBytes = 0; - cmd->body.ptDepth = SVGA3D_MOBFMT_INVALID; - vmw_fifo_commit(dev_priv, sizeof(*cmd)); + if (unlikely(cmd == NULL)) { + DRM_ERROR("Failed reserving FIFO space for OTable " + "takedown.\n"); + } else { + memset(cmd, 0, sizeof(*cmd)); + cmd->header.id = SVGA_3D_CMD_SET_OTABLE_BASE; + cmd->header.size = sizeof(cmd->body); + cmd->body.type = type; + cmd->body.baseAddress = 0; + cmd->body.sizeInBytes = 0; + cmd->body.validSizeInBytes = 0; + cmd->body.ptDepth = SVGA3D_MOBFMT_INVALID; + vmw_fifo_commit(dev_priv, sizeof(*cmd)); + } if (bo) { int ret; @@ -562,11 +564,12 @@ void vmw_mob_unbind(struct vmw_private *dev_priv, if (unlikely(cmd == NULL)) { DRM_ERROR("Failed reserving FIFO space for Memory " "Object unbinding.\n"); + } else { + cmd->header.id = SVGA_3D_CMD_DESTROY_GB_MOB; + cmd->header.size = sizeof(cmd->body); + cmd->body.mobid = mob->id; + vmw_fifo_commit(dev_priv, sizeof(*cmd)); } - cmd->header.id = SVGA_3D_CMD_DESTROY_GB_MOB; - cmd->header.size = sizeof(cmd->body); - cmd->body.mobid = mob->id; - vmw_fifo_commit(dev_priv, sizeof(*cmd)); if (bo) { vmw_fence_single_bo(bo, NULL); ttm_bo_unreserve(bo); -- cgit v0.10.2 From ae41a3030ce8901853b3cfd5e118a4b0288aa068 Mon Sep 17 00:00:00 2001 From: Marek Belisko Date: Sat, 1 Mar 2014 14:58:48 +0100 Subject: ARM: dts: omap3-gta04: Add ti,omap36xx to compatible property to avoid problems with booting Without that change booting leads to crash with more warnings like below: [ 0.284454] omap_hwmod: uart4: cannot clk_get main_clk uart4_fck [ 0.284484] omap_hwmod: uart4: cannot _init_clocks [ 0.284484] ------------[ cut here ]------------ [ 0.284545] WARNING: CPU: 0 PID: 1 at arch/arm/mach-omap2/omap_hwmod.c:2543 _init+0x300/0x3e4() [ 0.284545] omap_hwmod: uart4: couldn't init clocks [ 0.284576] Modules linked in: [ 0.284606] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.13.0-next-20140124-00020-gd2aefec-dirty #26 [ 0.284637] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 0.284667] [] (show_stack) from [] (dump_stack+0x7c/0x94) [ 0.284729] [] (dump_stack) from [] (warn_slowpath_common+0x6c/0x90) [ 0.284729] [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x30/0x40) [ 0.284759] [] (warn_slowpath_fmt) from [] (_init+0x300/0x3e4) [ 0.284790] [] (_init) from [] (__omap_hwmod_setup_all+0x40/0x8c) [ 0.284820] [] (__omap_hwmod_setup_all) from [] (do_one_initcall+0xe8/0x14c) [ 0.284851] [] (do_one_initcall) from [] (kernel_init_freeable+0x104/0x1c8) [ 0.284881] [] (kernel_init_freeable) from [] (kernel_init+0x8/0x118) [ 0.284912] [] (kernel_init) from [] (ret_from_fork+0x14/0x2c) [ 0.285064] ---[ end trace 63de210ad43b627d ]--- Reference: https://lkml.org/lkml/2013/10/8/553 Signed-off-by: Marek Belisko Signed-off-by: Tony Lindgren diff --git a/arch/arm/boot/dts/omap3-gta04.dts b/arch/arm/boot/dts/omap3-gta04.dts index c551e4a..d3b253b 100644 --- a/arch/arm/boot/dts/omap3-gta04.dts +++ b/arch/arm/boot/dts/omap3-gta04.dts @@ -13,7 +13,7 @@ / { model = "OMAP3 GTA04"; - compatible = "ti,omap3-gta04", "ti,omap3"; + compatible = "ti,omap3-gta04", "ti,omap36xx", "ti,omap3"; cpus { cpu@0 { -- cgit v0.10.2 From 3130497f5babd42b24a7febda46b46116e0adc83 Mon Sep 17 00:00:00 2001 From: "Li, Aubrey" Date: Sun, 2 Mar 2014 08:53:57 +0800 Subject: ACPI / sleep: pm_power_off needs more sanity checks to be installed Sleep control and status registers need santity checks as well before ACPI installs acpi_power_off to pm_power_off hook. The checking code in acpi_enter_sleep_state() is too late, we should not allow a not-working pm_power_off function to be hooked up. Signed-off-by: Aubrey Li Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index b718806..b0f6c4a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -807,7 +807,12 @@ int __init acpi_sleep_init(void) acpi_sleep_hibernate_setup(); status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { + /* + * Check both ACPI S5 object and ACPI sleep registers to + * install pm_power_off_prepare/pm_power_off hook + */ + if (ACPI_SUCCESS(status) && acpi_gbl_FADT.sleep_control.address + && acpi_gbl_FADT.sleep_status.address) { sleep_states[ACPI_STATE_S5] = 1; pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; -- cgit v0.10.2 From 25d54fe5657f74766f2c79ad1267320793403f9e Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 2 Mar 2014 17:29:33 +0100 Subject: b44: add calls to phy_{start,stop} When support for external phys was added to b44, the calls to start and stop the phy were missing in the mac driver. This adds the calls to phy_start() and phy_stop(). Signed-off-by: Hauke Mehrtens Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 1f7b5aa..e8046e1 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1484,6 +1484,10 @@ static int b44_open(struct net_device *dev) add_timer(&bp->timer); b44_enable_ints(bp); + + if (bp->flags & B44_FLAG_EXTERNAL_PHY) + phy_start(bp->phydev); + netif_start_queue(dev); out: return err; @@ -1646,6 +1650,9 @@ static int b44_close(struct net_device *dev) netif_stop_queue(dev); + if (bp->flags & B44_FLAG_EXTERNAL_PHY) + phy_stop(bp->phydev); + napi_disable(&bp->napi); del_timer_sync(&bp->timer); -- cgit v0.10.2 From 1d3f41ea0e95bed0502a7ac3f42fdc23ca6b7b82 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 2 Mar 2014 17:29:34 +0100 Subject: b44: always set duplex mode why phy changes Without this patch b44_check_phy() was called when the phy called the adjust callback. This method only change the mac duplex mode when the carrier was off. When the phy changed the duplex mode after the carrier was on the mac was not changed. This happened when an external phy was used. Signed-off-by: Hauke Mehrtens Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index e8046e1..8a7bf7d 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -2229,7 +2229,12 @@ static void b44_adjust_link(struct net_device *dev) } if (status_changed) { - b44_check_phy(bp); + u32 val = br32(bp, B44_TX_CTRL); + if (bp->flags & B44_FLAG_FULL_DUPLEX) + val |= TX_CTRL_DUPLEX; + else + val &= ~TX_CTRL_DUPLEX; + bw32(bp, B44_TX_CTRL, val); phy_print_status(phydev); } } -- cgit v0.10.2 From 635d61a3735e05c8da72740006670f819e5b6a5f Mon Sep 17 00:00:00 2001 From: Gerry Demaret Date: Fri, 28 Feb 2014 18:50:46 +0100 Subject: USB AX88179/178A: Support D-Link DUB-1312 Add the USB device ID for the D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter to the AX88179/178A driver. Signed-off-by: Gerry Demaret Signed-off-by: David S. Miller diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 460e823..d2e6fdb 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1395,6 +1395,19 @@ static const struct driver_info ax88178a_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info dlink_dub1312_info = { + .description = "D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct driver_info sitecom_info = { .description = "Sitecom USB 3.0 to Gigabit Adapter", .bind = ax88179_bind, @@ -1444,6 +1457,10 @@ static const struct usb_device_id products[] = { USB_DEVICE(0x0b95, 0x178a), .driver_info = (unsigned long)&ax88178a_info, }, { + /* D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter */ + USB_DEVICE(0x2001, 0x4a00), + .driver_info = (unsigned long)&dlink_dub1312_info, +}, { /* Sitecom USB 3.0 to Gigabit Adapter */ USB_DEVICE(0x0df6, 0x0072), .driver_info = (unsigned long)&sitecom_info, -- cgit v0.10.2 From 0414855fdc4a40da05221fc6062cccbc0c30f169 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Mar 2014 18:56:16 -0800 Subject: Linux 3.14-rc5 diff --git a/Makefile b/Makefile index 81f1bf3..78209ee 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From 755a48a7a4eb05b9c8424e3017d947b2961a60e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Mar 2014 22:03:12 -0500 Subject: NFS: Fix a delegation callback race The clean-up in commit 36281caa839f ended up removing a NULL pointer check that is needed in order to prevent an Oops in nfs_async_inode_return_delegation(). Reported-by: "Yan, Zheng" Link: http://lkml.kernel.org/r/5313E9F6.2020405@intel.com Fixes: 36281caa839f (NFSv4: Further clean-ups of delegation stateid validation) Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ef792f2..5d8ccec 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -659,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL) + goto out_enoent; - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { - rcu_read_unlock(); - return -ENOENT; - } + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); return 0; +out_enoent: + rcu_read_unlock(); + return -ENOENT; } static struct inode * -- cgit v0.10.2 From 878eaf61be20a78dd4a0a14ac5b1a3a78298cba7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Feb 2014 14:51:55 +1000 Subject: MAINTAINERS: update AGP tree to point at drm tree Signed-off-by: Dave Airlie diff --git a/MAINTAINERS b/MAINTAINERS index c6d0e93..e9d85f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -473,7 +473,7 @@ F: net/rxrpc/af_rxrpc.c AGPGART DRIVER M: David Airlie -T: git git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6.git +T: git git://people.freedesktop.org/~airlied/linux (part of drm maint) S: Maintained F: drivers/char/agp/ F: include/linux/agp* -- cgit v0.10.2 From 61d1cf163c8653934cc8cd5d0b2a562d0990c265 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Sun, 2 Mar 2014 20:54:42 +0100 Subject: spi: spi-ath79: fix initial GPIO CS line setup The 'ath79_spi_setup_cs' function initializes the chip select line of a given SPI device in order to make sure that the device is inactive. If the SPI_CS_HIGH bit is set for a given device, it means that the CS line of that device is active HIGH so it must be set to LOW initially. In case of GPIO CS lines, the 'ath79_spi_setup_cs' function does the opposite of that due to the wrong GPIO flags. Fix the code to use the correct GPIO flags. Reported-by: Ronald Wahl Signed-off-by: Gabor Juhos Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c index 31534b5..c3b2fb9 100644 --- a/drivers/spi/spi-ath79.c +++ b/drivers/spi/spi-ath79.c @@ -132,9 +132,9 @@ static int ath79_spi_setup_cs(struct spi_device *spi) flags = GPIOF_DIR_OUT; if (spi->mode & SPI_CS_HIGH) - flags |= GPIOF_INIT_HIGH; - else flags |= GPIOF_INIT_LOW; + else + flags |= GPIOF_INIT_HIGH; status = gpio_request_one(cdata->gpio, flags, dev_name(&spi->dev)); -- cgit v0.10.2 From 64e8d20bd39b81994d9cd60e7b42f8ec8652f5af Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Thu, 27 Feb 2014 17:25:54 +0530 Subject: kernel: Include appropriate header file in time/timekeeping_debug.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include appropriate header file kernel/time/timekeeping_internal.h in kernel/time/timekeeping_debug.c because it has prototype declaration of function defined in kernel/time/timekeeping_debug.c. This eliminates the following warning in kernel/time/timekeeping_debug.c: kernel/time/timekeeping_debug.c:68:6: warning: no previous prototype for ‘tk_debug_account_sleep_time’ [-Wmissing-prototypes] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Signed-off-by: John Stultz diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index 802433a..4d54f97 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -21,6 +21,8 @@ #include #include +#include "timekeeping_internal.h" + static unsigned int sleep_time_bin[32] = {0}; static int tk_debug_show_sleep_time(struct seq_file *s, void *data) -- cgit v0.10.2 From a6b92b6650d010d58b6e6fe42c6271266e0b1134 Mon Sep 17 00:00:00 2001 From: Marius Knaust Date: Mon, 3 Mar 2014 01:48:58 +0100 Subject: ALSA: hda - Added inverted digital-mic handling for Acer TravelMate 8371 Signed-off-by: Marius Knaust Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ec304f3..7ba7a11f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4253,6 +4253,7 @@ static const struct hda_fixup alc269_fixups[] = { }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1025, 0x0283, "Acer TravelMate 8371", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700), -- cgit v0.10.2 From b6ab66aa5d376583a17137cbb2d3a728f29acae2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 25 Feb 2014 13:11:47 +0200 Subject: drm/i915: use backlight legacy combination mode also for i915gm/i945gm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915gm and i945gm also seem to use and need the legacy combination mode bit in BLC_PWM_CTL. v2: Also do this for i915gm (Ville). Reported-and-tested-by: Luis Ortega Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75001 Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 350de35..079ea38 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -698,7 +698,7 @@ static void i9xx_enable_backlight(struct intel_connector *connector) freq /= 0xff; ctl = freq << 17; - if (IS_GEN2(dev) && panel->backlight.combination_mode) + if (panel->backlight.combination_mode) ctl |= BLM_LEGACY_MODE; if (IS_PINEVIEW(dev) && panel->backlight.active_low_pwm) ctl |= BLM_POLARITY_PNV; @@ -979,7 +979,7 @@ static int i9xx_setup_backlight(struct intel_connector *connector) ctl = I915_READ(BLC_PWM_CTL); - if (IS_GEN2(dev)) + if (IS_GEN2(dev) || IS_I915GM(dev) || IS_I945GM(dev)) panel->backlight.combination_mode = ctl & BLM_LEGACY_MODE; if (IS_PINEVIEW(dev)) -- cgit v0.10.2 From 3617dc9675f0184b7bb210cfa34f3cac928d8055 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Mon, 13 Jan 2014 16:25:21 +0530 Subject: drm/i915: Resolving the memory region conflict for Stolen area There is a conflict seen when requesting the kernel to reserve the physical space used for the stolen area. This is because some BIOS are wrapping the stolen area in the root PCI bus, but have an off-by-one error. As a workaround we retry the reservation with an offset of 1 instead of 0. v2: updated commit message & the comment in source file (Daniel) Signed-off-by: Akash Goel Reviewed-by: Jesse Barnes Tested-by: Arjan van de Ven Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 1a24e84..d58b4e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -82,9 +82,22 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) r = devm_request_mem_region(dev->dev, base, dev_priv->gtt.stolen_size, "Graphics Stolen Memory"); if (r == NULL) { - DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", - base, base + (uint32_t)dev_priv->gtt.stolen_size); - base = 0; + /* + * One more attempt but this time requesting region from + * base + 1, as we have seen that this resolves the region + * conflict with the PCI Bus. + * This is a BIOS w/a: Some BIOS wrap stolen in the root + * PCI bus, but have an off-by-one error. Hence retry the + * reservation starting from 1 instead of 0. + */ + r = devm_request_mem_region(dev->dev, base + 1, + dev_priv->gtt.stolen_size - 1, + "Graphics Stolen Memory"); + if (r == NULL) { + DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", + base, base + (uint32_t)dev_priv->gtt.stolen_size); + base = 0; + } } return base; -- cgit v0.10.2 From bcdb72ac7c00d2b56359fc82bcc8fe50454717d5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 14 Feb 2014 20:23:54 +0200 Subject: drm/i915: fix pch pci device enumeration pci_get_class(class, from) drops the refcount for 'from', so the extra pci_dev_put we do on it will result in a use after free bug starting with the WARN below. Regression introduced in commit 6a9c4b35e6696a63805b6da5e4889c6986e9ee1b Author: Rui Guo Date: Wed Jun 19 21:10:23 2013 +0800 drm/i915: Fix PCH detect with multiple ISA bridges in VM [ 164.338460] WARNING: CPU: 1 PID: 2094 at include/linux/kref.h:47 klist_next+0xae/0x110() [ 164.347731] CPU: 1 PID: 2094 Comm: modprobe Tainted: G O 3.13.0-imre+ #354 [ 164.356468] Hardware name: Intel Corp. VALLEYVIEW B0 PLATFORM/NOTEBOOK, BIOS BYTICRB1.X64.0062.R70.1310112051 10/11/2013 [ 164.368796] Call Trace: [ 164.371609] [] dump_stack+0x4e/0x7a [ 164.377447] [] warn_slowpath_common+0x7d/0xa0 [ 164.384238] [] warn_slowpath_null+0x1a/0x20 [ 164.390851] [] klist_next+0xae/0x110 [ 164.396777] [] ? pci_do_find_bus+0x70/0x70 [ 164.403286] [] bus_find_device+0x89/0xc0 [ 164.409719] [] pci_get_dev_by_id+0x63/0xa0 [ 164.416238] [] pci_get_class+0x44/0x50 [ 164.422433] [] intel_dsm_detect+0x16f/0x1f0 [i915] [ 164.429801] [] intel_register_dsm_handler+0xe/0x10 [i915] [ 164.437831] [] i915_driver_load+0xafe/0xf30 [i915] [ 164.445126] [] ? intel_alloc_coherent+0x110/0x110 [ 164.452340] [] drm_dev_register+0xc7/0x150 [drm] [ 164.459462] [] drm_get_pci_dev+0x11f/0x1f0 [drm] [ 164.466554] [] ? _raw_spin_unlock_irqrestore+0x51/0x70 [ 164.474287] [] i915_pci_probe+0x56/0x60 [i915] [ 164.481185] [] pci_device_probe+0x78/0xf0 [ 164.487603] [] driver_probe_device+0x155/0x350 [ 164.494505] [] __driver_attach+0x6e/0xa0 [ 164.500826] [] ? __device_attach+0x50/0x50 [ 164.507333] [] bus_for_each_dev+0x6e/0xc0 [ 164.513752] [] driver_attach+0x1e/0x20 [ 164.519870] [] bus_add_driver+0x138/0x260 [ 164.526289] [] ? 0xffffffffa0187fff [ 164.532116] [] driver_register+0x98/0xe0 [ 164.538558] [] ? 0xffffffffa0187fff [ 164.544389] [] __pci_register_driver+0x60/0x70 [ 164.551336] [] drm_pci_init+0x6d/0x120 [drm] [ 164.558040] [] ? 0xffffffffa0187fff [ 164.563928] [] i915_init+0x6a/0x6c [i915] [ 164.570363] [] do_one_initcall+0xaa/0x160 [ 164.576783] [] ? set_memory_nx+0x40/0x50 [ 164.583100] [] load_module+0x1fb5/0x2550 [ 164.589410] [] ? store_uevent+0x40/0x40 [ 164.595628] [] SyS_init_module+0xed/0x100 [ 164.602048] [] system_call_fastpath+0x16/0x1b v2: simplify the loop further (Chris) Signed-off-by: Imre Deak Cc: Jesse Barnes Reported-by: Jesse Barnes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65652 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74161 Reviewed-by: Chris Wilson Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 04f1f02..ec7bb0f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -403,7 +403,7 @@ MODULE_DEVICE_TABLE(pci, pciidlist); void intel_detect_pch(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct pci_dev *pch; + struct pci_dev *pch = NULL; /* In all current cases, num_pipes is equivalent to the PCH_NOP setting * (which really amounts to a PCH but no South Display). @@ -424,12 +424,9 @@ void intel_detect_pch(struct drm_device *dev) * all the ISA bridge devices and check for the first match, instead * of only checking the first one. */ - pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); - while (pch) { - struct pci_dev *curr = pch; + while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) { if (pch->vendor == PCI_VENDOR_ID_INTEL) { - unsigned short id; - id = pch->device & INTEL_PCH_DEVICE_ID_MASK; + unsigned short id = pch->device & INTEL_PCH_DEVICE_ID_MASK; dev_priv->pch_id = id; if (id == INTEL_PCH_IBX_DEVICE_ID_TYPE) { @@ -461,18 +458,16 @@ void intel_detect_pch(struct drm_device *dev) DRM_DEBUG_KMS("Found LynxPoint LP PCH\n"); WARN_ON(!IS_HASWELL(dev)); WARN_ON(!IS_ULT(dev)); - } else { - goto check_next; - } - pci_dev_put(pch); + } else + continue; + break; } -check_next: - pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, curr); - pci_dev_put(curr); } if (!pch) - DRM_DEBUG_KMS("No PCH found?\n"); + DRM_DEBUG_KMS("No PCH found.\n"); + + pci_dev_put(pch); } bool i915_semaphore_is_enabled(struct drm_device *dev) -- cgit v0.10.2 From 03b8c7b623c80af264c4c8d6111e5c6289933666 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 2 Mar 2014 13:09:47 +0100 Subject: futex: Allow architectures to skip futex_atomic_cmpxchg_inatomic() test If an architecture has futex_atomic_cmpxchg_inatomic() implemented and there is no runtime check necessary, allow to skip the test within futex_init(). This allows to get rid of some code which would always give the same result, and also allows the compiler to optimize a couple of if statements away. Signed-off-by: Heiko Carstens Cc: Finn Thain Cc: Geert Uytterhoeven Link: http://lkml.kernel.org/r/20140302120947.GA3641@osiris Signed-off-by: Thomas Gleixner diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 65a0775..bb74b21 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -117,6 +117,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 diff --git a/include/linux/futex.h b/include/linux/futex.h index b0d95ca..6435f46 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -55,7 +55,11 @@ union futex_key { #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); +#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +#define futex_cmpxchg_enabled 1 +#else extern int futex_cmpxchg_enabled; +#endif #else static inline void exit_robust_list(struct task_struct *curr) { diff --git a/init/Kconfig b/init/Kconfig index 009a797..d56cb03 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1387,6 +1387,13 @@ config FUTEX support for "fast userspace mutexes". The resulting kernel may not run glibc-based applications correctly. +config HAVE_FUTEX_CMPXCHG + bool + help + Architectures should select this if futex_atomic_cmpxchg_inatomic() + is implemented and always working. This removes a couple of runtime + checks. + config EPOLL bool "Enable eventpoll support" if EXPERT default y diff --git a/kernel/futex.c b/kernel/futex.c index 44a1261..5d17e3a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -157,7 +157,9 @@ * enqueue. */ +#ifndef CONFIG_HAVE_FUTEX_CMPXCHG int __read_mostly futex_cmpxchg_enabled; +#endif /* * Futex flags used to encode options to functions and preserve them across @@ -2843,9 +2845,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } -static int __init futex_init(void) +static void __init futex_detect_cmpxchg(void) { +#ifndef CONFIG_HAVE_FUTEX_CMPXCHG u32 curval; + + /* + * This will fail and we want it. Some arch implementations do + * runtime detection of the futex_atomic_cmpxchg_inatomic() + * functionality. We want to know that before we call in any + * of the complex code paths. Also we want to prevent + * registration of robust lists in that case. NULL is + * guaranteed to fault and we get -EFAULT on functional + * implementation, the non-functional ones will return + * -ENOSYS. + */ + if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) + futex_cmpxchg_enabled = 1; +#endif +} + +static int __init futex_init(void) +{ unsigned int futex_shift; unsigned long i; @@ -2861,18 +2882,8 @@ static int __init futex_init(void) &futex_shift, NULL, futex_hashsize, futex_hashsize); futex_hashsize = 1UL << futex_shift; - /* - * This will fail and we want it. Some arch implementations do - * runtime detection of the futex_atomic_cmpxchg_inatomic() - * functionality. We want to know that before we call in any - * of the complex code paths. Also we want to prevent - * registration of robust lists in that case. NULL is - * guaranteed to fault and we get -EFAULT on functional - * implementation, the non-functional ones will return - * -ENOSYS. - */ - if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT) - futex_cmpxchg_enabled = 1; + + futex_detect_cmpxchg(); for (i = 0; i < futex_hashsize; i++) { plist_head_init(&futex_queues[i].chain); -- cgit v0.10.2 From 5be93bdda64e85450598c6e97f79fb8f6acf30e0 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 19 Feb 2014 12:00:51 +0100 Subject: can: flexcan: fix shutdown: first disable chip, then all interrupts When shutting down the CAN interface (ifconfig canX down) during high CAN bus loads, the CAN core might hang and freeze the whole CPU. This patch fixes the shutdown sequence by first disabling the CAN core then disabling all interrupts. Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 320bef2..dcd69c9 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -827,14 +827,16 @@ static void flexcan_chip_stop(struct net_device *dev) struct flexcan_regs __iomem *regs = priv->base; u32 reg; - /* Disable all interrupts */ - flexcan_write(0, ®s->imask1); - /* Disable + halt module */ reg = flexcan_read(®s->mcr); reg |= FLEXCAN_MCR_MDIS | FLEXCAN_MCR_HALT; flexcan_write(reg, ®s->mcr); + /* Disable all interrupts */ + flexcan_write(0, ®s->imask1); + flexcan_write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL, + ®s->ctrl); + if (priv->reg_xceiver) regulator_disable(priv->reg_xceiver); priv->can.state = CAN_STATE_STOPPED; -- cgit v0.10.2 From 7e9e148af01ef388efb6e2490805970be4622792 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 28 Feb 2014 14:52:01 +0100 Subject: can: flexcan: flexcan_open(): fix error path if flexcan_chip_start() fails If flexcan_chip_start() in flexcan_open() fails, the interrupt is not freed, this patch adds the missing cleanup. Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index dcd69c9..30af702 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -868,7 +868,7 @@ static int flexcan_open(struct net_device *dev) /* start chip and queuing */ err = flexcan_chip_start(dev); if (err) - goto out_close; + goto out_free_irq; can_led_event(dev, CAN_LED_EVENT_OPEN); @@ -877,6 +877,8 @@ static int flexcan_open(struct net_device *dev) return 0; + out_free_irq: + free_irq(dev->irq, dev); out_close: close_candev(dev); out_disable_per: -- cgit v0.10.2 From 9b00b300e7bce032c467c36ca47fe2a776887fc2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 28 Feb 2014 15:30:18 +0100 Subject: can: flexcan: fix transition from and to low power mode in chip_{en,dis}able In flexcan_chip_enable() and flexcan_chip_disable() fixed delays are used. Experiments have shown that the transition from and to low power mode may take several microseconds. This patch adds a while loop which polls the Low Power Mode ACK bit (LPM_ACK) that indicates a successfull mode change. If the function runs into a timeout a error value is returned. Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 30af702..5af60ab 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -144,6 +144,8 @@ #define FLEXCAN_MB_CODE_MASK (0xf0ffffff) +#define FLEXCAN_TIMEOUT_US (50) + /* * FLEXCAN hardware feature flags * @@ -269,26 +271,42 @@ static inline int flexcan_has_and_handle_berr(const struct flexcan_priv *priv, (reg_esr & FLEXCAN_ESR_ERR_BUS); } -static inline void flexcan_chip_enable(struct flexcan_priv *priv) +static int flexcan_chip_enable(struct flexcan_priv *priv) { struct flexcan_regs __iomem *regs = priv->base; + unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; u32 reg; reg = flexcan_read(®s->mcr); reg &= ~FLEXCAN_MCR_MDIS; flexcan_write(reg, ®s->mcr); - udelay(10); + while (timeout-- && (flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) + usleep_range(10, 20); + + if (flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK) + return -ETIMEDOUT; + + return 0; } -static inline void flexcan_chip_disable(struct flexcan_priv *priv) +static int flexcan_chip_disable(struct flexcan_priv *priv) { struct flexcan_regs __iomem *regs = priv->base; + unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; u32 reg; reg = flexcan_read(®s->mcr); reg |= FLEXCAN_MCR_MDIS; flexcan_write(reg, ®s->mcr); + + while (timeout-- && !(flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) + usleep_range(10, 20); + + if (!(flexcan_read(®s->mcr) & FLEXCAN_MCR_LPM_ACK)) + return -ETIMEDOUT; + + return 0; } static int flexcan_get_berr_counter(const struct net_device *dev, @@ -709,7 +727,9 @@ static int flexcan_chip_start(struct net_device *dev) u32 reg_mcr, reg_ctrl; /* enable module */ - flexcan_chip_enable(priv); + err = flexcan_chip_enable(priv); + if (err) + return err; /* soft reset */ flexcan_write(FLEXCAN_MCR_SOFTRST, ®s->mcr); @@ -949,12 +969,16 @@ static int register_flexcandev(struct net_device *dev) goto out_disable_ipg; /* select "bus clock", chip must be disabled */ - flexcan_chip_disable(priv); + err = flexcan_chip_disable(priv); + if (err) + goto out_disable_per; reg = flexcan_read(®s->ctrl); reg |= FLEXCAN_CTRL_CLK_SRC; flexcan_write(reg, ®s->ctrl); - flexcan_chip_enable(priv); + err = flexcan_chip_enable(priv); + if (err) + goto out_chip_disable; /* set freeze, halt and activate FIFO, restrict register access */ reg = flexcan_read(®s->mcr); @@ -971,14 +995,15 @@ static int register_flexcandev(struct net_device *dev) if (!(reg & FLEXCAN_MCR_FEN)) { netdev_err(dev, "Could not enable RX FIFO, unsupported core\n"); err = -ENODEV; - goto out_disable_per; + goto out_chip_disable; } err = register_candev(dev); - out_disable_per: /* disable core and turn off clocks */ + out_chip_disable: flexcan_chip_disable(priv); + out_disable_per: clk_disable_unprepare(priv->clk_per); out_disable_ipg: clk_disable_unprepare(priv->clk_ipg); @@ -1121,8 +1146,11 @@ static int flexcan_suspend(struct device *device) { struct net_device *dev = dev_get_drvdata(device); struct flexcan_priv *priv = netdev_priv(dev); + int err; - flexcan_chip_disable(priv); + err = flexcan_chip_disable(priv); + if (err) + return err; if (netif_running(dev)) { netif_stop_queue(dev); @@ -1143,9 +1171,7 @@ static int flexcan_resume(struct device *device) netif_device_attach(dev); netif_start_queue(dev); } - flexcan_chip_enable(priv); - - return 0; + return flexcan_chip_enable(priv); } #endif /* CONFIG_PM_SLEEP */ -- cgit v0.10.2 From f003698e23f6f56a791774f14d0ac35d04872490 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 28 Feb 2014 17:18:27 +0100 Subject: can: flexcan: factor out transceiver {en,dis}able into seperate functions This patch moves the transceiver enable and disable into seperate functions, where the NULL pointer check is hidden. Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 5af60ab..6b0fecd 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -264,6 +264,22 @@ static inline void flexcan_write(u32 val, void __iomem *addr) } #endif +static inline int flexcan_transceiver_enable(const struct flexcan_priv *priv) +{ + if (!priv->reg_xceiver) + return 0; + + return regulator_enable(priv->reg_xceiver); +} + +static inline int flexcan_transceiver_disable(const struct flexcan_priv *priv) +{ + if (!priv->reg_xceiver) + return 0; + + return regulator_disable(priv->reg_xceiver); +} + static inline int flexcan_has_and_handle_berr(const struct flexcan_priv *priv, u32 reg_esr) { @@ -808,11 +824,9 @@ static int flexcan_chip_start(struct net_device *dev) if (priv->devtype_data->features & FLEXCAN_HAS_V10_FEATURES) flexcan_write(0x0, ®s->rxfgmask); - if (priv->reg_xceiver) { - err = regulator_enable(priv->reg_xceiver); - if (err) - goto out; - } + err = flexcan_transceiver_enable(priv); + if (err) + goto out; /* synchronize with the can bus */ reg_mcr = flexcan_read(®s->mcr); @@ -857,8 +871,7 @@ static void flexcan_chip_stop(struct net_device *dev) flexcan_write(priv->reg_ctrl_default & ~FLEXCAN_CTRL_ERR_ALL, ®s->ctrl); - if (priv->reg_xceiver) - regulator_disable(priv->reg_xceiver); + flexcan_transceiver_disable(priv); priv->can.state = CAN_STATE_STOPPED; return; -- cgit v0.10.2 From b1aa1c7a2165b44ecce66286a3095cc6c7667d1c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 28 Feb 2014 17:08:21 +0100 Subject: can: flexcan: fix transition from and to freeze mode in chip_{,un}freeze This patch factors out freeze and unfreeze of the CAN core into seperate functions. Experiments have shown that the transition from and to freeze mode may take several microseconds, especially the time entering the freeze mode depends on the current bitrate. This patch adds a while loop which polls the Freeze Mode ACK bit (FRZ_ACK) that indicates a successfull mode change. If the function runs into a timeout a error value is returned. Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 6b0fecd..330b5b9 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -325,6 +325,44 @@ static int flexcan_chip_disable(struct flexcan_priv *priv) return 0; } +static int flexcan_chip_freeze(struct flexcan_priv *priv) +{ + struct flexcan_regs __iomem *regs = priv->base; + unsigned int timeout = 1000 * 1000 * 10 / priv->can.bittiming.bitrate; + u32 reg; + + reg = flexcan_read(®s->mcr); + reg |= FLEXCAN_MCR_HALT; + flexcan_write(reg, ®s->mcr); + + while (timeout-- && !(flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) + usleep_range(100, 200); + + if (!(flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) + return -ETIMEDOUT; + + return 0; +} + +static int flexcan_chip_unfreeze(struct flexcan_priv *priv) +{ + struct flexcan_regs __iomem *regs = priv->base; + unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; + u32 reg; + + reg = flexcan_read(®s->mcr); + reg &= ~FLEXCAN_MCR_HALT; + flexcan_write(reg, ®s->mcr); + + while (timeout-- && (flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) + usleep_range(10, 20); + + if (flexcan_read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK) + return -ETIMEDOUT; + + return 0; +} + static int flexcan_get_berr_counter(const struct net_device *dev, struct can_berr_counter *bec) { @@ -756,7 +794,7 @@ static int flexcan_chip_start(struct net_device *dev) netdev_err(dev, "Failed to softreset can module (mcr=0x%08x)\n", reg_mcr); err = -ENODEV; - goto out; + goto out_chip_disable; } flexcan_set_bittiming(dev); @@ -826,12 +864,12 @@ static int flexcan_chip_start(struct net_device *dev) err = flexcan_transceiver_enable(priv); if (err) - goto out; + goto out_chip_disable; /* synchronize with the can bus */ - reg_mcr = flexcan_read(®s->mcr); - reg_mcr &= ~FLEXCAN_MCR_HALT; - flexcan_write(reg_mcr, ®s->mcr); + err = flexcan_chip_unfreeze(priv); + if (err) + goto out_transceiver_disable; priv->can.state = CAN_STATE_ERROR_ACTIVE; @@ -844,7 +882,9 @@ static int flexcan_chip_start(struct net_device *dev) return 0; - out: + out_transceiver_disable: + flexcan_transceiver_disable(priv); + out_chip_disable: flexcan_chip_disable(priv); return err; } @@ -859,12 +899,10 @@ static void flexcan_chip_stop(struct net_device *dev) { struct flexcan_priv *priv = netdev_priv(dev); struct flexcan_regs __iomem *regs = priv->base; - u32 reg; - /* Disable + halt module */ - reg = flexcan_read(®s->mcr); - reg |= FLEXCAN_MCR_MDIS | FLEXCAN_MCR_HALT; - flexcan_write(reg, ®s->mcr); + /* freeze + disable module */ + flexcan_chip_freeze(priv); + flexcan_chip_disable(priv); /* Disable all interrupts */ flexcan_write(0, ®s->imask1); -- cgit v0.10.2 From 1c37a72c1bd0b83be8b95cff7f1bc9b1f32bd433 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 3 Mar 2014 13:37:14 +0200 Subject: mac80211: consider virtual mon when calculating min_def When calculating the current max bw required for a channel context, we didn't consider the virtual monitor interface, resulting in its channel context being narrower than configured. This broke monitor mode with iwlmvm, which uses the minimal width. Reported-by: Ido Yariv Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f43613a..0c1ecfd 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -100,6 +100,12 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, } max_bw = max(max_bw, width); } + + /* use the configured bandwidth in case of monitor interface */ + sdata = rcu_dereference(local->monitor_sdata); + if (sdata && rcu_access_pointer(sdata->vif.chanctx_conf) == conf) + max_bw = max(max_bw, conf->def.width); + rcu_read_unlock(); return max_bw; -- cgit v0.10.2 From bc00a91d627026f08abf69bf5d0015499dd30c2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Mar 2014 13:55:54 +0100 Subject: cfg80211: remove racy beacon_interval assignment In case of AP mode, the beacon interval is already reset to zero inside cfg80211_stop_ap(), and in the other modes it isn't relevant. Remove the assignment to remove a potential race since the assignment isn't properly locked. Reported-by: Michal Kazior Signed-off-by: Johannes Berg diff --git a/net/wireless/core.c b/net/wireless/core.c index 010892b..a3bf18d 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -788,8 +788,6 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, default: break; } - - wdev->beacon_interval = 0; } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, -- cgit v0.10.2 From d96e43e8fce28cf97df576a07af9d65657a41a6f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 28 Feb 2014 20:48:36 +0100 Subject: can: flexcan: flexcan_remove(): add missing netif_napi_del() This patch adds the missing netif_napi_del() to the flexcan_remove() function. Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 330b5b9..dff4fa4 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1184,9 +1184,10 @@ static int flexcan_probe(struct platform_device *pdev) static int flexcan_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); + struct flexcan_priv *priv = netdev_priv(dev); unregister_flexcandev(dev); - + netif_napi_del(&priv->napi); free_candev(dev); return 0; -- cgit v0.10.2 From 4b5b82274a17f0ebbf02378df2ba7f36a3f5af19 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 28 Feb 2014 15:16:59 +0100 Subject: can: flexcan: factor out soft reset into seperate funtion This patch moves the soft reset into a seperate function. Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index dff4fa4..61376ab 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -363,6 +363,21 @@ static int flexcan_chip_unfreeze(struct flexcan_priv *priv) return 0; } +static int flexcan_chip_softreset(struct flexcan_priv *priv) +{ + struct flexcan_regs __iomem *regs = priv->base; + unsigned int timeout = FLEXCAN_TIMEOUT_US / 10; + + flexcan_write(FLEXCAN_MCR_SOFTRST, ®s->mcr); + while (timeout-- && (flexcan_read(®s->mcr) & FLEXCAN_MCR_SOFTRST)) + usleep_range(10, 20); + + if (flexcan_read(®s->mcr) & FLEXCAN_MCR_SOFTRST) + return -ETIMEDOUT; + + return 0; +} + static int flexcan_get_berr_counter(const struct net_device *dev, struct can_berr_counter *bec) { @@ -786,16 +801,9 @@ static int flexcan_chip_start(struct net_device *dev) return err; /* soft reset */ - flexcan_write(FLEXCAN_MCR_SOFTRST, ®s->mcr); - udelay(10); - - reg_mcr = flexcan_read(®s->mcr); - if (reg_mcr & FLEXCAN_MCR_SOFTRST) { - netdev_err(dev, "Failed to softreset can module (mcr=0x%08x)\n", - reg_mcr); - err = -ENODEV; + err = flexcan_chip_softreset(priv); + if (err) goto out_chip_disable; - } flexcan_set_bittiming(dev); -- cgit v0.10.2 From 821047c4055cca833c4674f172a9d73003563eb6 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 1 Mar 2014 15:31:53 +0100 Subject: can: remove CAN FD compatibility for CAN 2.0 sockets In commit e2d265d3b587 (canfd: add support for CAN FD in CAN_RAW sockets) CAN FD frames with a payload length up to 8 byte are passed to legacy sockets where the CAN FD support was not enabled by the application. After some discussions with developers at a fair this well meant feature leads to confusion as no clean switch for CAN / CAN FD is provided to the application programmer. Additionally a compatibility like this for legacy CAN_RAW sockets requires some compatibility handling for the sending, e.g. make CAN2.0 frames a CAN FD frame with BRS at transmission time (?!?). This will become a mess when people start to develop applications with real CAN FD hardware. This patch reverts the bad compatibility code together with the documentation describing the removed feature. Acked-by: Stephane Grosjean Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index f3089d4..0cbe6ec 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -554,12 +554,6 @@ solution for a couple of reasons: not specified in the struct can_frame and therefore it is only valid in CANFD_MTU sized CAN FD frames. - As long as the payload length is <=8 the received CAN frames from CAN FD - capable CAN devices can be received and read by legacy sockets too. When - user-generated CAN FD frames have a payload length <=8 these can be send - by legacy CAN network interfaces too. Sending CAN FD frames with payload - length > 8 to a legacy CAN network interface returns an -EMSGSIZE error. - Implementation hint for new CAN applications: To build a CAN FD aware application use struct canfd_frame as basic CAN diff --git a/net/can/raw.c b/net/can/raw.c index 8be757c..081e81f 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -121,13 +121,9 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (!ro->recv_own_msgs && oskb->sk == sk) return; - /* do not pass frames with DLC > 8 to a legacy socket */ - if (!ro->fd_frames) { - struct canfd_frame *cfd = (struct canfd_frame *)oskb->data; - - if (unlikely(cfd->len > CAN_MAX_DLEN)) - return; - } + /* do not pass non-CAN2.0 frames to a legacy socket */ + if (!ro->fd_frames && oskb->len != CAN_MTU) + return; /* clone the given skb to be able to enqueue it into the rcv queue */ skb = skb_clone(oskb, GFP_ATOMIC); @@ -738,9 +734,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - struct raw_sock *ro = raw_sk(sk); struct sk_buff *skb; - int rxmtu; int err = 0; int noblock; @@ -751,20 +745,10 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - /* - * when serving a legacy socket the DLC <= 8 is already checked inside - * raw_rcv(). Now check if we need to pass a canfd_frame to a legacy - * socket and cut the possible CANFD_MTU/CAN_MTU length to CAN_MTU - */ - if (!ro->fd_frames) - rxmtu = CAN_MTU; - else - rxmtu = skb->len; - - if (size < rxmtu) + if (size < skb->len) msg->msg_flags |= MSG_TRUNC; else - size = rxmtu; + size = skb->len; err = memcpy_toiovec(msg->msg_iov, skb->data, size); if (err < 0) { -- cgit v0.10.2 From 7693decce869a94821bed1fa6c237c7224096b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2014 09:25:52 -0500 Subject: ARM: XEN depends on having a MMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/arm/xen/enlighten.c (and maybe others) use MMU-specific functions like pte_mkspecial which are only available on MMU builds. So let XEN depend on MMU. Suggested-by: Arnd Bergmann Signed-off-by: Uwe Kleine-König Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 24307dc..8b78465 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1883,6 +1883,7 @@ config XEN depends on ARM && AEABI && OF depends on CPU_V7 && !CPU_V6 depends on !GENERIC_ATOMIC64 + depends on MMU select ARM_PSCI select SWIOTLB_XEN select ARCH_DMA_ADDR_T_64BIT -- cgit v0.10.2 From 37d6a82bd99d3354e59a9a629fa48fc75ca51c66 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 3 Mar 2014 23:05:15 +0800 Subject: Thermal: update INT3404 thermal driver help text Signed-off-by: Zhang Rui diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 35c0664..7932294 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -210,8 +210,16 @@ config ACPI_INT3403_THERMAL tristate "ACPI INT3403 thermal driver" depends on X86 && ACPI help - This driver uses ACPI INT3403 device objects. If present, it will - register each INT3403 thermal sensor as a thermal zone. + Newer laptops and tablets that use ACPI may have thermal sensors + outside the core CPU/SOC for thermal safety reasons. These + temperature sensors are also exposed for the OS to use via the so + called INT3403 ACPI object. This driver will, on devices that have + such sensors, expose the temperature information from these sensors + to userspace via the normal thermal framework. This means that a wide + range of applications and GUI widgets can show this information to + the user or use this information for making decisions. For example, + the Intel Thermal Daemon can use this information to allow the user + to select his laptop to run without turning on the fans. menu "Texas Instruments thermal drivers" source "drivers/thermal/ti-soc-thermal/Kconfig" -- cgit v0.10.2 From 79786880a47a8c5b4c8146c03432b3387a07a169 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 2 Mar 2014 15:33:35 +0100 Subject: x86_pkg_temp_thermal: Do not expose as a hwmon device The temperature value reported by x86_pkg_temp_thermal is already reported by the coretemp driver. So, do not expose this thermal zone as a hwmon device, because it would be redundant. Signed-off-by: Jean Delvare Cc: Zhang Rui Cc: Eduardo Valentin Acked-by: Guenter Roeck Signed-off-by: Zhang Rui diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 972e1c7..3d8d972 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -68,6 +68,10 @@ struct phy_dev_entry { struct thermal_zone_device *tzone; }; +static const struct thermal_zone_params pkg_temp_tz_params = { + .no_hwmon = true, +}; + /* List maintaining number of package instances */ static LIST_HEAD(phy_dev_list); static DEFINE_MUTEX(phy_dev_list_mutex); @@ -446,7 +450,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) thres_count, (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01, - phy_dev_entry, &tzone_ops, NULL, 0, 0); + phy_dev_entry, &tzone_ops, &pkg_temp_tz_params, 0, 0); if (IS_ERR(phy_dev_entry->tzone)) { err = PTR_ERR(phy_dev_entry->tzone); goto err_ret_free; -- cgit v0.10.2 From 3e4216531c442ea99cb32a69e5ef4af723e7b5a9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 2 Mar 2014 15:05:49 +0100 Subject: x86_pkg_temp_thermal: Fix the thermal zone type The thermal zone type should not include an instance number. Otherwise each zone is considered a different type and the thermal-to-hwmon bridge fails to group them all in a single hwmon device. I also changed the type to "x86_pkg_temp", because "pkg" was too generic, and other thermal drivers use an underscore, not a dash, as a separator. Or maybe "cpu_pkg_temp" would be better? Signed-off-by: Jean Delvare Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Zhang Rui diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 3d8d972..081fd7e 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -398,7 +398,6 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) int err; u32 tj_max; struct phy_dev_entry *phy_dev_entry; - char buffer[30]; int thres_count; u32 eax, ebx, ecx, edx; u8 *temp; @@ -444,9 +443,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) phy_dev_entry->first_cpu = cpu; phy_dev_entry->tj_max = tj_max; phy_dev_entry->ref_cnt = 1; - snprintf(buffer, sizeof(buffer), "pkg-temp-%d\n", - phy_dev_entry->phys_proc_id); - phy_dev_entry->tzone = thermal_zone_device_register(buffer, + phy_dev_entry->tzone = thermal_zone_device_register("x86_pkg_temp", thres_count, (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01, -- cgit v0.10.2 From d1c8b0410b77f1e43f97cd22bc7e0a71a5305840 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 27 Jan 2014 09:40:58 +0100 Subject: thermal,rcar_thermal: Add dependency on HAS_IOMEM Commit beeb5a1e (thermal: rcar-thermal: Enable driver compilation with COMPILE_TEST) broke build on archs wihout io memory. On archs like S390 or um this driver cannot build nor work. Make it depend on HAS_IOMEM to bypass build failures. drivers/thermal/rcar_thermal.c:404: undefined reference to `devm_ioremap_resource' drivers/thermal/rcar_thermal.c:426: undefined reference to `devm_ioremap_resource' Signed-off-by: Richard Weinberger Acked-by: Laurent Pinchart Acked-by: Kuninori Morimoto Signed-off-by: Zhang Rui diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 7932294..5f88d76 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -136,6 +136,7 @@ config SPEAR_THERMAL config RCAR_THERMAL tristate "Renesas R-Car thermal driver" depends on ARCH_SHMOBILE || COMPILE_TEST + depends on HAS_IOMEM help Enable this to plug the R-Car thermal sensor driver into the Linux thermal framework. -- cgit v0.10.2 From 5ca0cce5622bf476e3e6bf627fe8e9381d6ae174 Mon Sep 17 00:00:00 2001 From: Ni Wade Date: Mon, 17 Feb 2014 11:02:55 +0800 Subject: Thermal: Allow first update of cooling device state In initialization, if the cooling device is initialized at max cooling state, and the thermal zone temperature is below the first trip point, then the cooling state can't be updated to the right state, untill the first trip point be triggered. To fix this issue, allow first update of cooling device state during registration, initialized "updated" device field as "false" (instead of "true"). Signed-off-by: Wei Ni Signed-off-by: Zhang Rui diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 338a88b..02f57af 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1107,7 +1107,7 @@ __thermal_cooling_device_register(struct device_node *np, INIT_LIST_HEAD(&cdev->thermal_instances); cdev->np = np; cdev->ops = ops; - cdev->updated = true; + cdev->updated = false; cdev->device.class = &thermal_class; cdev->devdata = devdata; dev_set_name(&cdev->device, "cooling_device%d", cdev->id); -- cgit v0.10.2 From f2234bcd03ad031225d7dc37dd18852a2f2ff2bf Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 24 Jan 2014 10:23:19 +0800 Subject: Thermal: thermal zone governor fix This patch does a cleanup about the thermal zone govenor, setting and make the following rule. 1. For thermal zone devices that are registered w/o tz->tzp, they can use the default thermal governor only. 2. For thermal zone devices w/ governor name specified in tz->tzp->governor_name, we will use the default govenor if the governor specified is not available at the moment, and update tz->governor when the matched governor is registered. This also fixes a problem that OF registered thermal zones are running with no governor. Signed-off-by: Zhang Rui Acked-by: Javi Merino diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 02f57af..71b0ec0 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -56,10 +56,15 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); +static struct thermal_governor *def_governor; + static struct thermal_governor *__find_governor(const char *name) { struct thermal_governor *pos; + if (!name || !name[0]) + return def_governor; + list_for_each_entry(pos, &thermal_governor_list, governor_list) if (!strnicmp(name, pos->name, THERMAL_NAME_LENGTH)) return pos; @@ -82,17 +87,23 @@ int thermal_register_governor(struct thermal_governor *governor) if (__find_governor(governor->name) == NULL) { err = 0; list_add(&governor->governor_list, &thermal_governor_list); + if (!def_governor && !strncmp(governor->name, + DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH)) + def_governor = governor; } mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_tz_list, node) { + /* + * only thermal zones with specified tz->tzp->governor_name + * may run with tz->govenor unset + */ if (pos->governor) continue; - if (pos->tzp) - name = pos->tzp->governor_name; - else - name = DEFAULT_THERMAL_GOVERNOR; + + name = pos->tzp->governor_name; + if (!strnicmp(name, governor->name, THERMAL_NAME_LENGTH)) pos->governor = governor; } @@ -342,8 +353,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz) static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip, enum thermal_trip_type trip_type) { - if (tz->governor) - tz->governor->throttle(tz, trip); + tz->governor ? tz->governor->throttle(tz, trip) : + def_governor->throttle(tz, trip); } static void handle_critical_trips(struct thermal_zone_device *tz, @@ -1533,7 +1544,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (tz->tzp) tz->governor = __find_governor(tz->tzp->governor_name); else - tz->governor = __find_governor(DEFAULT_THERMAL_GOVERNOR); + tz->governor = def_governor; mutex_unlock(&thermal_governor_lock); -- cgit v0.10.2 From 2a26ebef841bac72a4c5776e920496e07b405628 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 3 Mar 2014 05:57:26 +0100 Subject: rt,blk,mq: Make blk_mq_cpu_notify_lock a raw spinlock [ 365.164040] BUG: sleeping function called from invalid context at kernel/rtmutex.c:674 [ 365.164041] in_atomic(): 1, irqs_disabled(): 1, pid: 26, name: migration/1 [ 365.164043] no locks held by migration/1/26. [ 365.164044] irq event stamp: 6648 [ 365.164056] hardirqs last enabled at (6647): [] restore_args+0x0/0x30 [ 365.164062] hardirqs last disabled at (6648): [] multi_cpu_stop+0x9d/0x120 [ 365.164070] softirqs last enabled at (0): [] copy_process.part.28+0x6fc/0x1920 [ 365.164072] softirqs last disabled at (0): [< (null)>] (null) [ 365.164076] CPU: 1 PID: 26 Comm: migration/1 Tainted: GF N 3.12.12-rt19-0.gcb6c4a2-rt #3 [ 365.164078] Hardware name: QCI QSSC-S4R/QSSC-S4R, BIOS QSSC-S4R.QCI.01.00.S013.032920111005 03/29/2011 [ 365.164091] 0000000000000001 ffff880a42ea7c30 ffffffff815367e6 ffffffff81a086c0 [ 365.164099] ffff880a42ea7c40 ffffffff8108919c ffff880a42ea7c60 ffffffff8153c24f [ 365.164107] ffff880a42ea91f0 00000000ffffffe1 ffff880a42ea7c88 ffffffff81297ec0 [ 365.164108] Call Trace: [ 365.164119] [] try_stack_unwind+0x191/0x1a0 [ 365.164127] [] dump_trace+0x92/0x360 [ 365.164133] [] show_trace_log_lvl+0x48/0x60 [ 365.164138] [] show_stack_log_lvl+0xd8/0x1d0 [ 365.164143] [] show_stack+0x20/0x50 [ 365.164153] [] dump_stack+0x54/0x9a [ 365.164163] [] __might_sleep+0xfc/0x140 [ 365.164173] [] rt_spin_lock+0x1f/0x70 [ 365.164182] [] blk_mq_main_cpu_notify+0x20/0x70 [ 365.164191] [] notifier_call_chain+0x4c/0x70 [ 365.164201] [] __raw_notifier_call_chain+0x9/0x10 [ 365.164207] [] cpu_notify+0x1e/0x40 [ 365.164217] [] take_cpu_down+0x22/0x40 [ 365.164223] [] multi_cpu_stop+0xd6/0x120 [ 365.164229] [] cpu_stopper_thread+0xd7/0x1e0 [ 365.164235] [] smpboot_thread_fn+0x203/0x380 [ 365.164241] [] kthread+0xc8/0xd0 [ 365.164250] [] ret_from_fork+0x7c/0xb0 [ 365.164429] smpboot: CPU 1 is now offline Signed-off-by: Mike Galbraith Signed-off-by: Jens Axboe diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c index 3146bef..136ef86 100644 --- a/block/blk-mq-cpu.c +++ b/block/blk-mq-cpu.c @@ -11,7 +11,7 @@ #include "blk-mq.h" static LIST_HEAD(blk_mq_cpu_notify_list); -static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock); +static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock); static int blk_mq_main_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) @@ -19,12 +19,12 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, unsigned int cpu = (unsigned long) hcpu; struct blk_mq_cpu_notifier *notify; - spin_lock(&blk_mq_cpu_notify_lock); + raw_spin_lock(&blk_mq_cpu_notify_lock); list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) notify->notify(notify->data, action, cpu); - spin_unlock(&blk_mq_cpu_notify_lock); + raw_spin_unlock(&blk_mq_cpu_notify_lock); return NOTIFY_OK; } @@ -32,16 +32,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) { BUG_ON(!notifier->notify); - spin_lock(&blk_mq_cpu_notify_lock); + raw_spin_lock(&blk_mq_cpu_notify_lock); list_add_tail(¬ifier->list, &blk_mq_cpu_notify_list); - spin_unlock(&blk_mq_cpu_notify_lock); + raw_spin_unlock(&blk_mq_cpu_notify_lock); } void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) { - spin_lock(&blk_mq_cpu_notify_lock); + raw_spin_lock(&blk_mq_cpu_notify_lock); list_del(¬ifier->list); - spin_unlock(&blk_mq_cpu_notify_lock); + raw_spin_unlock(&blk_mq_cpu_notify_lock); } void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, -- cgit v0.10.2 From 17b0c1f7865d8bf4f5e5aa94e2aeafb35d23e4e9 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 11 Feb 2014 21:39:06 +0200 Subject: drm/i915: vlv: reserve GT power context early We reserve the space for the power context in stolen memory at a fixed address from a delayed work. This races with the subsequent driver init/resume code which could allocate something at that address, so the reservation for the power context fails. Reserve the space up-front, so this can't happen. This also adds a missing struct_mutex lock around the stolen allocation, which wasn't taken in the delayed work path. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d77cc81..e1fc35a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3493,6 +3493,8 @@ static void valleyview_setup_pctx(struct drm_device *dev) u32 pcbr; int pctx_size = 24*1024; + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + pcbr = I915_READ(VLV_PCBR); if (pcbr) { /* BIOS set it up already, grab the pre-alloc'd space */ @@ -3542,8 +3544,6 @@ static void valleyview_enable_rps(struct drm_device *dev) I915_WRITE(GTFIFODBG, gtfifodbg); } - valleyview_setup_pctx(dev); - /* If VLV, Forcewake all wells, else re-direct to regular path */ gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); @@ -4395,6 +4395,8 @@ void intel_enable_gt_powersave(struct drm_device *dev) ironlake_enable_rc6(dev); intel_init_emon(dev); } else if (IS_GEN6(dev) || IS_GEN7(dev)) { + if (IS_VALLEYVIEW(dev)) + valleyview_setup_pctx(dev); /* * PCU communication is slow and this doesn't need to be * done at any specific time, so do this out of our fast path -- cgit v0.10.2 From d9d820810d4eeac336c515dfb62561c21b296f07 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 27 Feb 2014 16:30:56 -0300 Subject: drm/i915: fix assert_cursor on BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to read the correct register, not a register that doesn't exist and will trigger "Unclaimed register" messages when we touch it. Also rearrange the checks in an attempt to prevent this error from happening again. Signed-off-by: Paulo Zanoni Reviewed-by: Ville Syrjälä [Jani: dropped an extra empty line introduced.] Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4c16728..9b8a7c7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1092,12 +1092,12 @@ static void assert_cursor(struct drm_i915_private *dev_priv, struct drm_device *dev = dev_priv->dev; bool cur_state; - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) - cur_state = I915_READ(CURCNTR_IVB(pipe)) & CURSOR_MODE; - else if (IS_845G(dev) || IS_I865G(dev)) + if (IS_845G(dev) || IS_I865G(dev)) cur_state = I915_READ(_CURACNTR) & CURSOR_ENABLE; - else + else if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev)) cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; + else + cur_state = I915_READ(CURCNTR_IVB(pipe)) & CURSOR_MODE; WARN(cur_state != state, "cursor on pipe %c assertion failure (expected %s, current %s)\n", -- cgit v0.10.2 From 6375b768a9850b6154478993e5fb566fa4614a9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 3 Mar 2014 11:33:36 +0200 Subject: drm/i915: Reject >165MHz modes w/ DVI monitors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single-link DVI max dotclock is 165MHz. Filter out modes with higher dotclock when the monitor doesn't support HDMI. Modes higher than 165 MHz were allowed in commit 7d148ef51a657fd04036c3ed7803da600dd0d451 Author: Daniel Vetter Date: Mon Jul 22 18:02:39 2013 +0200 drm/i915: fix hdmi portclock limits Also don't attempt to use 12bpc mode with DVI monitors. Cc: Adam Nielsen Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75345 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70331 Tested-by: Ralf Jung Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 6db0d9d..ee3181e 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -845,7 +845,7 @@ static int hdmi_portclock_limit(struct intel_hdmi *hdmi) { struct drm_device *dev = intel_hdmi_to_dev(hdmi); - if (IS_G4X(dev)) + if (!hdmi->has_hdmi_sink || IS_G4X(dev)) return 165000; else if (IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8) return 300000; @@ -899,8 +899,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, * outputs. We also need to check that the higher clock still fits * within limits. */ - if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= portclock_limit - && HAS_PCH_SPLIT(dev)) { + if (pipe_config->pipe_bpp > 8*3 && intel_hdmi->has_hdmi_sink && + clock_12bpc <= portclock_limit && HAS_PCH_SPLIT(dev)) { DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); desired_bpp = 12*3; -- cgit v0.10.2 From d13c46c67e546bb1dc1c4dc7c43e388d0119276b Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 3 Mar 2014 14:49:51 +0000 Subject: DRM: armada: fix use of kfifo_put() The kfifo_put() API changed in 498d319bb512 (kfifo API type safety) which now results in the wrong pointer being added to the kfifo ring, which then causes an oops. Fix this. Cc: # 3.13 Signed-off-by: Russell King diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 62d0ff3..073dbf3 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -68,15 +68,7 @@ void __armada_drm_queue_unref_work(struct drm_device *dev, { struct armada_private *priv = dev->dev_private; - /* - * Yes, we really must jump through these hoops just to store a - * _pointer_ to something into the kfifo. This is utterly insane - * and idiotic, because it kfifo requires the _data_ pointed to by - * the pointer const, not the pointer itself. Not only that, but - * you have to pass a pointer _to_ the pointer you want stored. - */ - const struct drm_framebuffer *silly_api_alert = fb; - WARN_ON(!kfifo_put(&priv->fb_unref, &silly_api_alert)); + WARN_ON(!kfifo_put(&priv->fb_unref, fb)); schedule_work(&priv->fb_unref_work); } -- cgit v0.10.2 From de39d7a4f3693c4247135cbce42716bf2a113577 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Mar 2014 02:18:57 +0300 Subject: hsr: off by one sanity check in hsr_register_frame_in() This is a sanity check and we never pass invalid values so this patch doesn't change anything. However the node->time_in[] array has HSR_MAX_SLAVE (2) elements and not HSR_MAX_DEV (3). Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 327060c..7ae0d7f6 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -297,7 +297,7 @@ static bool seq_nr_after(u16 a, u16 b) void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx) { - if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) { + if ((dev_idx < 0) || (dev_idx >= HSR_MAX_SLAVE)) { WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx); return; } -- cgit v0.10.2 From c84a57113f59486e6688be1cd443b96e3118efa0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 28 Feb 2014 16:42:26 -0800 Subject: tcp: fix bogus RTT on special retransmission RTT may be bogus with tall loss probe (TLP) when a packet is retransmitted and latter (s)acked without TCPCB_SACKED_RETRANS flag. For example, TLP calls __tcp_retransmit_skb() instead of tcp_retransmit_skb(). The skb timestamps are updated but the sacked flag is not marked with TCPCB_SACKED_RETRANS. As a result we'll get bogus RTT in tcp_clean_rtx_queue() or in tcp_sacktag_one() on spurious retransmission. The fix is to apply the sticky flag TCP_EVER_RETRANS to enforce Karn's check on RTT sampling. However this will disable F-RTO if timeout occurs after TLP, by resetting undo_marker in tcp_enter_loss(). We relax this check to only if any pending retransmists are still in-flight. Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Neal Cardwell Acked-by: Nandita Dukkipati Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 227cba7..eeaac39 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1945,8 +1945,9 @@ void tcp_enter_loss(struct sock *sk, int how) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) tp->undo_marker = 0; + TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d718482..f0eb4e3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2337,6 +2337,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); unsigned int cur_mss; + int err; /* Inconslusive MTU probe */ if (icsk->icsk_mtup.probe_size) { @@ -2400,11 +2401,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); - return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : - -ENOBUFS; + err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : + -ENOBUFS; } else { - return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); + err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } + + if (likely(!err)) + TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + return err; } int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) -- cgit v0.10.2 From e842b068bf9a9eb7b38e0e1875357440a1fd2169 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Mar 2014 16:54:36 +0300 Subject: qlcnic: dcb: a couple off by one bugs The ->tc_cfg[] array has QLC_DCB_MAX_TC (8) elements so the check is off by one. These functions are always called with valid values though so it doesn't affect how the code works. Signed-off-by: Dan Carpenter Acked-by: Sucheta Chakraborty Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c index 77f1bce..7d4f549 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c @@ -807,7 +807,7 @@ qlcnic_dcb_get_pg_tc_cfg_tx(struct net_device *netdev, int tc, u8 *prio, !type->tc_param_valid) return; - if (tc < 0 || (tc > QLC_DCB_MAX_TC)) + if (tc < 0 || (tc >= QLC_DCB_MAX_TC)) return; tc_cfg = &type->tc_cfg[tc]; @@ -843,7 +843,7 @@ static void qlcnic_dcb_get_pg_bwg_cfg_tx(struct net_device *netdev, int pgid, !type->tc_param_valid) return; - if (pgid < 0 || pgid > QLC_DCB_MAX_PG) + if (pgid < 0 || pgid >= QLC_DCB_MAX_PG) return; pgcfg = &type->pg_cfg[pgid]; -- cgit v0.10.2 From c502224efb2eaec47f16da39235079598a0f3ed2 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Mon, 3 Mar 2014 14:24:20 +0530 Subject: be2net: Fix to reset transparent vlan tagging For disabling transparent tagging issue SET_HSW_CONFIG with pvid_valid=1 and pvid=0xFFFF and not with the default pvid as this case would fail in Lancer. Hence removing the get_hsw_config call from be_vf_setup() as it's only use of getting default pvid is no longer needed. Also do proper housekeeping only if the FW command succeeds. Signed-off-by: Kalesh AP Signed-off-by: Somnath Kotur Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 8d09615..05529e2 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -350,11 +350,13 @@ struct be_drv_stats { u32 roce_drops_crc; }; +/* A vlan-id of 0xFFFF must be used to clear transparent vlan-tagging */ +#define BE_RESET_VLAN_TAG_ID 0xFFFF + struct be_vf_cfg { unsigned char mac_addr[ETH_ALEN]; int if_handle; int pmac_id; - u16 def_vid; u16 vlan_tag; u32 tx_rate; }; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 04ac9c6..45662af 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1287,24 +1287,20 @@ static int be_set_vf_vlan(struct net_device *netdev, if (vlan || qos) { vlan |= qos << VLAN_PRIO_SHIFT; - if (vf_cfg->vlan_tag != vlan) { - /* If this is new value, program it. Else skip. */ - vf_cfg->vlan_tag = vlan; + if (vf_cfg->vlan_tag != vlan) status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_cfg->if_handle, 0); - } } else { /* Reset Transparent Vlan Tagging. */ - vf_cfg->vlan_tag = 0; - vlan = vf_cfg->def_vid; - status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, - vf_cfg->if_handle, 0); + status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, + vf + 1, vf_cfg->if_handle, 0); } - - if (status) + if (!status) + vf_cfg->vlan_tag = vlan; + else dev_info(&adapter->pdev->dev, - "VLAN %d config on VF %d failed\n", vlan, vf); + "VLAN %d config on VF %d failed\n", vlan, vf); return status; } @@ -3013,11 +3009,11 @@ static int be_vf_setup_init(struct be_adapter *adapter) static int be_vf_setup(struct be_adapter *adapter) { + struct device *dev = &adapter->pdev->dev; struct be_vf_cfg *vf_cfg; - u16 def_vlan, lnk_speed; int status, old_vfs, vf; - struct device *dev = &adapter->pdev->dev; u32 privileges; + u16 lnk_speed; old_vfs = pci_num_vf(adapter->pdev); if (old_vfs) { @@ -3084,12 +3080,6 @@ static int be_vf_setup(struct be_adapter *adapter) if (!status) vf_cfg->tx_rate = lnk_speed; - status = be_cmd_get_hsw_config(adapter, &def_vlan, - vf + 1, vf_cfg->if_handle, NULL); - if (status) - goto err; - vf_cfg->def_vid = def_vlan; - if (!old_vfs) be_cmd_enable_vf(adapter, vf + 1); } -- cgit v0.10.2 From 7ad09458a5be9a0990457c1a198e702559ac25ca Mon Sep 17 00:00:00 2001 From: Somnath kotur Date: Mon, 3 Mar 2014 14:24:43 +0530 Subject: be2net: clear promiscuous bits in adapter->flags while disabling promiscuous mode We should clear promiscuous bits in adapter->flags while disabling promiscuous mode. Else we will not put interface back into VLAN promisc mode if the vlans already added exceeds the maximum limit. Signed-off-by: Kalesh AP Signed-off-by: Somnath Kotur Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 45662af..9e6d678 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1157,6 +1157,14 @@ ret: return status; } +static void be_clear_promisc(struct be_adapter *adapter) +{ + adapter->promiscuous = false; + adapter->flags &= ~BE_FLAGS_VLAN_PROMISC; + + be_cmd_rx_filter(adapter, IFF_PROMISC, OFF); +} + static void be_set_rx_mode(struct net_device *netdev) { struct be_adapter *adapter = netdev_priv(netdev); @@ -1170,9 +1178,7 @@ static void be_set_rx_mode(struct net_device *netdev) /* BE was previously in promiscuous mode; disable it */ if (adapter->promiscuous) { - adapter->promiscuous = false; - be_cmd_rx_filter(adapter, IFF_PROMISC, OFF); - + be_clear_promisc(adapter); if (adapter->vlans_added) be_vid_config(adapter); } -- cgit v0.10.2 From c91289510602baf9a05b3501d97dc70efa269e01 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 3 Mar 2014 14:25:07 +0530 Subject: be2net: Fix skb double free in be_xmit_wrokarounds() failure path skb_padto(), skb_share_check() and __vlan_put_tag() routines free skb when they return an error. This patch fixes be_xmit_workarounds() to not free skb again in such cases. Signed-off-by: Vasundhara Volam Signed-off-by: Sathya Perla Signed-off-by: Somnath Kotur Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 9e6d678..a9da6f9 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -927,7 +927,7 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, */ if (unlikely(!BEx_chip(adapter) && skb->len <= 32)) { if (skb_padto(skb, 36)) - goto tx_drop; + goto err; skb->len = 36; } @@ -959,7 +959,7 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, vlan_tx_tag_present(skb)) { skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); if (unlikely(!skb)) - goto tx_drop; + goto err; } /* HW may lockup when VLAN HW tagging is requested on @@ -981,12 +981,13 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, be_vlan_tag_tx_chk(adapter, skb)) { skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); if (unlikely(!skb)) - goto tx_drop; + goto err; } return skb; tx_drop: dev_kfree_skb_any(skb); +err: return NULL; } -- cgit v0.10.2 From ec495fac119f2eaea8b8c6e6db17ba163c7af1f4 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 3 Mar 2014 14:25:38 +0530 Subject: be2net: isolate TX workarounds not applicable to Skyhawk-R Some of TX workarounds in be_xmit_workarounds() routine are not applicable (and result in HW errors) to Skyhawk-R chip. Isolate BE3-R/Lancer specific workarounds to a separate routine. Signed-off-by: Vasundhara Volam Signed-off-by: Sathya Perla Signed-off-by: Somnath Kotur Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a9da6f9..36c8061 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -913,24 +913,14 @@ static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, return BE3_chip(adapter) && be_ipv6_exthdr_check(skb); } -static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, - struct sk_buff *skb, - bool *skip_hw_vlan) +static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, + struct sk_buff *skb, + bool *skip_hw_vlan) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; unsigned int eth_hdr_len; struct iphdr *ip; - /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or less - * may cause a transmit stall on that port. So the work-around is to - * pad short packets (<= 32 bytes) to a 36-byte length. - */ - if (unlikely(!BEx_chip(adapter) && skb->len <= 32)) { - if (skb_padto(skb, 36)) - goto err; - skb->len = 36; - } - /* For padded packets, BE HW modifies tot_len field in IP header * incorrecly when VLAN tag is inserted by HW. * For padded packets, Lancer computes incorrect checksum. @@ -991,6 +981,29 @@ err: return NULL; } +static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, + struct sk_buff *skb, + bool *skip_hw_vlan) +{ + /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or + * less may cause a transmit stall on that port. So the work-around is + * to pad short packets (<= 32 bytes) to a 36-byte length. + */ + if (unlikely(!BEx_chip(adapter) && skb->len <= 32)) { + if (skb_padto(skb, 36)) + return NULL; + skb->len = 36; + } + + if (BEx_chip(adapter) || lancer_chip(adapter)) { + skb = be_lancer_xmit_workarounds(adapter, skb, skip_hw_vlan); + if (!skb) + return NULL; + } + + return skb; +} + static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) { struct be_adapter *adapter = netdev_priv(netdev); -- cgit v0.10.2 From 3995d265b3640fb7dc843a3c5f62ced4f121fb89 Mon Sep 17 00:00:00 2001 From: Schuyler Patton Date: Mon, 3 Mar 2014 16:19:06 +0530 Subject: net: cpsw: fix cpdma rx descriptor leak on down interface This patch fixes a CPDMA RX Descriptor leak that occurs after taking the interface down when the CPSW is in Dual MAC mode. Previously the CPSW_ALE port was left open up which causes packets to be received and processed by the RX interrupt handler and were passed to the non active network interface where they were ignored. The fix is for the slave_stop function of the selected interface to disable the respective CPSW_ALE Port from forwarding packets. This blocks traffic from being received on the inactive interface. Signed-off-by: Schuyler Patton Reviewed-by: Felipe Balbi Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 651087b..ffd4d12 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1164,11 +1164,17 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv) { + u32 slave_port; + + slave_port = cpsw_get_slave_port(priv, slave->slave_num); + if (!slave->phy) return; phy_stop(slave->phy); phy_disconnect(slave->phy); slave->phy = NULL; + cpsw_ale_control_set(priv->ale, slave_port, + ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); } static int cpsw_ndo_open(struct net_device *ndev) -- cgit v0.10.2 From 10ddceb22bab11dab10ba645c7df2e4a8e7a5db5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 3 Mar 2014 20:18:36 +0800 Subject: ip_tunnel:multicast process cause panic due to skb->_skb_refdst NULL pointer when ip_tunnel process multicast packets, it may check if the packet is looped back packet though 'rt_is_output_route(skb_rtable(skb))' in ip_tunnel_rcv(), but before that , skb->_skb_refdst has been dropped in iptunnel_pull_header(), so which leads to a panic. fix the bug: https://bugzilla.kernel.org/show_bug.cgi?id=70681 Signed-off-by: Xin Long Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 8d69626..6f847dd 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -108,7 +108,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) nf_reset(skb); secpath_reset(skb); skb_clear_hash_if_not_l4(skb); - skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); skb->pkt_type = PACKET_HOST; -- cgit v0.10.2 From ec0223ec48a90cb605244b45f7c62de856403729 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 3 Mar 2014 17:23:04 +0100 Subject: net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable RFC4895 introduced AUTH chunks for SCTP; during the SCTP handshake RANDOM; CHUNKS; HMAC-ALGO are negotiated (CHUNKS being optional though): ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- -------------------- COOKIE-ECHO --------------------> <-------------------- COOKIE-ACK --------------------- A special case is when an endpoint requires COOKIE-ECHO chunks to be authenticated: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- ------------------ AUTH; COOKIE-ECHO ----------------> <-------------------- COOKIE-ACK --------------------- RFC4895, section 6.3. Receiving Authenticated Chunks says: The receiver MUST use the HMAC algorithm indicated in the HMAC Identifier field. If this algorithm was not specified by the receiver in the HMAC-ALGO parameter in the INIT or INIT-ACK chunk during association setup, the AUTH chunk and all the chunks after it MUST be discarded and an ERROR chunk SHOULD be sent with the error cause defined in Section 4.1. [...] If no endpoint pair shared key has been configured for that Shared Key Identifier, all authenticated chunks MUST be silently discarded. [...] When an endpoint requires COOKIE-ECHO chunks to be authenticated, some special procedures have to be followed because the reception of a COOKIE-ECHO chunk might result in the creation of an SCTP association. If a packet arrives containing an AUTH chunk as a first chunk, a COOKIE-ECHO chunk as the second chunk, and possibly more chunks after them, and the receiver does not have an STCB for that packet, then authentication is based on the contents of the COOKIE-ECHO chunk. In this situation, the receiver MUST authenticate the chunks in the packet by using the RANDOM parameters, CHUNKS parameters and HMAC_ALGO parameters obtained from the COOKIE-ECHO chunk, and possibly a local shared secret as inputs to the authentication procedure specified in Section 6.3. If authentication fails, then the packet is discarded. If the authentication is successful, the COOKIE-ECHO and all the chunks after the COOKIE-ECHO MUST be processed. If the receiver has an STCB, it MUST process the AUTH chunk as described above using the STCB from the existing association to authenticate the COOKIE-ECHO chunk and all the chunks after it. [...] Commit bbd0d59809f9 introduced the possibility to receive and verification of AUTH chunk, including the edge case for authenticated COOKIE-ECHO. On reception of COOKIE-ECHO, the function sctp_sf_do_5_1D_ce() handles processing, unpacks and creates a new association if it passed sanity checks and also tests for authentication chunks being present. After a new association has been processed, it invokes sctp_process_init() on the new association and walks through the parameter list it received from the INIT chunk. It checks SCTP_PARAM_RANDOM, SCTP_PARAM_HMAC_ALGO and SCTP_PARAM_CHUNKS, and copies them into asoc->peer meta data (peer_random, peer_hmacs, peer_chunks) in case sysctl -w net.sctp.auth_enable=1 is set. If in INIT's SCTP_PARAM_SUPPORTED_EXT parameter SCTP_CID_AUTH is set, peer_random != NULL and peer_hmacs != NULL the peer is to be assumed asoc->peer.auth_capable=1, in any other case asoc->peer.auth_capable=0. Now, if in sctp_sf_do_5_1D_ce() chunk->auth_chunk is available, we set up a fake auth chunk and pass that on to sctp_sf_authenticate(), which at latest in sctp_auth_calculate_hmac() reliably dereferences a NULL pointer at position 0..0008 when setting up the crypto key in crypto_hash_setkey() by using asoc->asoc_shared_key that is NULL as condition key_id == asoc->active_key_id is true if the AUTH chunk was injected correctly from remote. This happens no matter what net.sctp.auth_enable sysctl says. The fix is to check for net->sctp.auth_enable and for asoc->peer.auth_capable before doing any operations like sctp_sf_authenticate() as no key is activated in sctp_auth_asoc_init_active_key() for each case. Now as RFC4895 section 6.3 states that if the used HMAC-ALGO passed from the INIT chunk was not used in the AUTH chunk, we SHOULD send an error; however in this case it would be better to just silently discard such a maliciously prepared handshake as we didn't even receive a parameter at all. Also, as our endpoint has no shared key configured, section 6.3 says that MUST silently discard, which we are doing from now onwards. Before calling sctp_sf_pdiscard(), we need not only to free the association, but also the chunk->auth_chunk skb, as commit bbd0d59809f9 created a skb clone in that case. I have tested this locally by using netfilter's nfqueue and re-injecting packets into the local stack after maliciously modifying the INIT chunk (removing RANDOM; HMAC-ALGO param) and the SCTP packet containing the COOKIE_ECHO (injecting AUTH chunk before COOKIE_ECHO). Fixed with this patch applied. Fixes: bbd0d59809f9 ("[SCTP]: Implement the receive and verification of AUTH chunk") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Cc: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 591b44d..ae65b6b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -758,6 +758,13 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, struct sctp_chunk auth; sctp_ierror_t ret; + /* Make sure that we and the peer are AUTH capable */ + if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { + kfree_skb(chunk->auth_chunk); + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + /* set-up our fake chunk so that we can process it */ auth.skb = chunk->auth_chunk; auth.asoc = chunk->asoc; -- cgit v0.10.2 From 8b4703e9bd1172a5f8244276ebb94302e6153e26 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 3 Mar 2014 15:33:53 -0500 Subject: macvlan: Add support for 'always_on' offload features Macvlan currently inherits all of its features from the lower device. When lower device disables offload support, this causes macvlan to disable offload support as well. This causes performance regression when using macvlan/macvtap in bridge mode. It can be easily demonstrated by creating 2 namespaces using macvlan in bridge mode and running netperf between them: MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.0.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 20.00 1204.61 To restore the performance, we add software offload features to the list of "always_on" features for macvlan. This way when a namespace or a guest using macvtap initially sends a packet, this packet will not be segmented at macvlan level. It will only be segmented when macvlan sends the packet to the lower device. MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.0.1 () port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs. 10^6bits/sec 87380 16384 16384 20.00 5507.35 Fixes: 6acf54f1cf0a6747bac9fea26f34cfc5a9029523 (macvtap: Add support of packet capture on macvtap device.) Fixes: 797f87f83b60685ff8a13fa0572d2f10393c50d3 (macvlan: fix netdev feature propagation from lower device) CC: Florian Westphal CC: Christian Borntraeger CC: Jason Wang CC: Michael S. Tsirkin Tested-by: Christian Borntraeger Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a5d2189..1831fb7 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -506,6 +506,9 @@ static int macvlan_change_mtu(struct net_device *dev, int new_mtu) static struct lock_class_key macvlan_netdev_xmit_lock_key; static struct lock_class_key macvlan_netdev_addr_lock_key; +#define ALWAYS_ON_FEATURES \ + (NETIF_F_SG | NETIF_F_GEN_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX) + #define MACVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ @@ -539,7 +542,7 @@ static int macvlan_init(struct net_device *dev) dev->state = (dev->state & ~MACVLAN_STATE_MASK) | (lowerdev->state & MACVLAN_STATE_MASK); dev->features = lowerdev->features & MACVLAN_FEATURES; - dev->features |= NETIF_F_LLTX; + dev->features |= ALWAYS_ON_FEATURES; dev->gso_max_size = lowerdev->gso_max_size; dev->iflink = lowerdev->ifindex; dev->hard_header_len = lowerdev->hard_header_len; @@ -699,7 +702,7 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev, features = netdev_increment_features(vlan->lowerdev->features, features, mask); - features |= NETIF_F_LLTX; + features |= ALWAYS_ON_FEATURES; return features; } -- cgit v0.10.2 From c64d240df31513522901539a3206a41e9e6d00c8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 3 Mar 2014 10:57:56 -0500 Subject: dm: fix Kconfig indentation Since DM_DEBUG_BLOCK_STACK_TRACING is a DM_PERSISTENT_DATA config option move it from drivers/md/Kconfig to drivers/md/persistent-data/Kconfig. Doing so fixes indentation for other DM config options. Signed-off-by: Mike Snitzer diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 9a06fe8..95ad936 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -254,16 +254,6 @@ config DM_THIN_PROVISIONING ---help--- Provides thin provisioning and snapshots that share a data store. -config DM_DEBUG_BLOCK_STACK_TRACING - boolean "Keep stack trace of persistent data block lock holders" - depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA - select STACKTRACE - ---help--- - Enable this for messages that may help debug problems with the - block manager locking used by thin provisioning and caching. - - If unsure, say N. - config DM_CACHE tristate "Cache target (EXPERIMENTAL)" depends on BLK_DEV_DM diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index 19b2687..0c2dec7 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -6,3 +6,13 @@ config DM_PERSISTENT_DATA ---help--- Library providing immutable on-disk data structure support for device-mapper targets such as the thin provisioning target. + +config DM_DEBUG_BLOCK_STACK_TRACING + boolean "Keep stack trace of persistent data block lock holders" + depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA + select STACKTRACE + ---help--- + Enable this for messages that may help debug problems with the + block manager locking used by thin provisioning and caching. + + If unsure, say N. -- cgit v0.10.2 From 2564338b13e6e132ee224edb63e1e872adf431f4 Mon Sep 17 00:00:00 2001 From: Marios Andreopoulos Date: Mon, 3 Mar 2014 18:19:59 +0200 Subject: libata: disable queued TRIM for Crucial M500 mSATA SSDs Queued TRIM commands cause problems and silent file system corruption on Crucial M500 SSDs. This patch disables them for the mSATA model of the drive. Signed-off-by: Marios Andreopoulos Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # 3.12+ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=71371 diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1a3dbd1..191cdfb 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4225,6 +4225,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* devices that don't properly handle queued TRIM commands */ { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, { "Crucial_CT???M500SSD1", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + { "Crucial_CT???M500SSD3", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, /* * Some WD SATA-I drives spin up and down erratically when the link -- cgit v0.10.2 From 2c945820cab96ebf265598d998e63ef22393d0d4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 3 Mar 2014 17:19:22 -0500 Subject: dm snapshot: fix metadata corruption Commit 55494bf2947dccdf2 ("dm snapshot: use dm-bufio") broke snapshots. Before that 3.14-rc1 commit, loading a snapshot's list of exceptions involved reading exception areas one by one into ps->area and inserting those exceptions into the hash table. Commit 55494bf2947dccdf2 changed it so that dm-bufio with prefetch is used to load exceptions in batchs. Exceptions are loaded correctly, but ps->area is left uninitialized. When a new exception is allocated, it is stored in this uninitialized ps->area which will be written to the disk. This causes metadata corruption. Fix this corruption by copying the last area that was read via dm-bufio into ps->area. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index afc3d01..d6e8817 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -546,6 +546,9 @@ static int read_exceptions(struct pstore *ps, r = insert_exceptions(ps, area, callback, callback_context, &full); + if (!full) + memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT); + dm_bufio_release(bp); dm_bufio_forget(client, chunk); -- cgit v0.10.2 From bff705950e2cdcf35641dee35eb14bad9ed49e8f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 14 Aug 2013 09:54:54 +0100 Subject: arm64: remove unnecessary cache flush at boot Currently we flush the entire dcache at boot within __cpu_setup, but this is unnecessary as the booting protocol demands that the dcache is invalid and off upon entering the kernel. The presence of the cache flush only serves to hide bugs in bootloaders, and is not safe in the presence of SMP. In an SMP boot scenario the CPUs enter coherency outside of the kernel, and the primary CPU enables its caches before bringing up secondary CPUs. Therefore if any secondary CPU has an entry in its cache (in violation of the boot protocol), the primary CPU might snoop it even if the secondary CPU's cache is disabled. The boot-time cache flush only serves to hide a firmware bug, and slows down a cpu boot unnecessarily. This patch removes the unnecessary boot-time cache flush. Signed-off-by: Mark Rutland Acked-by: Will Deacon [catalin.marinas@arm.com: make __flush_dcache_all local only] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 97fcef5..c46f48b 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -30,7 +30,7 @@ * * Corrupted registers: x0-x7, x9-x11 */ -ENTRY(__flush_dcache_all) +__flush_dcache_all: dsb sy // ensure ordering with previous memory accesses mrs x0, clidr_el1 // read clidr and x3, x0, #0x7000000 // extract loc from clidr diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 1333e6f9..e0ef63c 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -173,12 +173,6 @@ ENDPROC(cpu_do_switch_mm) * value of the SCTLR_EL1 register. */ ENTRY(__cpu_setup) - /* - * Preserve the link register across the function call. - */ - mov x28, lr - bl __flush_dcache_all - mov lr, x28 ic iallu // I+BTB cache invalidate tlbi vmalle1is // invalidate I + D TLBs dsb sy -- cgit v0.10.2 From 45ab2813d40d88fc575e753c38478de242d03f88 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 13:37:38 -0500 Subject: tracing: Do not add event files for modules that fail tracepoints If a module fails to add its tracepoints due to module tainting, do not create the module event infrastructure in the debugfs directory. As the events will not work and worse yet, they will silently fail, making the user wonder why the events they enable do not display anything. Having a warning on module load and the events not visible to the users will make the cause of the problem much clearer. Link: http://lkml.kernel.org/r/20140227154923.265882695@goodmis.org Fixes: 6d723736e472 "tracing/events: add support for modules to TRACE_EVENT" Acked-by: Mathieu Desnoyers Cc: stable@vger.kernel.org # 2.6.31+ Cc: Rusty Russell Signed-off-by: Steven Rostedt diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index accc497..7159a0a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,6 +60,12 @@ struct tp_module { unsigned int num_tracepoints; struct tracepoint * const *tracepoints_ptrs; }; +bool trace_module_has_bad_taint(struct module *mod); +#else +static inline bool trace_module_has_bad_taint(struct module *mod) +{ + return false; +} #endif /* CONFIG_MODULES */ struct tracepoint_iter { diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e71ffd4..f3989ce 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1777,6 +1777,16 @@ static void trace_module_add_events(struct module *mod) { struct ftrace_event_call **call, **start, **end; + if (!mod->num_trace_events) + return; + + /* Don't add infrastructure for mods without tracepoints */ + if (trace_module_has_bad_taint(mod)) { + pr_err("%s: module has bad taint, not creating trace events\n", + mod->name); + return; + } + start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 29f2654..031cc56 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -631,6 +631,11 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter) EXPORT_SYMBOL_GPL(tracepoint_iter_reset); #ifdef CONFIG_MODULES +bool trace_module_has_bad_taint(struct module *mod) +{ + return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP)); +} + static int tracepoint_module_coming(struct module *mod) { struct tp_module *tp_mod, *iter; @@ -641,7 +646,7 @@ static int tracepoint_module_coming(struct module *mod) * module headers (for forced load), to make sure we don't cause a crash. * Staging and out-of-tree GPL modules are fine. */ - if (mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP))) + if (trace_module_has_bad_taint(mod)) return 0; mutex_lock(&tracepoints_mutex); tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); -- cgit v0.10.2 From ea2787f350ea5f649fda6bb708c4182cf4ec06ca Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 4 Mar 2014 00:45:18 +0200 Subject: ASoC: n810: fix init with DT boot Since 3.14-rc1 only DT boot has been supported on N810, so this file fails to init. Make a minimal fix to retain functionality. This file should be properly converted to DT in longer term. There seems to be still other unresolved issues with N810 audio support, but this patch is needed at minimum as otherwise the machine driver probing would completely fail. Signed-off-by: Aaro Koskinen Acked-by: Jarkko Nikula Signed-off-by: Mark Brown diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index 3fde9e4..d163e18 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -305,7 +305,9 @@ static int __init n810_soc_init(void) int err; struct device *dev; - if (!(machine_is_nokia_n810() || machine_is_nokia_n810_wimax())) + if (!of_have_populated_dt() || + (!of_machine_is_compatible("nokia,n810") && + !of_machine_is_compatible("nokia,n810-wimax"))) return -ENODEV; n810_snd_device = platform_device_alloc("soc-audio", -1); -- cgit v0.10.2 From 66d6e3b385778c2eef3b172a037235516207393e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Mar 2014 07:34:44 +0000 Subject: binfmt_elf: add ELF_HWCAP2 to compat auxv entries Add ELF_HWCAP2 to the set of auxv entries that is passed to a 32-bit ELF program running in 32-bit compat mode under a 64-bit kernel. Signed-off-by: Ard Biesheuvel Acked-by: Andrew Morton Signed-off-by: Catalin Marinas diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index a81147e..4d24d17 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime, #define ELF_HWCAP COMPAT_ELF_HWCAP #endif +#ifdef COMPAT_ELF_HWCAP2 +#undef ELF_HWCAP2 +#define ELF_HWCAP2 COMPAT_ELF_HWCAP2 +#endif + #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO -- cgit v0.10.2 From 0473c9b5f05948df780bbc7b996dd7aefc4ec41d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 3 Mar 2014 10:44:03 +0100 Subject: compat: let architectures define __ARCH_WANT_COMPAT_SYS_GETDENTS64 For architecture dependent compat syscalls in common code an architecture must define something like __ARCH_WANT_ if it wants to use the code. This however is not true for compat_sys_getdents64 for which architectures must define __ARCH_OMIT_COMPAT_SYS_GETDENTS64 if they do not want the code. This leads to the situation where all architectures, except mips, get the compat code but only x86_64, arm64 and the generic syscall architectures actually use it. So invert the logic, so that architectures actively must do something to get the compat code. This way a couple of architectures get rid of otherwise dead code. Signed-off-by: Heiko Carstens diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 82ce217..a4654c6 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -14,6 +14,7 @@ * along with this program. If not, see . */ #ifdef CONFIG_COMPAT +#define __ARCH_WANT_COMPAT_SYS_GETDENTS64 #define __ARCH_WANT_COMPAT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index 4d3b928..413d6c6 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -24,7 +24,6 @@ #ifndef __ASSEMBLY__ -#define __ARCH_OMIT_COMPAT_SYS_GETDENTS64 #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index c2a4813..f4b5795 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -23,6 +23,7 @@ # include # include # define __ARCH_WANT_COMPAT_SYS_TIME +# define __ARCH_WANT_COMPAT_SYS_GETDENTS64 # endif diff --git a/fs/compat.c b/fs/compat.c index 6af20de..0095a69 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -981,7 +981,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, return error; } -#ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 +#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 struct compat_getdents_callback64 { struct dir_context ctx; @@ -1066,7 +1066,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, fdput(f); return error; } -#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ +#endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ /* * Exactly like fs/open.c:sys_open(), except that it doesn't set the diff --git a/include/linux/compat.h b/include/linux/compat.h index 3f448c6..beded18 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -502,9 +502,11 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); +#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 asmlinkage long compat_sys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, unsigned int count); +#endif asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, unsigned int nr_segs, unsigned int flags); asmlinkage long compat_sys_open(const char __user *filename, int flags, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index dde8041..6db6678 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -191,6 +191,7 @@ __SYSCALL(__NR_quotactl, sys_quotactl) /* fs/readdir.c */ #define __NR_getdents64 61 +#define __ARCH_WANT_COMPAT_SYS_GETDENTS64 __SC_COMP(__NR_getdents64, sys_getdents64, compat_sys_getdents64) /* fs/read_write.c */ -- cgit v0.10.2 From 217f4433fc2fe768a7f13f7e5586333bb8280e9e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Feb 2014 10:56:20 +0100 Subject: compat: add COMPAT_SYSCALL_DEFINE0 macro For consistency reason add a COMPAT_SYSCALL_DEFINE0 macro. This macro should be used for compat system calls with zero parameters. Signed-off-by: Heiko Carstens diff --git a/include/linux/compat.h b/include/linux/compat.h index beded18..4c42df3 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -27,6 +27,9 @@ #define __SC_DELOUSE(t,v) ((t)(unsigned long)(v)) #endif +#define COMPAT_SYSCALL_DEFINE0(name) \ + asmlinkage long compat_sys_##name(void) + #define COMPAT_SYSCALL_DEFINE1(name, ...) \ COMPAT_SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) #define COMPAT_SYSCALL_DEFINE2(name, ...) \ -- cgit v0.10.2 From c6c0f58f90cb8e159702ccc08772bc9221f1c35e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 25 Feb 2014 14:08:39 +0100 Subject: s390/compat: convert to COMPAT_SYSCALL_DEFINEx part 1 Convert s390 specific system calls to to the new COMPAT_SYSCALL_DEFINE macro. This allows us to get rid of the assembly compat wrappers. Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index db02052..9abf84e 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -86,27 +86,29 @@ #define SET_STAT_UID(stat, uid) (stat).st_uid = high2lowuid(uid) #define SET_STAT_GID(stat, gid) (stat).st_gid = high2lowgid(gid) -asmlinkage long sys32_chown16(const char __user * filename, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_chown16, const char __user *, filename, + u16, user, u16, group) { return sys_chown(filename, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_lchown16(const char __user * filename, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_lchown16, const char __user *, + filename, u16, user, u16, group) { return sys_lchown(filename, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_fchown16(unsigned int fd, u16 user, u16 group) +COMPAT_SYSCALL_DEFINE3(s390_fchown16, unsigned int, fd, u16, user, u16, group) { return sys_fchown(fd, low2highuid(user), low2highgid(group)); } -asmlinkage long sys32_setregid16(u16 rgid, u16 egid) +COMPAT_SYSCALL_DEFINE2(s390_setregid16, u16, rgid, u16, egid) { return sys_setregid(low2highgid(rgid), low2highgid(egid)); } -asmlinkage long sys32_setgid16(u16 gid) +COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid) { return sys_setgid((gid_t)gid); } diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 1bfda3e..161b6c4 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -76,11 +76,11 @@ struct stat64_emu31; struct mmap_arg_struct_emu31; struct fadvise64_64_args; -long sys32_chown16(const char __user * filename, u16 user, u16 group); -long sys32_lchown16(const char __user * filename, u16 user, u16 group); -long sys32_fchown16(unsigned int fd, u16 user, u16 group); -long sys32_setregid16(u16 rgid, u16 egid); -long sys32_setgid16(u16 gid); +long compat_sys_s390_chown16(const char __user *filename, u16 user, u16 group); +long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group); +long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group); +long compat_sys_s390_setregid16(u16 rgid, u16 egid); +long compat_sys_s390_setgid16(u16 gid); long sys32_setreuid16(u16 ruid, u16 euid); long sys32_setuid16(u16 uid); long sys32_setresuid16(u16 ruid, u16 euid, u16 suid); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 0248949..cc85c722 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -61,12 +61,6 @@ ENTRY(sys32_chmod_wrapper) llgfr %r3,%r3 # mode_t jg sys_chmod # branch to system call -ENTRY(sys32_lchown16_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_lchown16 # branch to system call - #sys32_getpid_wrapper # void ENTRY(sys32_mount_wrapper) @@ -149,10 +143,6 @@ ENTRY(sys32_brk_wrapper) llgtr %r2,%r2 # unsigned long jg sys_brk # branch to system call -ENTRY(sys32_setgid16_wrapper) - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setgid16 # branch to system call - #sys32_getgid16_wrapper # void ENTRY(sys32_signal_wrapper) @@ -219,11 +209,6 @@ ENTRY(sys32_setreuid16_wrapper) llgfr %r3,%r3 # __kernel_old_uid_emu31_t jg sys32_setreuid16 # branch to system call -ENTRY(sys32_setregid16_wrapper) - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - jg sys32_setregid16 # branch to system call - ENTRY(sys_sigsuspend_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int @@ -325,12 +310,6 @@ ENTRY(sys32_fchmod_wrapper) llgfr %r3,%r3 # mode_t jg sys_fchmod # branch to system call -ENTRY(sys32_fchown16_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # compat_uid_t - llgfr %r4,%r4 # compat_uid_t - jg sys32_fchown16 # branch to system call - ENTRY(sys32_getpriority_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int @@ -632,12 +611,6 @@ ENTRY(sys32_pwrite64_wrapper) llgfr %r6,%r6 # u32 jg sys32_pwrite64 # branch to system call -ENTRY(sys32_chown16_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_chown16 # branch to system call - ENTRY(sys32_getcwd_wrapper) llgtr %r2,%r2 # char * llgfr %r3,%r3 # unsigned long diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 1439921..a298630 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -24,7 +24,7 @@ SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper) SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */ SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper) SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */ -SYSCALL(sys_lchown16,sys_ni_syscall,sys32_lchown16_wrapper) /* old lchown16 syscall*/ +SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/ NI_SYSCALL /* old break syscall holder */ NI_SYSCALL /* old stat syscall holder */ SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) @@ -54,7 +54,7 @@ SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper) SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) NI_SYSCALL /* old prof syscall */ SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */ -SYSCALL(sys_setgid16,sys_ni_syscall,sys32_setgid16_wrapper) /* old setgid16 syscall*/ +SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/ SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/ SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper) SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */ @@ -79,7 +79,7 @@ SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction) NI_SYSCALL /* old sgetmask syscall*/ NI_SYSCALL /* old ssetmask syscall*/ SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */ -SYSCALL(sys_setregid16,sys_ni_syscall,sys32_setregid16_wrapper) /* old setregid16 syscall */ +SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */ SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper) SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) @@ -103,7 +103,7 @@ SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate) SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper) -SYSCALL(sys_fchown16,sys_ni_syscall,sys32_fchown16_wrapper) /* 95 old fchown16 syscall*/ +SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/ SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper) SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper) NI_SYSCALL /* old profil syscall */ @@ -190,7 +190,7 @@ SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo) SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend) SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */ SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper) -SYSCALL(sys_chown16,sys_ni_syscall,sys32_chown16_wrapper) /* old chown16 syscall */ +SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */ SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ -- cgit v0.10.2 From 208096eee2fa44de0464e1f3c1f0c1ca1afe67ad Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 25 Feb 2014 15:52:24 +0100 Subject: s390/compat: convert to COMPAT_SYSCALL_DEFINEx part 2 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 9abf84e..b47a8f1 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -113,23 +113,24 @@ COMPAT_SYSCALL_DEFINE1(s390_setgid16, u16, gid) return sys_setgid((gid_t)gid); } -asmlinkage long sys32_setreuid16(u16 ruid, u16 euid) +COMPAT_SYSCALL_DEFINE2(s390_setreuid16, u16, ruid, u16, euid) { return sys_setreuid(low2highuid(ruid), low2highuid(euid)); } -asmlinkage long sys32_setuid16(u16 uid) +COMPAT_SYSCALL_DEFINE1(s390_setuid16, u16, uid) { return sys_setuid((uid_t)uid); } -asmlinkage long sys32_setresuid16(u16 ruid, u16 euid, u16 suid) +COMPAT_SYSCALL_DEFINE3(s390_setresuid16, u16, ruid, u16, euid, u16, suid) { return sys_setresuid(low2highuid(ruid), low2highuid(euid), - low2highuid(suid)); + low2highuid(suid)); } -asmlinkage long sys32_getresuid16(u16 __user *ruidp, u16 __user *euidp, u16 __user *suidp) +COMPAT_SYSCALL_DEFINE3(s390_getresuid16, u16 __user *, ruidp, + u16 __user *, euidp, u16 __user *, suidp) { const struct cred *cred = current_cred(); int retval; @@ -146,10 +147,10 @@ asmlinkage long sys32_getresuid16(u16 __user *ruidp, u16 __user *euidp, u16 __us return retval; } -asmlinkage long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid) +COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid) { return sys_setresgid(low2highgid(rgid), low2highgid(egid), - low2highgid(sgid)); + low2highgid(sgid)); } asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __user *sgidp) diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 161b6c4..2b8dfe6 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -81,11 +81,11 @@ long compat_sys_s390_lchown16(const char __user *filename, u16 user, u16 group); long compat_sys_s390_fchown16(unsigned int fd, u16 user, u16 group); long compat_sys_s390_setregid16(u16 rgid, u16 egid); long compat_sys_s390_setgid16(u16 gid); -long sys32_setreuid16(u16 ruid, u16 euid); -long sys32_setuid16(u16 uid); -long sys32_setresuid16(u16 ruid, u16 euid, u16 suid); -long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); -long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid); +long compat_sys_s390_setreuid16(u16 ruid, u16 euid); +long compat_sys_s390_setuid16(u16 uid); +long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid); +long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); +long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid); long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); long sys32_setfsuid16(u16 uid); long sys32_setfsgid16(u16 gid); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index cc85c722..a073e7f 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -75,10 +75,6 @@ ENTRY(sys32_oldumount_wrapper) llgtr %r2,%r2 # char * jg sys_oldumount # branch to system call -ENTRY(sys32_setuid16_wrapper) - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setuid16 # branch to system call - #sys32_getuid16_wrapper # void ENTRY(sys32_ptrace_wrapper) @@ -204,11 +200,6 @@ ENTRY(sys32_dup2_wrapper) #sys32_setsid_wrapper # void -ENTRY(sys32_setreuid16_wrapper) - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - jg sys32_setreuid16 # branch to system call - ENTRY(sys_sigsuspend_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int @@ -555,30 +546,12 @@ ENTRY(sys32_mremap_wrapper) llgfr %r6,%r6 # unsigned long jg sys_mremap # branch to system call -ENTRY(sys32_setresuid16_wrapper) - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - llgfr %r3,%r3 # __kernel_old_uid_emu31_t - llgfr %r4,%r4 # __kernel_old_uid_emu31_t - jg sys32_setresuid16 # branch to system call - -ENTRY(sys32_getresuid16_wrapper) - llgtr %r2,%r2 # __kernel_old_uid_emu31_t * - llgtr %r3,%r3 # __kernel_old_uid_emu31_t * - llgtr %r4,%r4 # __kernel_old_uid_emu31_t * - jg sys32_getresuid16 # branch to system call - ENTRY(sys32_poll_wrapper) llgtr %r2,%r2 # struct pollfd * llgfr %r3,%r3 # unsigned int lgfr %r4,%r4 # int jg sys_poll # branch to system call -ENTRY(sys32_setresgid16_wrapper) - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - llgfr %r3,%r3 # __kernel_old_gid_emu31_t - llgfr %r4,%r4 # __kernel_old_gid_emu31_t - jg sys32_setresgid16 # branch to system call - ENTRY(sys32_getresgid16_wrapper) llgtr %r2,%r2 # __kernel_old_gid_emu31_t * llgtr %r3,%r3 # __kernel_old_gid_emu31_t * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index a298630..8c7c4a6 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -31,7 +31,7 @@ SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) -SYSCALL(sys_setuid16,sys_ni_syscall,sys32_setuid16_wrapper) /* old setuid16 syscall*/ +SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/ SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/ SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) @@ -78,7 +78,7 @@ SYSCALL(sys_setsid,sys_setsid,sys_setsid) SYSCALL(sys_sigaction,sys_sigaction,compat_sys_sigaction) NI_SYSCALL /* old sgetmask syscall*/ NI_SYSCALL /* old ssetmask syscall*/ -SYSCALL(sys_setreuid16,sys_ni_syscall,sys32_setreuid16_wrapper) /* old setreuid16 syscall */ +SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */ SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */ SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper) SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) @@ -172,13 +172,13 @@ SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_pr SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval) SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper) SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper) -SYSCALL(sys_setresuid16,sys_ni_syscall,sys32_setresuid16_wrapper) /* old setresuid16 syscall */ -SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old getresuid16 syscall */ +SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */ +SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */ NI_SYSCALL /* for vm86 */ NI_SYSCALL /* old sys_query_module */ SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) NI_SYSCALL /* old nfsservctl */ -SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */ +SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */ SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn) -- cgit v0.10.2 From 4ca2ea58c8421784bdad43b05e9939549e4d34e9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 25 Feb 2014 16:17:41 +0100 Subject: s390/compat: convert to COMPAT_SYSCALL_DEFINEx part 3 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index b47a8f1..65b5acb 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -153,7 +153,8 @@ COMPAT_SYSCALL_DEFINE3(s390_setresgid16, u16, rgid, u16, egid, u16, sgid) low2highgid(sgid)); } -asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __user *sgidp) +COMPAT_SYSCALL_DEFINE3(s390_getresgid16, u16 __user *, rgidp, + u16 __user *, egidp, u16 __user *, sgidp) { const struct cred *cred = current_cred(); int retval; @@ -170,12 +171,12 @@ asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __us return retval; } -asmlinkage long sys32_setfsuid16(u16 uid) +COMPAT_SYSCALL_DEFINE1(s390_setfsuid16, u16, uid) { return sys_setfsuid((uid_t)uid); } -asmlinkage long sys32_setfsgid16(u16 gid) +COMPAT_SYSCALL_DEFINE1(s390_setfsgid16, u16, gid) { return sys_setfsgid((gid_t)gid); } @@ -218,7 +219,7 @@ static int groups16_from_user(struct group_info *group_info, u16 __user *groupli return 0; } -asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) +COMPAT_SYSCALL_DEFINE2(s390_getgroups16, int, gidsetsize, u16 __user *, grouplist) { const struct cred *cred = current_cred(); int i; @@ -243,7 +244,7 @@ out: return i; } -asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) +COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplist) { struct group_info *group_info; int retval; diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 2b8dfe6..76686723 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -86,11 +86,11 @@ long compat_sys_s390_setuid16(u16 uid); long compat_sys_s390_setresuid16(u16 ruid, u16 euid, u16 suid); long compat_sys_s390_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid); long compat_sys_s390_setresgid16(u16 rgid, u16 egid, u16 sgid); -long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); -long sys32_setfsuid16(u16 uid); -long sys32_setfsgid16(u16 gid); -long sys32_getgroups16(int gidsetsize, u16 __user *grouplist); -long sys32_setgroups16(int gidsetsize, u16 __user *grouplist); +long compat_sys_s390_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid); +long compat_sys_s390_setfsuid16(u16 uid); +long compat_sys_s390_setfsgid16(u16 gid); +long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist); +long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist); long sys32_getuid16(void); long sys32_geteuid16(void); long sys32_getgid16(void); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index a073e7f..6c55523 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -244,16 +244,6 @@ ENTRY(compat_sys_settimeofday_wrapper) llgtr %r3,%r3 # struct timezone * jg compat_sys_settimeofday # branch to system call -ENTRY(sys32_getgroups16_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_getgroups16 # branch to system call - -ENTRY(sys32_setgroups16_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - jg sys32_setgroups16 # branch to system call - ENTRY(sys32_symlink_wrapper) llgtr %r2,%r2 # const char * llgtr %r3,%r3 # const char * @@ -426,14 +416,6 @@ ENTRY(sys32_personality_wrapper) llgfr %r2,%r2 # unsigned int jg sys_s390_personality # branch to system call -ENTRY(sys32_setfsuid16_wrapper) - llgfr %r2,%r2 # __kernel_old_uid_emu31_t - jg sys32_setfsuid16 # branch to system call - -ENTRY(sys32_setfsgid16_wrapper) - llgfr %r2,%r2 # __kernel_old_gid_emu31_t - jg sys32_setfsgid16 # branch to system call - ENTRY(sys32_llseek_wrapper) llgfr %r2,%r2 # unsigned int llgfr %r3,%r3 # unsigned long @@ -552,12 +534,6 @@ ENTRY(sys32_poll_wrapper) lgfr %r4,%r4 # int jg sys_poll # branch to system call -ENTRY(sys32_getresgid16_wrapper) - llgtr %r2,%r2 # __kernel_old_gid_emu31_t * - llgtr %r3,%r3 # __kernel_old_gid_emu31_t * - llgtr %r4,%r4 # __kernel_old_gid_emu31_t * - jg sys32_getresgid16 # branch to system call - ENTRY(sys32_prctl_wrapper) lgfr %r2,%r2 # int llgfr %r3,%r3 # unsigned long diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 8c7c4a6..bdba42a 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -88,8 +88,8 @@ SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage) SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper) SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) -SYSCALL(sys_getgroups16,sys_ni_syscall,sys32_getgroups16_wrapper) /* 80 old getgroups16 syscall */ -SYSCALL(sys_setgroups16,sys_ni_syscall,sys32_setgroups16_wrapper) /* old setgroups16 syscall */ +SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */ +SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */ NI_SYSCALL /* old select syscall */ SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper) NI_SYSCALL /* old lstat syscall */ @@ -146,8 +146,8 @@ SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper) SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */ SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper) NI_SYSCALL /* for afs_syscall */ -SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */ -SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */ +SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */ +SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */ SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */ SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) @@ -179,7 +179,7 @@ NI_SYSCALL /* old sys_query_module */ SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) NI_SYSCALL /* old nfsservctl */ SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */ -SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ +SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */ SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn) SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction) -- cgit v0.10.2 From e723e0cc17f8d8573b19a809277d81cecdd6e92b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Feb 2014 10:58:09 +0100 Subject: s390/compat: convert to COMPAT_SYSCALL_DEFINEx part 4 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 65b5acb..7957134 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -269,22 +269,22 @@ COMPAT_SYSCALL_DEFINE2(s390_setgroups16, int, gidsetsize, u16 __user *, grouplis return retval; } -asmlinkage long sys32_getuid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getuid16) { return high2lowuid(from_kuid_munged(current_user_ns(), current_uid())); } -asmlinkage long sys32_geteuid16(void) +COMPAT_SYSCALL_DEFINE0(s390_geteuid16) { return high2lowuid(from_kuid_munged(current_user_ns(), current_euid())); } -asmlinkage long sys32_getgid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getgid16) { return high2lowgid(from_kgid_munged(current_user_ns(), current_gid())); } -asmlinkage long sys32_getegid16(void) +COMPAT_SYSCALL_DEFINE0(s390_getegid16) { return high2lowgid(from_kgid_munged(current_user_ns(), current_egid())); } @@ -299,12 +299,9 @@ COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, } #endif -asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) +COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low) { - if ((int)high < 0) - return -EINVAL; - else - return sys_truncate(path, (high << 32) | low); + return sys_truncate(path, (unsigned long)high << 32 | low); } asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 76686723..3e03325 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -91,12 +91,11 @@ long compat_sys_s390_setfsuid16(u16 uid); long compat_sys_s390_setfsgid16(u16 gid); long compat_sys_s390_getgroups16(int gidsetsize, u16 __user *grouplist); long compat_sys_s390_setgroups16(int gidsetsize, u16 __user *grouplist); -long sys32_getuid16(void); -long sys32_geteuid16(void); -long sys32_getgid16(void); -long sys32_getegid16(void); -long sys32_truncate64(const char __user * path, unsigned long high, - unsigned long low); +long compat_sys_s390_getuid16(void); +long compat_sys_s390_geteuid16(void); +long compat_sys_s390_getgid16(void); +long compat_sys_s390_getegid16(void); +long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low); long sys32_init_module(void __user *umod, unsigned long len, const char __user *uargs); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 6c55523..9128f7b 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -75,8 +75,6 @@ ENTRY(sys32_oldumount_wrapper) llgtr %r2,%r2 # char * jg sys_oldumount # branch to system call -#sys32_getuid16_wrapper # void - ENTRY(sys32_ptrace_wrapper) lgfr %r2,%r2 # long lgfr %r3,%r3 # long @@ -139,17 +137,11 @@ ENTRY(sys32_brk_wrapper) llgtr %r2,%r2 # unsigned long jg sys_brk # branch to system call -#sys32_getgid16_wrapper # void - ENTRY(sys32_signal_wrapper) lgfr %r2,%r2 # int llgtr %r3,%r3 # __sighandler_t jg sys_signal -#sys32_geteuid16_wrapper # void - -#sys32_getegid16_wrapper # void - ENTRY(sys32_acct_wrapper) llgtr %r2,%r2 # char * jg sys_acct # branch to system call @@ -577,12 +569,6 @@ ENTRY(sys32_capset_wrapper) #sys32_vfork_wrapper # done in vfork_glue -ENTRY(sys32_truncate64_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_truncate64 # branch to system call - ENTRY(sys32_ftruncate64_wrapper) llgfr %r2,%r2 # unsigned int llgfr %r3,%r3 # unsigned long diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index bdba42a..fa17cef 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -32,7 +32,7 @@ SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/ -SYSCALL(sys_getuid16,sys_ni_syscall,sys32_getuid16) /* old getuid16 syscall*/ +SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/ SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) @@ -55,10 +55,10 @@ SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) NI_SYSCALL /* old prof syscall */ SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */ SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/ -SYSCALL(sys_getgid16,sys_ni_syscall,sys32_getgid16) /* old getgid16 syscall*/ +SYSCALL(sys_getgid16,sys_ni_syscall,compat_sys_s390_getgid16) /* old getgid16 syscall*/ SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper) -SYSCALL(sys_geteuid16,sys_ni_syscall,sys32_geteuid16) /* old geteuid16 syscall */ -SYSCALL(sys_getegid16,sys_ni_syscall,sys32_getegid16) /* 50 old getegid16 syscall */ +SYSCALL(sys_geteuid16,sys_ni_syscall,compat_sys_s390_geteuid16) /* old geteuid16 syscall */ +SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegid16 syscall */ SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper) SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper) NI_SYSCALL /* old lock syscall */ @@ -201,7 +201,7 @@ NI_SYSCALL /* streams2 */ SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper) -SYSCALL(sys_truncate64,sys_ni_syscall,sys32_truncate64_wrapper) +SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64) SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper) SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */ SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper) -- cgit v0.10.2 From 52a0b536a3addcfe20325303b2dddc2538019476 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Feb 2014 12:51:40 +0100 Subject: s390/compat: convert to COMPAT_SYSCALL_DEFINEx part 5 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 7957134..08b20c2 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -304,33 +304,30 @@ COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u3 return sys_truncate(path, (unsigned long)high << 32 | low); } -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) +COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) { - if ((int)high < 0) - return -EINVAL; - else - return sys_ftruncate(fd, (high << 32) | low); + return sys_ftruncate(fd, (unsigned long)high << 32 | low); } -asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf, - size_t count, u32 poshi, u32 poslo) +COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pread64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); + return sys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); } -asmlinkage long sys32_pwrite64(unsigned int fd, const char __user *ubuf, - size_t count, u32 poshi, u32 poslo) +COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) { if ((compat_ssize_t) count < 0) return -EINVAL; - return sys_pwrite64(fd, ubuf, count, ((loff_t)AA(poshi) << 32) | AA(poslo)); + return sys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); } -asmlinkage compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count) +COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) { - return sys_readahead(fd, ((loff_t)AA(offhi) << 32) | AA(offlo), count); + return sys_readahead(fd, (unsigned long)high << 32 | low, count); } struct stat64_emu31 { @@ -382,7 +379,7 @@ static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_stat(filename, &stat); diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 3e03325..df5f715 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -96,16 +96,14 @@ long compat_sys_s390_geteuid16(void); long compat_sys_s390_getgid16(void); long compat_sys_s390_getegid16(void); long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); -long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low); +long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low); long sys32_init_module(void __user *umod, unsigned long len, const char __user *uargs); long sys32_delete_module(const char __user *name_user, unsigned int flags); -long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count, - u32 poshi, u32 poslo); -long sys32_pwrite64(unsigned int fd, const char __user *ubuf, - size_t count, u32 poshi, u32 poslo); -compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count); -long sys32_stat64(const char __user * filename, struct stat64_emu31 __user * statbuf); +long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); +long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf); long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf); long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 9128f7b..399255d 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -536,22 +536,6 @@ ENTRY(sys32_prctl_wrapper) #sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue -ENTRY(sys32_pread64_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pread64 # branch to system call - -ENTRY(sys32_pwrite64_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg sys32_pwrite64 # branch to system call - ENTRY(sys32_getcwd_wrapper) llgtr %r2,%r2 # char * llgfr %r3,%r3 # unsigned long @@ -569,12 +553,6 @@ ENTRY(sys32_capset_wrapper) #sys32_vfork_wrapper # done in vfork_glue -ENTRY(sys32_ftruncate64_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys32_ftruncate64 # branch to system call - ENTRY(sys32_lchown_wrapper) llgtr %r2,%r2 # const char * llgfr %r3,%r3 # uid_t @@ -687,11 +665,6 @@ ENTRY(compat_sys_fcntl64_wrapper) llgfr %r4,%r4 # unsigned long jg compat_sys_fcntl64 # branch to system call -ENTRY(sys32_stat64_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_stat64 # branch to system call - ENTRY(sys32_lstat64_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # struct stat64 * @@ -1201,13 +1174,6 @@ ENTRY(sys_epoll_create1_wrapper) lgfr %r2,%r2 # int jg sys_epoll_create1 # branch to system call -ENTRY(sys32_readahead_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # u32 - llgfr %r4,%r4 # u32 - lgfr %r5,%r5 # s32 - jg sys32_readahead # branch to system call - ENTRY(sys_tkill_wrapper) lgfr %r2,%r2 # pid_t lgfr %r3,%r3 # int diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index fa17cef..8b07ce3 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -188,8 +188,8 @@ SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending) SYSCALL(sys_rt_sigtimedwait,sys_rt_sigtimedwait,compat_sys_rt_sigtimedwait) SYSCALL(sys_rt_sigqueueinfo,sys_rt_sigqueueinfo,compat_sys_rt_sigqueueinfo) SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend) -SYSCALL(sys_pread64,sys_pread64,sys32_pread64_wrapper) /* 180 */ -SYSCALL(sys_pwrite64,sys_pwrite64,sys32_pwrite64_wrapper) +SYSCALL(sys_pread64,sys_pread64,compat_sys_s390_pread64) /* 180 */ +SYSCALL(sys_pwrite64,sys_pwrite64,compat_sys_s390_pwrite64) SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */ SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) @@ -202,8 +202,8 @@ SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper) SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64) -SYSCALL(sys_ftruncate64,sys_ni_syscall,sys32_ftruncate64_wrapper) -SYSCALL(sys_stat64,sys_ni_syscall,sys32_stat64_wrapper) /* 195 */ +SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64) +SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64) /* 195 */ SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper) SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper) SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper) @@ -230,7 +230,7 @@ SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper) SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) -SYSCALL(sys_readahead,sys_readahead,sys32_readahead_wrapper) +SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead) SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -- cgit v0.10.2 From a0f8c6da8f3da63283e978ebece03e81d107b1d0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Feb 2014 14:05:34 +0100 Subject: s390/compat: convert to COMPAT_SYSCALL_DEFINEx part 6 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 08b20c2..861427f 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -388,7 +388,7 @@ COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64 return ret; } -asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_lstat(filename, &stat); @@ -397,7 +397,7 @@ asmlinkage long sys32_lstat64(const char __user * filename, struct stat64_emu31 return ret; } -asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf) { struct kstat stat; int ret = vfs_fstat(fd, &stat); @@ -406,8 +406,8 @@ asmlinkage long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * sta return ret; } -asmlinkage long sys32_fstatat64(unsigned int dfd, const char __user *filename, - struct stat64_emu31 __user* statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename, + struct stat64_emu31 __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -433,7 +433,7 @@ struct mmap_arg_struct_emu31 { compat_ulong_t offset; }; -asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) +COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg) { struct mmap_arg_struct_emu31 a; @@ -445,7 +445,7 @@ asmlinkage unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg) a.offset >> PAGE_SHIFT); } -asmlinkage long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg) +COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) { struct mmap_arg_struct_emu31 a; diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index df5f715..7e7afb9 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -104,13 +104,11 @@ long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t c long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf); -long sys32_lstat64(const char __user * filename, - struct stat64_emu31 __user * statbuf); -long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf); -long sys32_fstatat64(unsigned int dfd, const char __user *filename, - struct stat64_emu31 __user* statbuf, int flag); -unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg); -long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag); +long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg); long sys32_read(unsigned int fd, char __user * buf, size_t count); long sys32_write(unsigned int fd, const char __user * buf, size_t count); long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 399255d..c8ac063 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -222,10 +222,6 @@ ENTRY(compat_sys_getrlimit_wrapper) llgtr %r3,%r3 # struct rlimit_emu31 * jg compat_sys_getrlimit # branch to system call -ENTRY(sys32_mmap2_wrapper) - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg sys32_mmap2 # branch to system call - ENTRY(compat_sys_gettimeofday_wrapper) llgtr %r2,%r2 # struct timeval_emu31 * llgtr %r3,%r3 # struct timezone * @@ -269,10 +265,6 @@ ENTRY(old32_readdir_wrapper) llgfr %r4,%r4 # unsigned int jg compat_sys_old_readdir # branch to system call -ENTRY(old32_mmap_wrapper) - llgtr %r2,%r2 # struct mmap_arg_struct_emu31 * - jg old32_mmap # branch to system call - ENTRY(sys32_munmap_wrapper) llgfr %r2,%r2 # unsigned long llgfr %r3,%r3 # size_t @@ -665,20 +657,10 @@ ENTRY(compat_sys_fcntl64_wrapper) llgfr %r4,%r4 # unsigned long jg compat_sys_fcntl64 # branch to system call -ENTRY(sys32_lstat64_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat64 * - jg sys32_lstat64 # branch to system call - ENTRY(sys32_stime_wrapper) llgtr %r2,%r2 # long * jg compat_sys_stime # branch to system call -ENTRY(sys32_fstat64_wrapper) - llgfr %r2,%r2 # unsigned long - llgtr %r3,%r3 # struct stat64 * - jg sys32_fstat64 # branch to system call - ENTRY(sys32_setxattr_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # char * @@ -1013,13 +995,6 @@ ENTRY(compat_sys_futimesat_wrapper) llgtr %r4,%r4 # struct timeval * jg compat_sys_futimesat -ENTRY(sys32_fstatat64_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct stat64 * - lgfr %r5,%r5 # int - jg sys32_fstatat64 - ENTRY(sys_unlinkat_wrapper) lgfr %r2,%r2 # int llgtr %r3,%r3 # const char * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 8b07ce3..9d4e6b5 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -98,7 +98,7 @@ SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ -SYSCALL(sys_old_mmap,sys_old_mmap,old32_mmap_wrapper) /* 90 */ +SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */ SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate) @@ -200,12 +200,12 @@ NI_SYSCALL /* streams1 */ NI_SYSCALL /* streams2 */ SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) -SYSCALL(sys_mmap2,sys_mmap2,sys32_mmap2_wrapper) +SYSCALL(sys_mmap2,sys_mmap2,compat_sys_s390_mmap2) SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64) SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64) SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64) /* 195 */ -SYSCALL(sys_lstat64,sys_ni_syscall,sys32_lstat64_wrapper) -SYSCALL(sys_fstat64,sys_ni_syscall,sys32_fstat64_wrapper) +SYSCALL(sys_lstat64,sys_ni_syscall,compat_sys_s390_lstat64) +SYSCALL(sys_fstat64,sys_ni_syscall,compat_sys_s390_fstat64) SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper) SYSCALL(sys_getuid,sys_getuid,sys_getuid) SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */ @@ -301,7 +301,7 @@ SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper) SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */ SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper) SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) -SYSCALL(sys_fstatat64,sys_newfstatat,sys32_fstatat64_wrapper) +SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64) SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */ SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper) -- cgit v0.10.2 From 5383d2c8b3ee61a762043818d7c07bbc0049b031 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 26 Feb 2014 14:40:43 +0100 Subject: s390/compat: convert to COMPAT_SYSCALL_DEFINEx part 7 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 861427f..5909774 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -454,7 +454,7 @@ COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); } -asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count) +COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count) { if ((compat_ssize_t) count < 0) return -EINVAL; @@ -462,7 +462,7 @@ asmlinkage long sys32_read(unsigned int fd, char __user * buf, size_t count) return sys_read(fd, buf, count); } -asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t count) +COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count) { if ((compat_ssize_t) count < 0) return -EINVAL; @@ -476,14 +476,13 @@ asmlinkage long sys32_write(unsigned int fd, const char __user * buf, size_t cou * because the 31 bit values differ from the 64 bit values. */ -asmlinkage long -sys32_fadvise64(int fd, loff_t offset, size_t len, int advise) +COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise) { if (advise == 4) advise = POSIX_FADV_DONTNEED; else if (advise == 5) advise = POSIX_FADV_NOREUSE; - return sys_fadvise64(fd, offset, len, advise); + return sys_fadvise64(fd, (unsigned long)high << 32 | low, len, advise); } struct fadvise64_64_args { @@ -493,8 +492,7 @@ struct fadvise64_64_args { int advice; }; -asmlinkage long -sys32_fadvise64_64(struct fadvise64_64_args __user *args) +COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) { struct fadvise64_64_args a; diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 7e7afb9..7d2ce4b 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -97,9 +97,6 @@ long compat_sys_s390_getgid16(void); long compat_sys_s390_getegid16(void); long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low); -long sys32_init_module(void __user *umod, unsigned long len, - const char __user *uargs); -long sys32_delete_module(const char __user *name_user, unsigned int flags); long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low); long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); @@ -109,8 +106,8 @@ long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbu long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag); long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg); long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg); -long sys32_read(unsigned int fd, char __user * buf, size_t count); -long sys32_write(unsigned int fd, const char __user * buf, size_t count); -long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise); -long sys32_fadvise64_64(struct fadvise64_64_args __user *args); +long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t count); +long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count); +long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise); +long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); #endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index c8ac063..16ce718 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -12,18 +12,6 @@ ENTRY(sys32_exit_wrapper) lgfr %r2,%r2 # int jg sys_exit # branch to sys_exit -ENTRY(sys32_read_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys32_read # branch to sys_read - -ENTRY(sys32_write_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - jg sys32_write # branch to system call - ENTRY(sys32_close_wrapper) llgfr %r2,%r2 # unsigned int jg sys_close # branch to system call @@ -777,18 +765,6 @@ ENTRY(sys_epoll_wait_wrapper) lgfr %r5,%r5 # int jg sys_epoll_wait # branch to system call -ENTRY(sys32_fadvise64_wrapper) - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - llgfr %r4,%r5 # size_t (unsigned long) - lgfr %r5,%r6 # int - jg sys32_fadvise64 - -ENTRY(sys32_fadvise64_64_wrapper) - llgtr %r2,%r2 # struct fadvise64_64_args * - jg sys32_fadvise64_64 - ENTRY(sys32_clock_settime_wrapper) lgfr %r2,%r2 # clockid_t (int) llgtr %r3,%r3 # struct compat_timespec * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9d4e6b5..5a83f6a 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -11,8 +11,8 @@ NI_SYSCALL /* 0 */ SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper) SYSCALL(sys_fork,sys_fork,sys_fork) -SYSCALL(sys_read,sys_read,sys32_read_wrapper) -SYSCALL(sys_write,sys_write,sys32_write_wrapper) +SYSCALL(sys_read,sys_read,compat_sys_s390_read) +SYSCALL(sys_write,sys_write,compat_sys_s390_write) SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */ SYSCALL(sys_close,sys_close,sys32_close_wrapper) SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall) @@ -261,7 +261,7 @@ SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper) SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */ SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper) SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper) -SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper) +SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64) SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) @@ -272,7 +272,7 @@ SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) NI_SYSCALL /* reserved for vserver */ -SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) +SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper) -- cgit v0.10.2 From b07edab23c7f93db0e7ab09bd7f5eddc421f6e8c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Feb 2014 09:52:01 +0100 Subject: s390/compat: convert system call wrappers to C part 01 Introduce a new compat_wrap.c file which contains the s390 specific compat system call wrappers. The s390 specific system call wrappers only perform sign, zero and pointer conversion of system call arguments before actually calling the non-compat system call. Therefore introduce COMPAT_SYSCALL_WRAPx macros which generate C code that is nearly identical to the assembly code. This has the advantage that the compile will generate correct code, and we avoid the frequent copy-paste errors seen in the compat_wrapper.S file. Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1b3ac09..725515f9 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -50,6 +50,7 @@ compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ compat_wrapper.o compat_exec_domain.o \ $(compat-obj-y) +obj-$(CONFIG_COMPAT) += compat_wrap.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c new file mode 100644 index 0000000..5357190 --- /dev/null +++ b/arch/s390/kernel/compat_wrap.c @@ -0,0 +1,35 @@ +#include +#include + +#define COMPAT_SYSCALL_WRAP1(name, ...) \ + COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP2(name, ...) \ + COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP3(name, ...) \ + COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP4(name, ...) \ + COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP5(name, ...) \ + COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP6(name, ...) \ + COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) + +#define COMPAT_SYSCALL_WRAPx(x, name, ...) \ + asmlinkage long compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + asmlinkage long compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + return sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ + } + +COMPAT_SYSCALL_WRAP1(exit, int, error_code); +COMPAT_SYSCALL_WRAP1(close, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename); +COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); +COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds); +COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(nice, int, increment); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 16ce718..09e65cd 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -8,47 +8,10 @@ #include -ENTRY(sys32_exit_wrapper) - lgfr %r2,%r2 # int - jg sys_exit # branch to sys_exit - -ENTRY(sys32_close_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_close # branch to system call - -ENTRY(sys32_creat_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_creat # branch to system call - -ENTRY(sys32_link_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_link # branch to system call - -ENTRY(sys32_unlink_wrapper) - llgtr %r2,%r2 # const char * - jg sys_unlink # branch to system call - -ENTRY(sys32_chdir_wrapper) - llgtr %r2,%r2 # const char * - jg sys_chdir # branch to system call - ENTRY(sys32_time_wrapper) llgtr %r2,%r2 # int * jg compat_sys_time # branch to system call -ENTRY(sys32_mknod_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # dev - jg sys_mknod # branch to system call - -ENTRY(sys32_chmod_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # mode_t - jg sys_chmod # branch to system call - #sys32_getpid_wrapper # void ENTRY(sys32_mount_wrapper) @@ -59,10 +22,6 @@ ENTRY(sys32_mount_wrapper) llgtr %r6,%r6 # void * jg compat_sys_mount # branch to system call -ENTRY(sys32_oldumount_wrapper) - llgtr %r2,%r2 # char * - jg sys_oldumount # branch to system call - ENTRY(sys32_ptrace_wrapper) lgfr %r2,%r2 # long lgfr %r3,%r3 # long @@ -70,26 +29,11 @@ ENTRY(sys32_ptrace_wrapper) llgfr %r5,%r5 # long jg compat_sys_ptrace # branch to system call -ENTRY(sys32_alarm_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_alarm # branch to system call - ENTRY(compat_sys_utime_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # struct compat_utimbuf * jg compat_sys_utime # branch to system call -ENTRY(sys32_access_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_access # branch to system call - -ENTRY(sys32_nice_wrapper) - lgfr %r2,%r2 # int - jg sys_nice # branch to system call - -#sys32_sync_wrapper # void - ENTRY(sys32_kill_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 5a83f6a..ec1de1b 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -9,40 +9,40 @@ #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall,sys_ni_syscall) NI_SYSCALL /* 0 */ -SYSCALL(sys_exit,sys_exit,sys32_exit_wrapper) +SYSCALL(sys_exit,sys_exit,compat_sys_exit) SYSCALL(sys_fork,sys_fork,sys_fork) SYSCALL(sys_read,sys_read,compat_sys_s390_read) SYSCALL(sys_write,sys_write,compat_sys_s390_write) SYSCALL(sys_open,sys_open,compat_sys_open) /* 5 */ -SYSCALL(sys_close,sys_close,sys32_close_wrapper) +SYSCALL(sys_close,sys_close,compat_sys_close) SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall) -SYSCALL(sys_creat,sys_creat,sys32_creat_wrapper) -SYSCALL(sys_link,sys_link,sys32_link_wrapper) -SYSCALL(sys_unlink,sys_unlink,sys32_unlink_wrapper) /* 10 */ +SYSCALL(sys_creat,sys_creat,compat_sys_creat) +SYSCALL(sys_link,sys_link,compat_sys_link) +SYSCALL(sys_unlink,sys_unlink,compat_sys_unlink) /* 10 */ SYSCALL(sys_execve,sys_execve,sys32_execve_wrapper) -SYSCALL(sys_chdir,sys_chdir,sys32_chdir_wrapper) +SYSCALL(sys_chdir,sys_chdir,compat_sys_chdir) SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */ -SYSCALL(sys_mknod,sys_mknod,sys32_mknod_wrapper) -SYSCALL(sys_chmod,sys_chmod,sys32_chmod_wrapper) /* 15 */ +SYSCALL(sys_mknod,sys_mknod,compat_sys_mknod) +SYSCALL(sys_chmod,sys_chmod,compat_sys_chmod) /* 15 */ SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/ NI_SYSCALL /* old break syscall holder */ NI_SYSCALL /* old stat syscall holder */ SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) -SYSCALL(sys_oldumount,sys_oldumount,sys32_oldumount_wrapper) +SYSCALL(sys_oldumount,sys_oldumount,compat_sys_oldumount) SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/ SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/ SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) -SYSCALL(sys_alarm,sys_alarm,sys32_alarm_wrapper) +SYSCALL(sys_alarm,sys_alarm,compat_sys_alarm) NI_SYSCALL /* old fstat syscall */ SYSCALL(sys_pause,sys_pause,sys_pause) SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ NI_SYSCALL /* old stty syscall */ NI_SYSCALL /* old gtty syscall */ -SYSCALL(sys_access,sys_access,sys32_access_wrapper) -SYSCALL(sys_nice,sys_nice,sys32_nice_wrapper) +SYSCALL(sys_access,sys_access,compat_sys_access) +SYSCALL(sys_nice,sys_nice,compat_sys_nice) NI_SYSCALL /* 35 old ftime syscall */ SYSCALL(sys_sync,sys_sync,sys_sync) SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper) -- cgit v0.10.2 From 473a06572fcd6b2e321d4b82c19ecafe383e8be9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Feb 2014 13:42:10 +0100 Subject: s390/compat: convert system call wrappers to C part 02 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 5357190..93f13f2 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -33,3 +33,13 @@ COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds); COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); COMPAT_SYSCALL_WRAP1(nice, int, increment); +COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes); +COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); +COMPAT_SYSCALL_WRAP1(brk, compat_ulong_t, brk); +COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); +COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); +COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 09e65cd..e1b9d4a 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -34,55 +34,10 @@ ENTRY(compat_sys_utime_wrapper) llgtr %r3,%r3 # struct compat_utimbuf * jg compat_sys_utime # branch to system call -ENTRY(sys32_kill_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_kill # branch to system call - -ENTRY(sys32_rename_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_rename # branch to system call - -ENTRY(sys32_mkdir_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_mkdir # branch to system call - -ENTRY(sys32_rmdir_wrapper) - llgtr %r2,%r2 # const char * - jg sys_rmdir # branch to system call - -ENTRY(sys32_dup_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_dup # branch to system call - -ENTRY(sys32_pipe_wrapper) - llgtr %r2,%r2 # u32 * - jg sys_pipe # branch to system call - ENTRY(compat_sys_times_wrapper) llgtr %r2,%r2 # struct compat_tms * jg compat_sys_times # branch to system call -ENTRY(sys32_brk_wrapper) - llgtr %r2,%r2 # unsigned long - jg sys_brk # branch to system call - -ENTRY(sys32_signal_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # __sighandler_t - jg sys_signal - -ENTRY(sys32_acct_wrapper) - llgtr %r2,%r2 # char * - jg sys_acct # branch to system call - -ENTRY(sys32_umount_wrapper) - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_umount # branch to system call - ENTRY(compat_sys_ioctl_wrapper) llgfr %r2,%r2 # unsigned int llgfr %r3,%r3 # unsigned int diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index ec1de1b..da6bffd 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -45,22 +45,22 @@ SYSCALL(sys_access,sys_access,compat_sys_access) SYSCALL(sys_nice,sys_nice,compat_sys_nice) NI_SYSCALL /* 35 old ftime syscall */ SYSCALL(sys_sync,sys_sync,sys_sync) -SYSCALL(sys_kill,sys_kill,sys32_kill_wrapper) -SYSCALL(sys_rename,sys_rename,sys32_rename_wrapper) -SYSCALL(sys_mkdir,sys_mkdir,sys32_mkdir_wrapper) -SYSCALL(sys_rmdir,sys_rmdir,sys32_rmdir_wrapper) /* 40 */ -SYSCALL(sys_dup,sys_dup,sys32_dup_wrapper) -SYSCALL(sys_pipe,sys_pipe,sys32_pipe_wrapper) +SYSCALL(sys_kill,sys_kill,compat_sys_kill) +SYSCALL(sys_rename,sys_rename,compat_sys_rename) +SYSCALL(sys_mkdir,sys_mkdir,compat_sys_mkdir) +SYSCALL(sys_rmdir,sys_rmdir,compat_sys_rmdir) /* 40 */ +SYSCALL(sys_dup,sys_dup,compat_sys_dup) +SYSCALL(sys_pipe,sys_pipe,compat_sys_pipe) SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) NI_SYSCALL /* old prof syscall */ -SYSCALL(sys_brk,sys_brk,sys32_brk_wrapper) /* 45 */ +SYSCALL(sys_brk,sys_brk,compat_sys_brk) /* 45 */ SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/ SYSCALL(sys_getgid16,sys_ni_syscall,compat_sys_s390_getgid16) /* old getgid16 syscall*/ -SYSCALL(sys_signal,sys_signal,sys32_signal_wrapper) +SYSCALL(sys_signal,sys_signal,compat_sys_signal) SYSCALL(sys_geteuid16,sys_ni_syscall,compat_sys_s390_geteuid16) /* old geteuid16 syscall */ SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegid16 syscall */ -SYSCALL(sys_acct,sys_acct,sys32_acct_wrapper) -SYSCALL(sys_umount,sys_umount,sys32_umount_wrapper) +SYSCALL(sys_acct,sys_acct,compat_sys_acct) +SYSCALL(sys_umount,sys_umount,compat_sys_umount) NI_SYSCALL /* old lock syscall */ SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper) SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */ -- cgit v0.10.2 From be06fbf816020a846991ef7f5489ac0027320ea3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Feb 2014 14:04:43 +0100 Subject: s390/compat: convert system call wrappers to C part 03 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 93f13f2..ac26ee7 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -43,3 +43,13 @@ COMPAT_SYSCALL_WRAP1(brk, compat_ulong_t, brk); COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); +COMPAT_SYSCALL_WRAP2(setpgid, compat_pid_t, pid, compat_pid_t, pgid); +COMPAT_SYSCALL_WRAP1(umask, int, mask); +COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); +COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd); +COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, compat_old_sigset_t, mask); +COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); +COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); +COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index e1b9d4a..3597511 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -50,50 +50,15 @@ ENTRY(compat_sys_fcntl_wrapper) llgfr %r4,%r4 # unsigned long jg compat_sys_fcntl # branch to system call -ENTRY(sys32_setpgid_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - jg sys_setpgid # branch to system call - -ENTRY(sys32_umask_wrapper) - lgfr %r2,%r2 # int - jg sys_umask # branch to system call - -ENTRY(sys32_chroot_wrapper) - llgtr %r2,%r2 # char * - jg sys_chroot # branch to system call - ENTRY(sys32_ustat_wrapper) llgfr %r2,%r2 # dev_t llgtr %r3,%r3 # struct ustat * jg compat_sys_ustat -ENTRY(sys32_dup2_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_dup2 # branch to system call - -#sys32_getppid_wrapper # void - -#sys32_getpgrp_wrapper # void - -#sys32_setsid_wrapper # void - -ENTRY(sys_sigsuspend_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # old_sigset_t - jg sys_sigsuspend - ENTRY(compat_sys_sigpending_wrapper) llgtr %r2,%r2 # compat_old_sigset_t * jg compat_sys_sigpending # branch to system call -ENTRY(sys32_sethostname_wrapper) - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_sethostname # branch to system call - ENTRY(compat_sys_setrlimit_wrapper) llgfr %r2,%r2 # unsigned int llgtr %r3,%r3 # struct rlimit_emu31 * @@ -119,26 +84,6 @@ ENTRY(compat_sys_settimeofday_wrapper) llgtr %r3,%r3 # struct timezone * jg compat_sys_settimeofday # branch to system call -ENTRY(sys32_symlink_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_symlink # branch to system call - -ENTRY(sys32_readlink_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_readlink # branch to system call - -ENTRY(sys32_uselib_wrapper) - llgtr %r2,%r2 # const char * - jg sys_uselib # branch to system call - -ENTRY(sys32_swapon_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - jg sys_swapon # branch to system call - ENTRY(sys32_reboot_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index da6bffd..5bc24d9 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -65,13 +65,13 @@ NI_SYSCALL /* old lock syscall */ SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper) SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */ NI_SYSCALL /* intel mpx syscall */ -SYSCALL(sys_setpgid,sys_setpgid,sys32_setpgid_wrapper) +SYSCALL(sys_setpgid,sys_setpgid,compat_sys_setpgid) NI_SYSCALL /* old ulimit syscall */ NI_SYSCALL /* old uname syscall */ -SYSCALL(sys_umask,sys_umask,sys32_umask_wrapper) /* 60 */ -SYSCALL(sys_chroot,sys_chroot,sys32_chroot_wrapper) +SYSCALL(sys_umask,sys_umask,compat_sys_umask) /* 60 */ +SYSCALL(sys_chroot,sys_chroot,compat_sys_chroot) SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper) -SYSCALL(sys_dup2,sys_dup2,sys32_dup2_wrapper) +SYSCALL(sys_dup2,sys_dup2,compat_sys_dup2) SYSCALL(sys_getppid,sys_getppid,sys_getppid) SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */ SYSCALL(sys_setsid,sys_setsid,sys_setsid) @@ -80,9 +80,9 @@ NI_SYSCALL /* old sgetmask syscall*/ NI_SYSCALL /* old ssetmask syscall*/ SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */ SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */ -SYSCALL(sys_sigsuspend,sys_sigsuspend,sys_sigsuspend_wrapper) +SYSCALL(sys_sigsuspend,sys_sigsuspend,compat_sys_sigsuspend) SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) -SYSCALL(sys_sethostname,sys_sethostname,sys32_sethostname_wrapper) +SYSCALL(sys_sethostname,sys_sethostname,compat_sys_sethostname) SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage) @@ -91,11 +91,11 @@ SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */ SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */ NI_SYSCALL /* old select syscall */ -SYSCALL(sys_symlink,sys_symlink,sys32_symlink_wrapper) +SYSCALL(sys_symlink,sys_symlink,compat_sys_symlink) NI_SYSCALL /* old lstat syscall */ -SYSCALL(sys_readlink,sys_readlink,sys32_readlink_wrapper) /* 85 */ -SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) -SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) +SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink) /* 85 */ +SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib) +SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon) SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */ -- cgit v0.10.2 From c355ce182a2e3f88d6382a9720be61369c38c5c7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Feb 2014 14:20:53 +0100 Subject: s390/compat: convert system call wrappers to C part 04 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index ac26ee7..f2b8540 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -53,3 +53,13 @@ COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); +COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); +COMPAT_SYSCALL_WRAP2(munmap, compat_ulong_t, addr, compat_size_t, len); +COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode); +COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who); +COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval); +COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len); +COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); +COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 3597511..3ea908f 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -84,40 +84,12 @@ ENTRY(compat_sys_settimeofday_wrapper) llgtr %r3,%r3 # struct timezone * jg compat_sys_settimeofday # branch to system call -ENTRY(sys32_reboot_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # unsigned int - llgtr %r5,%r5 # void * - jg sys_reboot # branch to system call - ENTRY(old32_readdir_wrapper) llgfr %r2,%r2 # unsigned int llgtr %r3,%r3 # void * llgfr %r4,%r4 # unsigned int jg compat_sys_old_readdir # branch to system call -ENTRY(sys32_munmap_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munmap # branch to system call - -ENTRY(sys32_fchmod_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # mode_t - jg sys_fchmod # branch to system call - -ENTRY(sys32_getpriority_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_getpriority # branch to system call - -ENTRY(sys32_setpriority_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_setpriority # branch to system call - ENTRY(compat_sys_statfs_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # struct compat_statfs * @@ -133,12 +105,6 @@ ENTRY(compat_sys_socketcall_wrapper) llgtr %r3,%r3 # u32 * jg compat_sys_socketcall # branch to system call -ENTRY(sys32_syslog_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - lgfr %r4,%r4 # int - jg sys_syslog # branch to system call - ENTRY(compat_sys_newstat_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # struct stat_emu31 * @@ -154,33 +120,10 @@ ENTRY(compat_sys_newfstat_wrapper) llgtr %r3,%r3 # struct stat_emu31 * jg compat_sys_newfstat # branch to system call -#sys32_vhangup_wrapper # void - -ENTRY(sys32_swapoff_wrapper) - llgtr %r2,%r2 # const char * - jg sys_swapoff # branch to system call - ENTRY(compat_sys_sysinfo_wrapper) llgtr %r2,%r2 # struct sysinfo_emu31 * jg compat_sys_sysinfo # branch to system call -ENTRY(sys32_fsync_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_fsync # branch to system call - -#sys32_sigreturn_wrapper # done in sigreturn_glue - -#sys32_clone_wrapper # done in clone_glue - -ENTRY(sys32_setdomainname_wrapper) - llgtr %r2,%r2 # char * - lgfr %r3,%r3 # int - jg sys_setdomainname # branch to system call - -ENTRY(sys32_newuname_wrapper) - llgtr %r2,%r2 # struct new_utsname * - jg sys_newuname # branch to system call - ENTRY(compat_sys_adjtimex_wrapper) llgtr %r2,%r2 # struct compat_timex * jg compat_sys_adjtimex # branch to system call diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 5bc24d9..4cf5a52 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -96,22 +96,22 @@ NI_SYSCALL /* old lstat syscall */ SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink) /* 85 */ SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib) SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon) -SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) +SYSCALL(sys_reboot,sys_reboot,compat_sys_reboot) SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */ -SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) +SYSCALL(sys_munmap,sys_munmap,compat_sys_munmap) SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) SYSCALL(sys_ftruncate,sys_ftruncate,compat_sys_ftruncate) -SYSCALL(sys_fchmod,sys_fchmod,sys32_fchmod_wrapper) +SYSCALL(sys_fchmod,sys_fchmod,compat_sys_fchmod) SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 syscall*/ -SYSCALL(sys_getpriority,sys_getpriority,sys32_getpriority_wrapper) -SYSCALL(sys_setpriority,sys_setpriority,sys32_setpriority_wrapper) +SYSCALL(sys_getpriority,sys_getpriority,compat_sys_getpriority) +SYSCALL(sys_setpriority,sys_setpriority,compat_sys_setpriority) NI_SYSCALL /* old profil syscall */ SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper) SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */ NI_SYSCALL /* ioperm for i386 */ SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper) -SYSCALL(sys_syslog,sys_syslog,sys32_syslog_wrapper) +SYSCALL(sys_syslog,sys_syslog,compat_sys_syslog) SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer) SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */ SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) @@ -123,14 +123,14 @@ SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) NI_SYSCALL /* old "idle" system call */ NI_SYSCALL /* vm86old for i386 */ SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4) -SYSCALL(sys_swapoff,sys_swapoff,sys32_swapoff_wrapper) /* 115 */ +SYSCALL(sys_swapoff,sys_swapoff,compat_sys_swapoff) /* 115 */ SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) -SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) +SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync) SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */ -SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper) -SYSCALL(sys_newuname,sys_newuname,sys32_newuname_wrapper) +SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname) +SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname) NI_SYSCALL /* modify_ldt for i386 */ SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper) SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */ -- cgit v0.10.2 From 86d295e1cc59b42d0d2f9fb5f7ff9b1894f90e0c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Feb 2014 15:16:04 +0100 Subject: s390/compat: convert system call wrappers to C part 05 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index f2b8540..558a1d4 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -1,5 +1,6 @@ #include #include +#include "entry.h" #define COMPAT_SYSCALL_WRAP1(name, ...) \ COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__) @@ -63,3 +64,13 @@ COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); +COMPAT_SYSCALL_WRAP3(mprotect, compat_ulong_t, start, compat_size_t, len, compat_ulong_t, prot); +COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, compat_ulong_t, len, const char __user *, uargs); +COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); +COMPAT_SYSCALL_WRAP1(getpgid, compat_pid_t, pid); +COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(bdflush, int, func, compat_long_t, data); +COMPAT_SYSCALL_WRAP3(sysfs, int, option, compat_ulong_t, arg1, compat_ulong_t, arg2); +COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); +COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, u32, high, u32, low, loff_t __user *, result, unsigned int, whence); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 3ea908f..78b7899 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -128,61 +128,6 @@ ENTRY(compat_sys_adjtimex_wrapper) llgtr %r2,%r2 # struct compat_timex * jg compat_sys_adjtimex # branch to system call -ENTRY(sys32_mprotect_wrapper) - llgtr %r2,%r2 # unsigned long (actually pointer - llgfr %r3,%r3 # size_t - llgfr %r4,%r4 # unsigned long - jg sys_mprotect # branch to system call - -ENTRY(sys_init_module_wrapper) - llgtr %r2,%r2 # void * - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # char * - jg sys_init_module # branch to system call - -ENTRY(sys_delete_module_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # unsigned int - jg sys_delete_module # branch to system call - -ENTRY(sys32_quotactl_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # qid_t - llgtr %r5,%r5 # caddr_t - jg sys_quotactl # branch to system call - -ENTRY(sys32_getpgid_wrapper) - lgfr %r2,%r2 # pid_t - jg sys_getpgid # branch to system call - -ENTRY(sys32_fchdir_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_fchdir # branch to system call - -ENTRY(sys32_bdflush_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # long - jg sys_bdflush # branch to system call - -ENTRY(sys32_sysfs_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - jg sys_sysfs # branch to system call - -ENTRY(sys32_personality_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_s390_personality # branch to system call - -ENTRY(sys32_llseek_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # unsigned int - jg sys_llseek # branch to system call - ENTRY(sys32_getdents_wrapper) llgfr %r2,%r2 # unsigned int llgtr %r3,%r3 # void * diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index cb533f7..8c6c022 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -72,4 +72,6 @@ long sys_rt_sigreturn(void); long sys32_sigreturn(void); long sys32_rt_sigreturn(void); +long sys_s390_personality(unsigned int personality); + #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 4cf5a52..88c85e4 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -133,22 +133,22 @@ SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname) SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname) NI_SYSCALL /* modify_ldt for i386 */ SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper) -SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */ +SYSCALL(sys_mprotect,sys_mprotect,compat_sys_mprotect) /* 125 */ SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask) NI_SYSCALL /* old "create module" */ -SYSCALL(sys_init_module,sys_init_module,sys_init_module_wrapper) -SYSCALL(sys_delete_module,sys_delete_module,sys_delete_module_wrapper) +SYSCALL(sys_init_module,sys_init_module,compat_sys_init_module) +SYSCALL(sys_delete_module,sys_delete_module,compat_sys_delete_module) NI_SYSCALL /* 130: old get_kernel_syms */ -SYSCALL(sys_quotactl,sys_quotactl,sys32_quotactl_wrapper) -SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper) -SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper) -SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper) -SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */ -SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper) +SYSCALL(sys_quotactl,sys_quotactl,compat_sys_quotactl) +SYSCALL(sys_getpgid,sys_getpgid,compat_sys_getpgid) +SYSCALL(sys_fchdir,sys_fchdir,compat_sys_fchdir) +SYSCALL(sys_bdflush,sys_bdflush,compat_sys_bdflush) +SYSCALL(sys_sysfs,sys_sysfs,compat_sys_sysfs) /* 135 */ +SYSCALL(sys_personality,sys_s390_personality,compat_sys_s390_personality) NI_SYSCALL /* for afs_syscall */ SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */ SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */ -SYSCALL(sys_llseek,sys_llseek,sys32_llseek_wrapper) /* 140 */ +SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek) /* 140 */ SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper) -- cgit v0.10.2 From ce5cef7edecd21b993c6019dae01a7ff1b453379 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Feb 2014 15:33:06 +0100 Subject: s390/compat: convert system call wrappers to C part 06 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 558a1d4..60813d5 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -74,3 +74,13 @@ COMPAT_SYSCALL_WRAP2(bdflush, int, func, compat_long_t, data); COMPAT_SYSCALL_WRAP3(sysfs, int, option, compat_ulong_t, arg1, compat_ulong_t, arg2); COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, u32, high, u32, low, loff_t __user *, result, unsigned int, whence); +COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd); +COMPAT_SYSCALL_WRAP3(msync, compat_ulong_t, start, compat_size_t, len, int, flags); +COMPAT_SYSCALL_WRAP1(getsid, compat_pid_t, pid); +COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(mlock, compat_ulong_t, start, compat_size_t, len); +COMPAT_SYSCALL_WRAP2(munlock, compat_ulong_t, start, compat_size_t, len); +COMPAT_SYSCALL_WRAP1(mlockall, int, flags); +COMPAT_SYSCALL_WRAP2(sched_setparam, compat_pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP2(sched_getparam, compat_pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP3(sched_setscheduler, compat_pid_t, pid, int, policy, struct sched_param __user *, param); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 78b7899..707e375 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -142,17 +142,6 @@ ENTRY(compat_sys_select_wrapper) llgtr %r6,%r6 # struct compat_timeval * jg compat_sys_select # branch to system call -ENTRY(sys32_flock_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_flock # branch to system call - -ENTRY(sys32_msync_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_msync # branch to system call - ENTRY(compat_sys_readv_wrapper) lgfr %r2,%r2 # int llgtr %r3,%r3 # const struct compat_iovec * @@ -165,46 +154,6 @@ ENTRY(compat_sys_writev_wrapper) llgfr %r4,%r4 # unsigned long jg compat_sys_writev # branch to system call -ENTRY(sys32_getsid_wrapper) - lgfr %r2,%r2 # pid_t - jg sys_getsid # branch to system call - -ENTRY(sys32_fdatasync_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_fdatasync # branch to system call - -ENTRY(sys32_mlock_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_mlock # branch to system call - -ENTRY(sys32_munlock_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - jg sys_munlock # branch to system call - -ENTRY(sys32_mlockall_wrapper) - lgfr %r2,%r2 # int - jg sys_mlockall # branch to system call - -#sys32_munlockall_wrapper # void - -ENTRY(sys32_sched_setparam_wrapper) - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_setparam # branch to system call - -ENTRY(sys32_sched_getparam_wrapper) - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_param * - jg sys_sched_getparam # branch to system call - -ENTRY(sys32_sched_setscheduler_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct sched_param * - jg sys_sched_setscheduler # branch to system call - ENTRY(sys32_sched_getscheduler_wrapper) lgfr %r2,%r2 # pid_t jg sys_sched_getscheduler # branch to system call diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 88c85e4..c667bb1 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -151,20 +151,20 @@ SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgi SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek) /* 140 */ SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) -SYSCALL(sys_flock,sys_flock,sys32_flock_wrapper) -SYSCALL(sys_msync,sys_msync,sys32_msync_wrapper) +SYSCALL(sys_flock,sys_flock,compat_sys_flock) +SYSCALL(sys_msync,sys_msync,compat_sys_msync) SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */ SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper) -SYSCALL(sys_getsid,sys_getsid,sys32_getsid_wrapper) -SYSCALL(sys_fdatasync,sys_fdatasync,sys32_fdatasync_wrapper) +SYSCALL(sys_getsid,sys_getsid,compat_sys_getsid) +SYSCALL(sys_fdatasync,sys_fdatasync,compat_sys_fdatasync) SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl) -SYSCALL(sys_mlock,sys_mlock,sys32_mlock_wrapper) /* 150 */ -SYSCALL(sys_munlock,sys_munlock,sys32_munlock_wrapper) -SYSCALL(sys_mlockall,sys_mlockall,sys32_mlockall_wrapper) +SYSCALL(sys_mlock,sys_mlock,compat_sys_mlock) /* 150 */ +SYSCALL(sys_munlock,sys_munlock,compat_sys_munlock) +SYSCALL(sys_mlockall,sys_mlockall,compat_sys_mlockall) SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall) -SYSCALL(sys_sched_setparam,sys_sched_setparam,sys32_sched_setparam_wrapper) -SYSCALL(sys_sched_getparam,sys_sched_getparam,sys32_sched_getparam_wrapper) /* 155 */ -SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,sys32_sched_setscheduler_wrapper) +SYSCALL(sys_sched_setparam,sys_sched_setparam,compat_sys_sched_setparam) +SYSCALL(sys_sched_getparam,sys_sched_getparam,compat_sys_sched_getparam) /* 155 */ +SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,compat_sys_sched_setscheduler) SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper) SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield) SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper) -- cgit v0.10.2 From 0ebe3eec1e38bb1253e7542b0e567f0478f7bf4f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Feb 2014 16:49:10 +0100 Subject: s390/compat: convert system call wrappers to C part 07 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 60813d5..2e132f9 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -84,3 +84,13 @@ COMPAT_SYSCALL_WRAP1(mlockall, int, flags); COMPAT_SYSCALL_WRAP2(sched_setparam, compat_pid_t, pid, struct sched_param __user *, param); COMPAT_SYSCALL_WRAP2(sched_getparam, compat_pid_t, pid, struct sched_param __user *, param); COMPAT_SYSCALL_WRAP3(sched_setscheduler, compat_pid_t, pid, int, policy, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP1(sched_getscheduler, compat_pid_t, pid); +COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy); +COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy); +COMPAT_SYSCALL_WRAP5(mremap, u32, addr, u32, old_len, u32, new_len, u32, flags, u32, new_addr); +COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); +COMPAT_SYSCALL_WRAP5(prctl, int, option, u32, arg2, u32, arg3, u32, arg4, u32, arg5); +COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, u32, size); +COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); +COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); +COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, compat_uid_t, user, compat_gid_t, group); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 707e375..ac12c0f1 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -154,77 +154,11 @@ ENTRY(compat_sys_writev_wrapper) llgfr %r4,%r4 # unsigned long jg compat_sys_writev # branch to system call -ENTRY(sys32_sched_getscheduler_wrapper) - lgfr %r2,%r2 # pid_t - jg sys_sched_getscheduler # branch to system call - -#sys32_sched_yield_wrapper # void - -ENTRY(sys32_sched_get_priority_max_wrapper) - lgfr %r2,%r2 # int - jg sys_sched_get_priority_max # branch to system call - -ENTRY(sys32_sched_get_priority_min_wrapper) - lgfr %r2,%r2 # int - jg sys_sched_get_priority_min # branch to system call - ENTRY(compat_sys_nanosleep_wrapper) llgtr %r2,%r2 # struct compat_timespec * llgtr %r3,%r3 # struct compat_timespec * jg compat_sys_nanosleep # branch to system call -ENTRY(sys32_mremap_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_mremap # branch to system call - -ENTRY(sys32_poll_wrapper) - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # int - jg sys_poll # branch to system call - -ENTRY(sys32_prctl_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_prctl # branch to system call - -#sys32_rt_sigreturn_wrapper # done in rt_sigreturn_glue - -ENTRY(sys32_getcwd_wrapper) - llgtr %r2,%r2 # char * - llgfr %r3,%r3 # unsigned long - jg sys_getcwd # branch to system call - -ENTRY(sys32_capget_wrapper) - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # cap_user_data_t - jg sys_capget # branch to system call - -ENTRY(sys32_capset_wrapper) - llgtr %r2,%r2 # cap_user_header_t - llgtr %r3,%r3 # const cap_user_data_t - jg sys_capset # branch to system call - -#sys32_vfork_wrapper # done in vfork_glue - -ENTRY(sys32_lchown_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_lchown # branch to system call - -#sys32_getuid_wrapper # void -#sys32_getgid_wrapper # void -#sys32_geteuid_wrapper # void -#sys32_getegid_wrapper # void - ENTRY(sys32_setreuid_wrapper) llgfr %r2,%r2 # uid_t llgfr %r3,%r3 # uid_t diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index c667bb1..775856a 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -165,22 +165,22 @@ SYSCALL(sys_munlockall,sys_munlockall,sys_munlockall) SYSCALL(sys_sched_setparam,sys_sched_setparam,compat_sys_sched_setparam) SYSCALL(sys_sched_getparam,sys_sched_getparam,compat_sys_sched_getparam) /* 155 */ SYSCALL(sys_sched_setscheduler,sys_sched_setscheduler,compat_sys_sched_setscheduler) -SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,sys32_sched_getscheduler_wrapper) +SYSCALL(sys_sched_getscheduler,sys_sched_getscheduler,compat_sys_sched_getscheduler) SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield) -SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,sys32_sched_get_priority_max_wrapper) -SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,sys32_sched_get_priority_min_wrapper) /* 160 */ +SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,compat_sys_sched_get_priority_max) +SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */ SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval) SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper) -SYSCALL(sys_mremap,sys_mremap,sys32_mremap_wrapper) +SYSCALL(sys_mremap,sys_mremap,compat_sys_mremap) SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */ SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */ NI_SYSCALL /* for vm86 */ NI_SYSCALL /* old sys_query_module */ -SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) +SYSCALL(sys_poll,sys_poll,compat_sys_poll) NI_SYSCALL /* old nfsservctl */ SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */ SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */ -SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) +SYSCALL(sys_prctl,sys_prctl,compat_sys_prctl) SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn) SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction) SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */ @@ -191,9 +191,9 @@ SYSCALL(sys_rt_sigsuspend,sys_rt_sigsuspend,compat_sys_rt_sigsuspend) SYSCALL(sys_pread64,sys_pread64,compat_sys_s390_pread64) /* 180 */ SYSCALL(sys_pwrite64,sys_pwrite64,compat_sys_s390_pwrite64) SYSCALL(sys_chown16,sys_ni_syscall,compat_sys_s390_chown16) /* old chown16 syscall */ -SYSCALL(sys_getcwd,sys_getcwd,sys32_getcwd_wrapper) -SYSCALL(sys_capget,sys_capget,sys32_capget_wrapper) -SYSCALL(sys_capset,sys_capset,sys32_capset_wrapper) /* 185 */ +SYSCALL(sys_getcwd,sys_getcwd,compat_sys_getcwd) +SYSCALL(sys_capget,sys_capget,compat_sys_capget) +SYSCALL(sys_capset,sys_capset,compat_sys_capset) /* 185 */ SYSCALL(sys_sigaltstack,sys_sigaltstack,compat_sys_sigaltstack) SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile) NI_SYSCALL /* streams1 */ @@ -206,7 +206,7 @@ SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64) SYSCALL(sys_stat64,sys_ni_syscall,compat_sys_s390_stat64) /* 195 */ SYSCALL(sys_lstat64,sys_ni_syscall,compat_sys_s390_lstat64) SYSCALL(sys_fstat64,sys_ni_syscall,compat_sys_s390_fstat64) -SYSCALL(sys_lchown,sys_lchown,sys32_lchown_wrapper) +SYSCALL(sys_lchown,sys_lchown,compat_sys_lchown) SYSCALL(sys_getuid,sys_getuid,sys_getuid) SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */ SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid) -- cgit v0.10.2 From 47b3ae9b8cb0a6778a9920efaf3c47b19b4e6468 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 27 Feb 2014 17:35:23 +0100 Subject: s390/compat: convert system call wrappers to C part 08 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 2e132f9..3016772 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -94,3 +94,13 @@ COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, u32, size); COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, compat_uid_t, user, compat_gid_t, group); +COMPAT_SYSCALL_WRAP2(setreuid, compat_uid_t, ruid, compat_uid_t, euid); +COMPAT_SYSCALL_WRAP2(setregid, compat_gid_t, rgid, compat_gid_t, egid); +COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, compat_gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, compat_gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, compat_uid_t, user, compat_gid_t, group); +COMPAT_SYSCALL_WRAP3(setresuid, compat_uid_t, ruid, compat_uid_t, euid, compat_uid_t, suid); +COMPAT_SYSCALL_WRAP3(getresuid, compat_uid_t __user *, ruid, compat_uid_t __user *, euid, compat_uid_t __user *, suid); +COMPAT_SYSCALL_WRAP3(setresgid, compat_gid_t, rgid, compat_gid_t, egid, compat_gid_t, sgid); +COMPAT_SYSCALL_WRAP3(getresgid, compat_gid_t __user *, rgid, compat_gid_t __user *, egid, compat_gid_t __user *, sgid); +COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, compat_uid_t, user, compat_gid_t, group); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index ac12c0f1..e430f28 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -159,62 +159,6 @@ ENTRY(compat_sys_nanosleep_wrapper) llgtr %r3,%r3 # struct compat_timespec * jg compat_sys_nanosleep # branch to system call -ENTRY(sys32_setreuid_wrapper) - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - jg sys_setreuid # branch to system call - -ENTRY(sys32_setregid_wrapper) - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - jg sys_setregid # branch to system call - -ENTRY(sys32_getgroups_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_getgroups # branch to system call - -ENTRY(sys32_setgroups_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # gid_t * - jg sys_setgroups # branch to system call - -ENTRY(sys32_fchown_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_fchown # branch to system call - -ENTRY(sys32_setresuid_wrapper) - llgfr %r2,%r2 # uid_t - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # uid_t - jg sys_setresuid # branch to system call - -ENTRY(sys32_getresuid_wrapper) - llgtr %r2,%r2 # uid_t * - llgtr %r3,%r3 # uid_t * - llgtr %r4,%r4 # uid_t * - jg sys_getresuid # branch to system call - -ENTRY(sys32_setresgid_wrapper) - llgfr %r2,%r2 # gid_t - llgfr %r3,%r3 # gid_t - llgfr %r4,%r4 # gid_t - jg sys_setresgid # branch to system call - -ENTRY(sys32_getresgid_wrapper) - llgtr %r2,%r2 # gid_t * - llgtr %r3,%r3 # gid_t * - llgtr %r4,%r4 # gid_t * - jg sys_getresgid # branch to system call - -ENTRY(sys32_chown_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # uid_t - llgfr %r4,%r4 # gid_t - jg sys_chown # branch to system call - ENTRY(sys32_setuid_wrapper) llgfr %r2,%r2 # uid_t jg sys_setuid # branch to system call diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 775856a..609de5a 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -211,16 +211,16 @@ SYSCALL(sys_getuid,sys_getuid,sys_getuid) SYSCALL(sys_getgid,sys_getgid,sys_getgid) /* 200 */ SYSCALL(sys_geteuid,sys_geteuid,sys_geteuid) SYSCALL(sys_getegid,sys_getegid,sys_getegid) -SYSCALL(sys_setreuid,sys_setreuid,sys32_setreuid_wrapper) -SYSCALL(sys_setregid,sys_setregid,sys32_setregid_wrapper) -SYSCALL(sys_getgroups,sys_getgroups,sys32_getgroups_wrapper) /* 205 */ -SYSCALL(sys_setgroups,sys_setgroups,sys32_setgroups_wrapper) -SYSCALL(sys_fchown,sys_fchown,sys32_fchown_wrapper) -SYSCALL(sys_setresuid,sys_setresuid,sys32_setresuid_wrapper) -SYSCALL(sys_getresuid,sys_getresuid,sys32_getresuid_wrapper) -SYSCALL(sys_setresgid,sys_setresgid,sys32_setresgid_wrapper) /* 210 */ -SYSCALL(sys_getresgid,sys_getresgid,sys32_getresgid_wrapper) -SYSCALL(sys_chown,sys_chown,sys32_chown_wrapper) +SYSCALL(sys_setreuid,sys_setreuid,compat_sys_setreuid) +SYSCALL(sys_setregid,sys_setregid,compat_sys_setregid) +SYSCALL(sys_getgroups,sys_getgroups,compat_sys_getgroups) /* 205 */ +SYSCALL(sys_setgroups,sys_setgroups,compat_sys_setgroups) +SYSCALL(sys_fchown,sys_fchown,compat_sys_fchown) +SYSCALL(sys_setresuid,sys_setresuid,compat_sys_setresuid) +SYSCALL(sys_getresuid,sys_getresuid,compat_sys_getresuid) +SYSCALL(sys_setresgid,sys_setresgid,compat_sys_setresgid) /* 210 */ +SYSCALL(sys_getresgid,sys_getresgid,compat_sys_getresgid) +SYSCALL(sys_chown,sys_chown,compat_sys_chown) SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper) SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper) SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */ -- cgit v0.10.2 From 24e4c2aaef149eafc1958205ace2b202de0dbf16 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 28 Feb 2014 10:31:37 +0100 Subject: s390/compat: convert system call wrappers to C part 09 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 3016772..84d2e25 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -104,3 +104,13 @@ COMPAT_SYSCALL_WRAP3(getresuid, compat_uid_t __user *, ruid, compat_uid_t __user COMPAT_SYSCALL_WRAP3(setresgid, compat_gid_t, rgid, compat_gid_t, egid, compat_gid_t, sgid); COMPAT_SYSCALL_WRAP3(getresgid, compat_gid_t __user *, rgid, compat_gid_t __user *, egid, compat_gid_t __user *, sgid); COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, compat_uid_t, user, compat_gid_t, group); +COMPAT_SYSCALL_WRAP1(setuid, compat_uid_t, uid); +COMPAT_SYSCALL_WRAP1(setgid, compat_gid_t, gid); +COMPAT_SYSCALL_WRAP1(setfsuid, compat_uid_t, uid); +COMPAT_SYSCALL_WRAP1(setfsgid, compat_gid_t, gid); +COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); +COMPAT_SYSCALL_WRAP3(mincore, compat_ulong_t, start, compat_size_t, len, unsigned char __user *, vec); +COMPAT_SYSCALL_WRAP3(madvise, compat_ulong_t, start, compat_size_t, len, int, behavior); +COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index e430f28..1e1b71f 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -159,39 +159,6 @@ ENTRY(compat_sys_nanosleep_wrapper) llgtr %r3,%r3 # struct compat_timespec * jg compat_sys_nanosleep # branch to system call -ENTRY(sys32_setuid_wrapper) - llgfr %r2,%r2 # uid_t - jg sys_setuid # branch to system call - -ENTRY(sys32_setgid_wrapper) - llgfr %r2,%r2 # gid_t - jg sys_setgid # branch to system call - -ENTRY(sys32_setfsuid_wrapper) - llgfr %r2,%r2 # uid_t - jg sys_setfsuid # branch to system call - -ENTRY(sys32_setfsgid_wrapper) - llgfr %r2,%r2 # gid_t - jg sys_setfsgid # branch to system call - -ENTRY(sys32_pivot_root_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - jg sys_pivot_root # branch to system call - -ENTRY(sys32_mincore_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - llgtr %r4,%r4 # unsigned char * - jg sys_mincore # branch to system call - -ENTRY(sys32_madvise_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # size_t - lgfr %r4,%r4 # int - jg sys_madvise # branch to system call - ENTRY(sys32_getdents64_wrapper) llgfr %r2,%r2 # unsigned int llgtr %r3,%r3 # void * @@ -208,30 +175,6 @@ ENTRY(sys32_stime_wrapper) llgtr %r2,%r2 # long * jg compat_sys_stime # branch to system call -ENTRY(sys32_setxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_setxattr - -ENTRY(sys32_lsetxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_lsetxattr - -ENTRY(sys32_fsetxattr_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - lgfr %r6,%r6 # int - jg sys_fsetxattr - ENTRY(sys32_getxattr_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # char * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 609de5a..053b271 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -221,20 +221,20 @@ SYSCALL(sys_getresuid,sys_getresuid,compat_sys_getresuid) SYSCALL(sys_setresgid,sys_setresgid,compat_sys_setresgid) /* 210 */ SYSCALL(sys_getresgid,sys_getresgid,compat_sys_getresgid) SYSCALL(sys_chown,sys_chown,compat_sys_chown) -SYSCALL(sys_setuid,sys_setuid,sys32_setuid_wrapper) -SYSCALL(sys_setgid,sys_setgid,sys32_setgid_wrapper) -SYSCALL(sys_setfsuid,sys_setfsuid,sys32_setfsuid_wrapper) /* 215 */ -SYSCALL(sys_setfsgid,sys_setfsgid,sys32_setfsgid_wrapper) -SYSCALL(sys_pivot_root,sys_pivot_root,sys32_pivot_root_wrapper) -SYSCALL(sys_mincore,sys_mincore,sys32_mincore_wrapper) -SYSCALL(sys_madvise,sys_madvise,sys32_madvise_wrapper) +SYSCALL(sys_setuid,sys_setuid,compat_sys_setuid) +SYSCALL(sys_setgid,sys_setgid,compat_sys_setgid) +SYSCALL(sys_setfsuid,sys_setfsuid,compat_sys_setfsuid) /* 215 */ +SYSCALL(sys_setfsgid,sys_setfsgid,compat_sys_setfsgid) +SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root) +SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore) +SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise) SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead) SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) -SYSCALL(sys_setxattr,sys_setxattr,sys32_setxattr_wrapper) -SYSCALL(sys_lsetxattr,sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -SYSCALL(sys_fsetxattr,sys_fsetxattr,sys32_fsetxattr_wrapper) +SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr) +SYSCALL(sys_lsetxattr,sys_lsetxattr,compat_sys_lsetxattr) /* 225 */ +SYSCALL(sys_fsetxattr,sys_fsetxattr,compat_sys_fsetxattr) SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper) SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper) SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper) -- cgit v0.10.2 From 18421166e8360bbb29b5eedd2cd3a9aedc0c90b3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 28 Feb 2014 12:59:44 +0100 Subject: s390/compat: convert system call wrappers to C part 10 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 84d2e25..97197b8 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -114,3 +114,13 @@ COMPAT_SYSCALL_WRAP3(madvise, compat_ulong_t, start, compat_size_t, len, int, be COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); +COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count); +COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, compat_size_t, size); +COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, compat_size_t, size); +COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, compat_size_t, size); +COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, compat_size_t, size); +COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, compat_size_t, size); +COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, compat_size_t, size); +COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 1e1b71f..f23a462 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -159,12 +159,6 @@ ENTRY(compat_sys_nanosleep_wrapper) llgtr %r3,%r3 # struct compat_timespec * jg compat_sys_nanosleep # branch to system call -ENTRY(sys32_getdents64_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg sys_getdents64 # branch to system call - ENTRY(compat_sys_fcntl64_wrapper) llgfr %r2,%r2 # unsigned int llgfr %r3,%r3 # unsigned int @@ -175,60 +169,6 @@ ENTRY(sys32_stime_wrapper) llgtr %r2,%r2 # long * jg compat_sys_stime # branch to system call -ENTRY(sys32_getxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_getxattr - -ENTRY(sys32_lgetxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_lgetxattr - -ENTRY(sys32_fgetxattr_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # void * - llgfr %r5,%r5 # size_t - jg sys_fgetxattr - -ENTRY(sys32_listxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_listxattr - -ENTRY(sys32_llistxattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_llistxattr - -ENTRY(sys32_flistxattr_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - jg sys_flistxattr - -ENTRY(sys32_removexattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_removexattr - -ENTRY(sys32_lremovexattr_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - jg sys_lremovexattr - -ENTRY(sys32_fremovexattr_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # char * - jg sys_fremovexattr - ENTRY(sys32_sched_setaffinity_wrapper) lgfr %r2,%r2 # int llgfr %r3,%r3 # unsigned int diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 053b271..cb6fc8a 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -228,22 +228,22 @@ SYSCALL(sys_setfsgid,sys_setfsgid,compat_sys_setfsgid) SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root) SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore) SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise) -SYSCALL(sys_getdents64,sys_getdents64,sys32_getdents64_wrapper) /* 220 */ +SYSCALL(sys_getdents64,sys_getdents64,compat_sys_getdents64) /* 220 */ SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead) SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr) SYSCALL(sys_lsetxattr,sys_lsetxattr,compat_sys_lsetxattr) /* 225 */ SYSCALL(sys_fsetxattr,sys_fsetxattr,compat_sys_fsetxattr) -SYSCALL(sys_getxattr,sys_getxattr,sys32_getxattr_wrapper) -SYSCALL(sys_lgetxattr,sys_lgetxattr,sys32_lgetxattr_wrapper) -SYSCALL(sys_fgetxattr,sys_fgetxattr,sys32_fgetxattr_wrapper) -SYSCALL(sys_listxattr,sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -SYSCALL(sys_llistxattr,sys_llistxattr,sys32_llistxattr_wrapper) -SYSCALL(sys_flistxattr,sys_flistxattr,sys32_flistxattr_wrapper) -SYSCALL(sys_removexattr,sys_removexattr,sys32_removexattr_wrapper) -SYSCALL(sys_lremovexattr,sys_lremovexattr,sys32_lremovexattr_wrapper) -SYSCALL(sys_fremovexattr,sys_fremovexattr,sys32_fremovexattr_wrapper) /* 235 */ +SYSCALL(sys_getxattr,sys_getxattr,compat_sys_getxattr) +SYSCALL(sys_lgetxattr,sys_lgetxattr,compat_sys_lgetxattr) +SYSCALL(sys_fgetxattr,sys_fgetxattr,compat_sys_fgetxattr) +SYSCALL(sys_listxattr,sys_listxattr,compat_sys_listxattr) /* 230 */ +SYSCALL(sys_llistxattr,sys_llistxattr,compat_sys_llistxattr) +SYSCALL(sys_flistxattr,sys_flistxattr,compat_sys_flistxattr) +SYSCALL(sys_removexattr,sys_removexattr,compat_sys_removexattr) +SYSCALL(sys_lremovexattr,sys_lremovexattr,compat_sys_lremovexattr) +SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr) /* 235 */ SYSCALL(sys_gettid,sys_gettid,sys_gettid) SYSCALL(sys_tkill,sys_tkill,sys_tkill_wrapper) SYSCALL(sys_futex,sys_futex,compat_sys_futex) -- cgit v0.10.2 From 9c4d62fab4d29655a5ab198f3756693481a6b2da Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 28 Feb 2014 13:22:47 +0100 Subject: s390/compat: convert system call wrappers to C part 11 Signed-off-by: Heiko Carstens diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 5d7e8cf..1174ea2 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -65,6 +65,7 @@ typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; typedef s32 compat_key_t; typedef s32 compat_timer_t; +typedef u32 compat_aio_context_t; typedef s32 compat_int_t; typedef s32 compat_long_t; diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 97197b8..76d714e 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -124,3 +124,13 @@ COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, compat_size_t, si COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); +COMPAT_SYSCALL_WRAP1(exit_group, int, error_code); +COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr); +COMPAT_SYSCALL_WRAP1(epoll_create, int, size); +COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event); +COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); +COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id); +COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); +COMPAT_SYSCALL_WRAP1(io_destroy, compat_aio_context_t, ctx); +COMPAT_SYSCALL_WRAP3(io_cancel, compat_aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); +COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index f23a462..9ada8fb 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -181,32 +181,6 @@ ENTRY(sys32_sched_getaffinity_wrapper) llgtr %r4,%r4 # unsigned long * jg compat_sys_sched_getaffinity -ENTRY(sys32_exit_group_wrapper) - lgfr %r2,%r2 # int - jg sys_exit_group # branch to system call - -ENTRY(sys32_set_tid_address_wrapper) - llgtr %r2,%r2 # int * - jg sys_set_tid_address # branch to system call - -ENTRY(sys_epoll_create_wrapper) - lgfr %r2,%r2 # int - jg sys_epoll_create # branch to system call - -ENTRY(sys_epoll_ctl_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - llgtr %r5,%r5 # struct epoll_event * - jg sys_epoll_ctl # branch to system call - -ENTRY(sys_epoll_wait_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct epoll_event * - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - jg sys_epoll_wait # branch to system call - ENTRY(sys32_clock_settime_wrapper) lgfr %r2,%r2 # clockid_t (int) llgtr %r3,%r3 # struct compat_timespec * @@ -247,23 +221,11 @@ ENTRY(sys32_timer_gettime_wrapper) llgtr %r3,%r3 # struct compat_itimerspec * jg compat_sys_timer_gettime -ENTRY(sys32_timer_getoverrun_wrapper) - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_getoverrun - -ENTRY(sys32_timer_delete_wrapper) - lgfr %r2,%r2 # timer_t (int) - jg sys_timer_delete - ENTRY(sys32_io_setup_wrapper) llgfr %r2,%r2 # unsigned int llgtr %r3,%r3 # u32 * jg compat_sys_io_setup -ENTRY(sys32_io_destroy_wrapper) - llgfr %r2,%r2 # (aio_context_t) u32 - jg sys_io_destroy - ENTRY(sys32_io_getevents_wrapper) llgfr %r2,%r2 # (aio_context_t) u32 lgfr %r3,%r3 # long @@ -278,12 +240,6 @@ ENTRY(sys32_io_submit_wrapper) llgtr %r4,%r4 # struct iocb ** jg compat_sys_io_submit -ENTRY(sys32_io_cancel_wrapper) - llgfr %r2,%r2 # (aio_context_t) u32 - llgtr %r3,%r3 # struct iocb * - llgtr %r4,%r4 # struct io_event * - jg sys_io_cancel - ENTRY(compat_sys_statfs64_wrapper) llgtr %r2,%r2 # const char * llgfr %r3,%r3 # compat_size_t @@ -303,10 +259,6 @@ ENTRY(compat_sys_mq_open_wrapper) llgtr %r5,%r5 # struct compat_mq_attr * jg compat_sys_mq_open -ENTRY(sys32_mq_unlink_wrapper) - llgtr %r2,%r2 # const char * - jg sys_mq_unlink - ENTRY(compat_sys_mq_timedsend_wrapper) lgfr %r2,%r2 # mqd_t llgtr %r3,%r3 # const char * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index cb6fc8a..927da9c 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -252,21 +252,21 @@ SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrap SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill_wrapper) NI_SYSCALL /* reserved for TUX */ SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper) -SYSCALL(sys_io_destroy,sys_io_destroy,sys32_io_destroy_wrapper) +SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy) SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */ SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper) -SYSCALL(sys_io_cancel,sys_io_cancel,sys32_io_cancel_wrapper) -SYSCALL(sys_exit_group,sys_exit_group,sys32_exit_group_wrapper) -SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper) -SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */ -SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper) -SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper) +SYSCALL(sys_io_cancel,sys_io_cancel,compat_sys_io_cancel) +SYSCALL(sys_exit_group,sys_exit_group,compat_sys_exit_group) +SYSCALL(sys_epoll_create,sys_epoll_create,compat_sys_epoll_create) +SYSCALL(sys_epoll_ctl,sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */ +SYSCALL(sys_epoll_wait,sys_epoll_wait,compat_sys_epoll_wait) +SYSCALL(sys_set_tid_address,sys_set_tid_address,compat_sys_set_tid_address) SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64) SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) -SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,sys32_timer_getoverrun_wrapper) -SYSCALL(sys_timer_delete,sys_timer_delete,sys32_timer_delete_wrapper) +SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,compat_sys_timer_getoverrun) +SYSCALL(sys_timer_delete,sys_timer_delete,compat_sys_timer_delete) SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) @@ -280,7 +280,7 @@ NI_SYSCALL /* 268 sys_mbind */ NI_SYSCALL /* 269 sys_get_mempolicy */ NI_SYSCALL /* 270 sys_set_mempolicy */ SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper) -SYSCALL(sys_mq_unlink,sys_mq_unlink,sys32_mq_unlink_wrapper) +SYSCALL(sys_mq_unlink,sys_mq_unlink,compat_sys_mq_unlink) SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ -- cgit v0.10.2 From 20f7835c0e1836ba73516abac06725155d2149d2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 28 Feb 2014 13:46:09 +0100 Subject: s390/compat: convert system call wrappers to C part 12 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 76d714e..a89daaf 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -134,3 +134,13 @@ COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); COMPAT_SYSCALL_WRAP1(io_destroy, compat_aio_context_t, ctx); COMPAT_SYSCALL_WRAP3(io_cancel, compat_aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); +COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, compat_size_t, len, key_serial_t, id); +COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); +COMPAT_SYSCALL_WRAP5(remap_file_pages, u32, start, u32, size, u32, prot, u32, pgoff, u32, flags); +COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio); +COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who); +COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); +COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); +COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, compat_uid_t, user, compat_gid_t, group, int, flag); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 9ada8fb..74b8128 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -286,29 +286,6 @@ ENTRY(compat_sys_mq_getsetattr_wrapper) llgtr %r4,%r4 # struct compat_mq_attr * jg compat_sys_mq_getsetattr -ENTRY(compat_sys_add_key_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # size_t - llgfr %r6,%r6 # (key_serial_t) u32 - jg sys_add_key - -ENTRY(compat_sys_request_key_wrapper) - llgtr %r2,%r2 # const char * - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # const void * - llgfr %r5,%r5 # (key_serial_t) u32 - jg sys_request_key - -ENTRY(sys32_remap_file_pages_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgfr %r4,%r4 # unsigned long - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_remap_file_pages - ENTRY(compat_sys_kexec_load_wrapper) llgfr %r2,%r2 # unsigned long llgfr %r3,%r3 # unsigned long @@ -316,49 +293,6 @@ ENTRY(compat_sys_kexec_load_wrapper) llgfr %r5,%r5 # unsigned long jg compat_sys_kexec_load -ENTRY(sys_ioprio_set_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - lgfr %r4,%r4 # int - jg sys_ioprio_set - -ENTRY(sys_ioprio_get_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_ioprio_get - -ENTRY(sys_inotify_add_watch_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # u32 - jg sys_inotify_add_watch - -ENTRY(sys_inotify_rm_watch_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # u32 - jg sys_inotify_rm_watch - -ENTRY(sys_mkdirat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_mkdirat - -ENTRY(sys_mknodat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgfr %r5,%r5 # unsigned int - jg sys_mknodat - -ENTRY(sys_fchownat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # uid_t - llgfr %r5,%r5 # gid_t - lgfr %r6,%r6 # int - jg sys_fchownat - ENTRY(compat_sys_futimesat_wrapper) llgfr %r2,%r2 # unsigned int llgtr %r3,%r3 # char * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 927da9c..dbc9e41 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -275,7 +275,7 @@ NI_SYSCALL /* reserved for vserver */ SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) -SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper) +SYSCALL(sys_remap_file_pages,sys_remap_file_pages,compat_sys_remap_file_pages) NI_SYSCALL /* 268 sys_mbind */ NI_SYSCALL /* 269 sys_get_mempolicy */ NI_SYSCALL /* 270 sys_set_mempolicy */ @@ -286,20 +286,20 @@ SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapp SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper) -SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key_wrapper) -SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key_wrapper) +SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key) +SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key) SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl_wrapper) /* 280 */ SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid) -SYSCALL(sys_ioprio_set,sys_ioprio_set,sys_ioprio_set_wrapper) -SYSCALL(sys_ioprio_get,sys_ioprio_get,sys_ioprio_get_wrapper) +SYSCALL(sys_ioprio_set,sys_ioprio_set,compat_sys_ioprio_set) +SYSCALL(sys_ioprio_get,sys_ioprio_get,compat_sys_ioprio_get) SYSCALL(sys_inotify_init,sys_inotify_init,sys_inotify_init) -SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,sys_inotify_add_watch_wrapper) /* 285 */ -SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,sys_inotify_rm_watch_wrapper) +SYSCALL(sys_inotify_add_watch,sys_inotify_add_watch,compat_sys_inotify_add_watch) /* 285 */ +SYSCALL(sys_inotify_rm_watch,sys_inotify_rm_watch,compat_sys_inotify_rm_watch) NI_SYSCALL /* 287 sys_migrate_pages */ SYSCALL(sys_openat,sys_openat,compat_sys_openat) -SYSCALL(sys_mkdirat,sys_mkdirat,sys_mkdirat_wrapper) -SYSCALL(sys_mknodat,sys_mknodat,sys_mknodat_wrapper) /* 290 */ -SYSCALL(sys_fchownat,sys_fchownat,sys_fchownat_wrapper) +SYSCALL(sys_mkdirat,sys_mkdirat,compat_sys_mkdirat) +SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat) /* 290 */ +SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat) SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64) SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) -- cgit v0.10.2 From 28798abc9c21087755a26e782868cde27801a1da Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 28 Feb 2014 15:33:00 +0100 Subject: s390/compat: convert system call wrappers to C part 13 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index a89daaf..df631af 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -144,3 +144,13 @@ COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, compat_uid_t, user, compat_gid_t, group, int, flag); +COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag); +COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags); +COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(unshare, compat_ulong_t, unshare_flags); +COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, compat_size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, compat_size_t, len, unsigned int, flags); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 74b8128..583cac7 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -299,52 +299,6 @@ ENTRY(compat_sys_futimesat_wrapper) llgtr %r4,%r4 # struct timeval * jg compat_sys_futimesat -ENTRY(sys_unlinkat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_unlinkat - -ENTRY(sys_renameat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # const char * - jg sys_renameat - -ENTRY(sys_linkat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # const char * - lgfr %r6,%r6 # int - jg sys_linkat - -ENTRY(sys_symlinkat_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgtr %r4,%r4 # const char * - jg sys_symlinkat - -ENTRY(sys_readlinkat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgtr %r4,%r4 # char * - lgfr %r5,%r5 # int - jg sys_readlinkat - -ENTRY(sys_fchmodat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # mode_t - jg sys_fchmodat - -ENTRY(sys_faccessat_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char * - lgfr %r4,%r4 # int - jg sys_faccessat - ENTRY(compat_sys_pselect6_wrapper) lgfr %r2,%r2 # int llgtr %r3,%r3 # fd_set * @@ -363,20 +317,6 @@ ENTRY(compat_sys_ppoll_wrapper) llgfr %r6,%r6 # size_t jg compat_sys_ppoll -ENTRY(sys_unshare_wrapper) - llgfr %r2,%r2 # unsigned long - jg sys_unshare - -ENTRY(sys_splice_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # loff_t * - lgfr %r4,%r4 # int - llgtr %r5,%r5 # loff_t * - llgfr %r6,%r6 # size_t - llgf %r0,164(%r15) # unsigned int - stg %r0,160(%r15) - jg sys_splice - ENTRY(sys_sync_file_range_wrapper) lgfr %r2,%r2 # int sllg %r3,%r3,32 # get high word of 64bit loff_t @@ -386,13 +326,6 @@ ENTRY(sys_sync_file_range_wrapper) llgf %r5,164(%r15) # unsigned int jg sys_sync_file_range -ENTRY(sys_tee_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - jg sys_tee - ENTRY(sys_getcpu_wrapper) llgtr %r2,%r2 # unsigned * llgtr %r3,%r3 # unsigned * diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index dbc9e41..7069bec 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -302,21 +302,21 @@ SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat) /* 290 */ SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat) SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64) -SYSCALL(sys_unlinkat,sys_unlinkat,sys_unlinkat_wrapper) -SYSCALL(sys_renameat,sys_renameat,sys_renameat_wrapper) /* 295 */ -SYSCALL(sys_linkat,sys_linkat,sys_linkat_wrapper) -SYSCALL(sys_symlinkat,sys_symlinkat,sys_symlinkat_wrapper) -SYSCALL(sys_readlinkat,sys_readlinkat,sys_readlinkat_wrapper) -SYSCALL(sys_fchmodat,sys_fchmodat,sys_fchmodat_wrapper) -SYSCALL(sys_faccessat,sys_faccessat,sys_faccessat_wrapper) /* 300 */ +SYSCALL(sys_unlinkat,sys_unlinkat,compat_sys_unlinkat) +SYSCALL(sys_renameat,sys_renameat,compat_sys_renameat) /* 295 */ +SYSCALL(sys_linkat,sys_linkat,compat_sys_linkat) +SYSCALL(sys_symlinkat,sys_symlinkat,compat_sys_symlinkat) +SYSCALL(sys_readlinkat,sys_readlinkat,compat_sys_readlinkat) +SYSCALL(sys_fchmodat,sys_fchmodat,compat_sys_fchmodat) +SYSCALL(sys_faccessat,sys_faccessat,compat_sys_faccessat) /* 300 */ SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper) SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper) -SYSCALL(sys_unshare,sys_unshare,sys_unshare_wrapper) +SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare) SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list) SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list) -SYSCALL(sys_splice,sys_splice,sys_splice_wrapper) +SYSCALL(sys_splice,sys_splice,compat_sys_splice) SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper) -SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) +SYSCALL(sys_tee,sys_tee,compat_sys_tee) SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) NI_SYSCALL /* 310 sys_move_pages */ SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) -- cgit v0.10.2 From 7f6afe87a02be1d889b36ce076d7beddd4cd5332 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 1 Mar 2014 11:52:48 +0100 Subject: s390/compat: convert system call wrappers to C part 14 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index df631af..7b7c9f6 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -154,3 +154,13 @@ COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mo COMPAT_SYSCALL_WRAP1(unshare, compat_ulong_t, unshare_flags); COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, compat_size_t, len, unsigned int, flags); COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, compat_size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); +COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count); +COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags); +COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags); +COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags); +COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags); +COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags); +COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); +COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 583cac7..d17bc86 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -326,12 +326,6 @@ ENTRY(sys_sync_file_range_wrapper) llgf %r5,164(%r15) # unsigned int jg sys_sync_file_range -ENTRY(sys_getcpu_wrapper) - llgtr %r2,%r2 # unsigned * - llgtr %r3,%r3 # unsigned * - llgtr %r4,%r4 # struct getcpu_cache * - jg sys_getcpu - ENTRY(compat_sys_utimes_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # struct compat_timeval * @@ -344,10 +338,6 @@ ENTRY(compat_sys_utimensat_wrapper) lgfr %r5,%r5 # int jg compat_sys_utimensat -ENTRY(sys_eventfd_wrapper) - llgfr %r2,%r2 # unsigned int - jg sys_eventfd - ENTRY(sys_fallocate_wrapper) lgfr %r2,%r2 # int lgfr %r3,%r3 # int @@ -357,46 +347,6 @@ ENTRY(sys_fallocate_wrapper) l %r5,164(%r15) # get low word of 64bit loff_t jg sys_fallocate -ENTRY(sys_timerfd_create_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_timerfd_create - -ENTRY(sys_eventfd2_wrapper) - llgfr %r2,%r2 # unsigned int - lgfr %r3,%r3 # int - jg sys_eventfd2 - -ENTRY(sys_inotify_init1_wrapper) - lgfr %r2,%r2 # int - jg sys_inotify_init1 - -ENTRY(sys_pipe2_wrapper) - llgtr %r2,%r2 # u32 * - lgfr %r3,%r3 # int - jg sys_pipe2 # branch to system call - -ENTRY(sys_dup3_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - lgfr %r4,%r4 # int - jg sys_dup3 # branch to system call - -ENTRY(sys_epoll_create1_wrapper) - lgfr %r2,%r2 # int - jg sys_epoll_create1 # branch to system call - -ENTRY(sys_tkill_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # int - jg sys_tkill # branch to system call - -ENTRY(sys_tgkill_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - lgfr %r4,%r4 # int - jg sys_tgkill # branch to system call - ENTRY(compat_sys_keyctl_wrapper) llgfr %r2,%r2 # u32 llgfr %r3,%r3 # u32 diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 7069bec..5cfbd42 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -245,11 +245,11 @@ SYSCALL(sys_removexattr,sys_removexattr,compat_sys_removexattr) SYSCALL(sys_lremovexattr,sys_lremovexattr,compat_sys_lremovexattr) SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr) /* 235 */ SYSCALL(sys_gettid,sys_gettid,sys_gettid) -SYSCALL(sys_tkill,sys_tkill,sys_tkill_wrapper) +SYSCALL(sys_tkill,sys_tkill,compat_sys_tkill) SYSCALL(sys_futex,sys_futex,compat_sys_futex) SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper) SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */ -SYSCALL(sys_tgkill,sys_tgkill,sys_tgkill_wrapper) +SYSCALL(sys_tgkill,sys_tgkill,compat_sys_tgkill) NI_SYSCALL /* reserved for TUX */ SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper) SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy) @@ -319,23 +319,23 @@ SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper) SYSCALL(sys_tee,sys_tee,compat_sys_tee) SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) NI_SYSCALL /* 310 sys_move_pages */ -SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) +SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu) SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper) SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd) NI_SYSCALL /* 317 old sys_timer_fd */ -SYSCALL(sys_eventfd,sys_eventfd,sys_eventfd_wrapper) -SYSCALL(sys_timerfd_create,sys_timerfd_create,sys_timerfd_create_wrapper) +SYSCALL(sys_eventfd,sys_eventfd,compat_sys_eventfd) +SYSCALL(sys_timerfd_create,sys_timerfd_create,compat_sys_timerfd_create) SYSCALL(sys_timerfd_settime,sys_timerfd_settime,compat_sys_timerfd_settime) /* 320 */ SYSCALL(sys_timerfd_gettime,sys_timerfd_gettime,compat_sys_timerfd_gettime) SYSCALL(sys_signalfd4,sys_signalfd4,compat_sys_signalfd4) -SYSCALL(sys_eventfd2,sys_eventfd2,sys_eventfd2_wrapper) -SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper) -SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */ -SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) -SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) +SYSCALL(sys_eventfd2,sys_eventfd2,compat_sys_eventfd2) +SYSCALL(sys_inotify_init1,sys_inotify_init1,compat_sys_inotify_init1) +SYSCALL(sys_pipe2,sys_pipe2,compat_sys_pipe2) /* 325 */ +SYSCALL(sys_dup3,sys_dup3,compat_sys_dup3) +SYSCALL(sys_epoll_create1,sys_epoll_create1,compat_sys_epoll_create1) SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv) SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev) SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */ -- cgit v0.10.2 From 00fcb1494f645be4e0cd703756448fc64ca2a44a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 1 Mar 2014 12:18:46 +0100 Subject: s390/compat: convert system call wrappers to C part 15 Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 7b7c9f6..17600b1 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -164,3 +164,15 @@ COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); +COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, u32, flags); +COMPAT_SYSCALL_WRAP5(clone, u32, newsp, u32, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); +COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); +COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); +COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); +COMPAT_SYSCALL_WRAP1(syncfs, int, fd); +COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype); +COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum); +COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, u32, idx1, u32, idx2); +COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); +COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index d17bc86..a1a1367 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -355,61 +355,17 @@ ENTRY(compat_sys_keyctl_wrapper) llgfr %r6,%r6 # u32 jg compat_sys_keyctl # branch to system call -ENTRY(sys_perf_event_open_wrapper) - llgtr %r2,%r2 # const struct perf_event_attr * - lgfr %r3,%r3 # pid_t - lgfr %r4,%r4 # int - lgfr %r5,%r5 # int - llgfr %r6,%r6 # unsigned long - jg sys_perf_event_open # branch to system call - -ENTRY(sys_clone_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # int * - llgtr %r5,%r5 # int * - jg sys_clone # branch to system call - ENTRY(sys32_execve_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # compat_uptr_t * llgtr %r4,%r4 # compat_uptr_t * jg compat_sys_execve # branch to system call -ENTRY(sys_fanotify_init_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - jg sys_fanotify_init # branch to system call - -ENTRY(sys_prlimit64_wrapper) - lgfr %r2,%r2 # pid_t - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # const struct rlimit64 __user * - llgtr %r5,%r5 # struct rlimit64 __user * - jg sys_prlimit64 # branch to system call - -ENTRY(sys_name_to_handle_at_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char __user * - llgtr %r4,%r4 # struct file_handle __user * - llgtr %r5,%r5 # int __user * - lgfr %r6,%r6 # int - jg sys_name_to_handle_at - ENTRY(compat_sys_clock_adjtime_wrapper) lgfr %r2,%r2 # clockid_t (int) llgtr %r3,%r3 # struct compat_timex __user * jg compat_sys_clock_adjtime -ENTRY(sys_syncfs_wrapper) - lgfr %r2,%r2 # int - jg sys_syncfs - -ENTRY(sys_setns_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_setns - ENTRY(compat_sys_process_vm_readv_wrapper) lgfr %r2,%r2 # compat_pid_t llgtr %r3,%r3 # struct compat_iovec __user * @@ -429,33 +385,3 @@ ENTRY(compat_sys_process_vm_writev_wrapper) llgf %r0,164(%r15) # unsigned long stg %r0,160(%r15) jg compat_sys_process_vm_writev - -ENTRY(sys_s390_runtime_instr_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - jg sys_s390_runtime_instr - -ENTRY(sys_kcmp_wrapper) - lgfr %r2,%r2 # pid_t - lgfr %r3,%r3 # pid_t - lgfr %r4,%r4 # int - llgfr %r5,%r5 # unsigned long - llgfr %r6,%r6 # unsigned long - jg sys_kcmp - -ENTRY(sys_finit_module_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const char __user * - lgfr %r4,%r4 # int - jg sys_finit_module - -ENTRY(sys_sched_setattr_wrapper) - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # struct sched_attr __user * - jg sys_sched_setattr - -ENTRY(sys_sched_getattr_wrapper) - lgfr %r2,%r2 # pid_t - llgtr %r3,%r3 # const char __user * - llgfr %r4,%r4 # unsigned int - jg sys_sched_getattr diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 8c6c022..e916788 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -73,5 +73,6 @@ long sys32_sigreturn(void); long sys32_rt_sigreturn(void); long sys_s390_personality(unsigned int personality); +long sys_s390_runtime_instr(int command, int signum); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 5cfbd42..3ad73a6 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -128,7 +128,7 @@ SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync) SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) -SYSCALL(sys_clone,sys_clone,sys_clone_wrapper) /* 120 */ +SYSCALL(sys_clone,sys_clone,compat_sys_clone) /* 120 */ SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname) SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname) NI_SYSCALL /* modify_ldt for i386 */ @@ -339,19 +339,19 @@ SYSCALL(sys_epoll_create1,sys_epoll_create1,compat_sys_epoll_create1) SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv) SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev) SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo) /* 330 */ -SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper) -SYSCALL(sys_fanotify_init,sys_fanotify_init,sys_fanotify_init_wrapper) +SYSCALL(sys_perf_event_open,sys_perf_event_open,compat_sys_perf_event_open) +SYSCALL(sys_fanotify_init,sys_fanotify_init,compat_sys_fanotify_init) SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark) -SYSCALL(sys_prlimit64,sys_prlimit64,sys_prlimit64_wrapper) -SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrapper) /* 335 */ +SYSCALL(sys_prlimit64,sys_prlimit64,compat_sys_prlimit64) +SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */ SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at) SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) -SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) -SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) +SYSCALL(sys_syncfs,sys_syncfs,compat_sys_syncfs) +SYSCALL(sys_setns,sys_setns,compat_sys_setns) SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) -SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper) -SYSCALL(sys_kcmp,sys_kcmp,sys_kcmp_wrapper) -SYSCALL(sys_finit_module,sys_finit_module,sys_finit_module_wrapper) -SYSCALL(sys_sched_setattr,sys_sched_setattr,sys_sched_setattr_wrapper) /* 345 */ -SYSCALL(sys_sched_getattr,sys_sched_getattr,sys_sched_getattr_wrapper) +SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,compat_sys_s390_runtime_instr) +SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp) +SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) +SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */ +SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr) -- cgit v0.10.2 From 2c81fc4fb41f589454861bbdb90ffde73840b8f8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 1 Mar 2014 13:09:26 +0100 Subject: s390/compat: add sync_file_range and fallocate compat syscalls The compat syscall wrappers for sync_file_range and fallocate merged 32 bit parameters into 64 bit parameters. Therefore they did more than just the usual zero and/or sign extension of system call parameters. So convert these two wrappers to full s390 specific compat sytem calls. Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 5909774..ca38139 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -504,3 +504,17 @@ COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, arg a.advice = POSIX_FADV_NOREUSE; return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); } + +COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, + u32, nhigh, u32, nlow, unsigned int, flags) +{ + return sys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, + ((u64)nhigh << 32) + nlow, flags); +} + +COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow, + u32, lenhigh, u32, lenlow) +{ + return sys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, + ((u64)lenhigh << 32) + lenlow); +} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 7d2ce4b..330e11d 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -110,4 +110,7 @@ long compat_sys_s390_read(unsigned int fd, char __user * buf, compat_size_t coun long compat_sys_s390_write(unsigned int fd, const char __user * buf, compat_size_t count); long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise); long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); +long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags); +long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow); + #endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index a1a1367..23d9f8a 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -317,15 +317,6 @@ ENTRY(compat_sys_ppoll_wrapper) llgfr %r6,%r6 # size_t jg compat_sys_ppoll -ENTRY(sys_sync_file_range_wrapper) - lgfr %r2,%r2 # int - sllg %r3,%r3,32 # get high word of 64bit loff_t - or %r3,%r4 # get low word of 64bit loff_t - sllg %r4,%r5,32 # get high word of 64bit loff_t - or %r4,%r6 # get low word of 64bit loff_t - llgf %r5,164(%r15) # unsigned int - jg sys_sync_file_range - ENTRY(compat_sys_utimes_wrapper) llgtr %r2,%r2 # char * llgtr %r3,%r3 # struct compat_timeval * @@ -338,15 +329,6 @@ ENTRY(compat_sys_utimensat_wrapper) lgfr %r5,%r5 # int jg compat_sys_utimensat -ENTRY(sys_fallocate_wrapper) - lgfr %r2,%r2 # int - lgfr %r3,%r3 # int - sllg %r4,%r4,32 # get high word of 64bit loff_t - lr %r4,%r5 # get low word of 64bit loff_t - sllg %r5,%r6,32 # get high word of 64bit loff_t - l %r5,164(%r15) # get low word of 64bit loff_t - jg sys_fallocate - ENTRY(compat_sys_keyctl_wrapper) llgfr %r2,%r2 # u32 llgfr %r3,%r3 # u32 diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 3ad73a6..dbdec47 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -315,14 +315,14 @@ SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare) SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list) SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list) SYSCALL(sys_splice,sys_splice,compat_sys_splice) -SYSCALL(sys_sync_file_range,sys_sync_file_range,sys_sync_file_range_wrapper) +SYSCALL(sys_sync_file_range,sys_sync_file_range,compat_sys_s390_sync_file_range) SYSCALL(sys_tee,sys_tee,compat_sys_tee) SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) NI_SYSCALL /* 310 sys_move_pages */ SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu) SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) -SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper) +SYSCALL(sys_s390_fallocate,sys_fallocate,compat_sys_s390_fallocate) SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd) NI_SYSCALL /* 317 old sys_timer_fd */ -- cgit v0.10.2 From 28964d32d495a0753986d464c48c8e1ae73699be Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Mar 2014 07:34:45 +0000 Subject: arm64: add AT_HWCAP2 support for 32-bit compat Add support for the ELF auxv entry AT_HWCAP2 when running 32-bit ELF binaries in compat mode. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 6cddbb0..9a4cbd6 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -41,7 +41,8 @@ #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP (compat_elf_hwcap) -extern unsigned int compat_elf_hwcap; +#define COMPAT_ELF_HWCAP2 (compat_elf_hwcap2) +extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; #endif extern unsigned long elf_hwcap; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c8e9eff..349c492 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -69,6 +69,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap); COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; #endif static const char *cpu_name; -- cgit v0.10.2 From 4cf761cdccc3b050f768f25dc36342cdfec4efdd Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 3 Mar 2014 07:34:46 +0000 Subject: arm64: advertise ARMv8 extensions to 32-bit compat ELF binaries This adds support for advertising the presence of ARMv8 Crypto Extensions in the Aarch32 execution state to 32-bit ELF binaries running in 32-bit compat mode under the arm64 kernel. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 9a4cbd6..024c461 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -32,6 +32,12 @@ #define COMPAT_HWCAP_IDIV (COMPAT_HWCAP_IDIVA|COMPAT_HWCAP_IDIVT) #define COMPAT_HWCAP_EVTSTRM (1 << 21) +#define COMPAT_HWCAP2_AES (1 << 0) +#define COMPAT_HWCAP2_PMULL (1 << 1) +#define COMPAT_HWCAP2_SHA1 (1 << 2) +#define COMPAT_HWCAP2_SHA2 (1 << 3) +#define COMPAT_HWCAP2_CRC32 (1 << 4) + #ifndef __ASSEMBLY__ /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 349c492..67da307 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -243,6 +243,38 @@ static void __init setup_processor(void) block = (features >> 16) & 0xf; if (block && !(block & 0x8)) elf_hwcap |= HWCAP_CRC32; + +#ifdef CONFIG_COMPAT + /* + * ID_ISAR5_EL1 carries similar information as above, but pertaining to + * the Aarch32 32-bit execution state. + */ + features = read_cpuid(ID_ISAR5_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; + case 1: + compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; + case 0: + break; + } + } + + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; + + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif } static void __init setup_machine_fdt(phys_addr_t dt_phys) -- cgit v0.10.2 From ab4f8bba19323eb78b7473df42b225eb14090fcc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 1 Mar 2014 13:45:03 +0100 Subject: s390/compat: automatic zero, sign and pointer conversion of syscalls Instead of explicitly changing compat system call parameters from e.g. unsigned long to compat_ulong_t let the COMPAT_SYSCALL_WRAP macros automatically detect (unsigned) long parameters and zero and sign extend them automatically. The resulting binary is completely identical. In addition add a sys_[system call name] prototype for each system call wrapper. This will cause compile errors if the prototype does not match the prototype in include/linux/syscall.h. Therefore we should now always get the correct zero and sign extension of system call parameters. Pointers are handled like before. Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index 17600b1..d6a2cac 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -15,11 +15,27 @@ #define COMPAT_SYSCALL_WRAP6(name, ...) \ COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) +#define __SC_COMPAT_CAST(t, a) \ +({ \ + long __ReS = a; \ + \ + BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ + !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \ + if (__TYPE_IS_L(t)) \ + __ReS = (s32)a; \ + if (__TYPE_IS_UL(t)) \ + __ReS = (u32)a; \ + if (__TYPE_IS_PTR(t)) \ + __ReS = a & 0x7fffffff; \ + (t)__ReS; \ +}) + #define COMPAT_SYSCALL_WRAPx(x, name, ...) \ + asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - return sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ + return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ } COMPAT_SYSCALL_WRAP1(exit, int, error_code); @@ -40,22 +56,22 @@ COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes); COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); -COMPAT_SYSCALL_WRAP1(brk, compat_ulong_t, brk); +COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk); COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); -COMPAT_SYSCALL_WRAP2(setpgid, compat_pid_t, pid, compat_pid_t, pgid); +COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid); COMPAT_SYSCALL_WRAP1(umask, int, mask); COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd); -COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, compat_old_sigset_t, mask); +COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask); COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); -COMPAT_SYSCALL_WRAP2(munmap, compat_ulong_t, addr, compat_size_t, len); +COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len); COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode); COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who); COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval); @@ -64,63 +80,63 @@ COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); -COMPAT_SYSCALL_WRAP3(mprotect, compat_ulong_t, start, compat_size_t, len, compat_ulong_t, prot); -COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, compat_ulong_t, len, const char __user *, uargs); +COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot); +COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs); COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); -COMPAT_SYSCALL_WRAP1(getpgid, compat_pid_t, pid); +COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid); COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd); -COMPAT_SYSCALL_WRAP2(bdflush, int, func, compat_long_t, data); -COMPAT_SYSCALL_WRAP3(sysfs, int, option, compat_ulong_t, arg1, compat_ulong_t, arg2); +COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data); +COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2); COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); -COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, u32, high, u32, low, loff_t __user *, result, unsigned int, whence); +COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence); COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd); -COMPAT_SYSCALL_WRAP3(msync, compat_ulong_t, start, compat_size_t, len, int, flags); -COMPAT_SYSCALL_WRAP1(getsid, compat_pid_t, pid); +COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags); +COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid); COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd); -COMPAT_SYSCALL_WRAP2(mlock, compat_ulong_t, start, compat_size_t, len); -COMPAT_SYSCALL_WRAP2(munlock, compat_ulong_t, start, compat_size_t, len); +COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len); COMPAT_SYSCALL_WRAP1(mlockall, int, flags); -COMPAT_SYSCALL_WRAP2(sched_setparam, compat_pid_t, pid, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP2(sched_getparam, compat_pid_t, pid, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP3(sched_setscheduler, compat_pid_t, pid, int, policy, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP1(sched_getscheduler, compat_pid_t, pid); +COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid); COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy); COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy); -COMPAT_SYSCALL_WRAP5(mremap, u32, addr, u32, old_len, u32, new_len, u32, flags, u32, new_addr); +COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr); COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); -COMPAT_SYSCALL_WRAP5(prctl, int, option, u32, arg2, u32, arg3, u32, arg4, u32, arg5); -COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, u32, size); +COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5); +COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size); COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); -COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, compat_uid_t, user, compat_gid_t, group); -COMPAT_SYSCALL_WRAP2(setreuid, compat_uid_t, ruid, compat_uid_t, euid); -COMPAT_SYSCALL_WRAP2(setregid, compat_gid_t, rgid, compat_gid_t, egid); -COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, compat_gid_t __user *, grouplist); -COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, compat_gid_t __user *, grouplist); -COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, compat_uid_t, user, compat_gid_t, group); -COMPAT_SYSCALL_WRAP3(setresuid, compat_uid_t, ruid, compat_uid_t, euid, compat_uid_t, suid); -COMPAT_SYSCALL_WRAP3(getresuid, compat_uid_t __user *, ruid, compat_uid_t __user *, euid, compat_uid_t __user *, suid); -COMPAT_SYSCALL_WRAP3(setresgid, compat_gid_t, rgid, compat_gid_t, egid, compat_gid_t, sgid); -COMPAT_SYSCALL_WRAP3(getresgid, compat_gid_t __user *, rgid, compat_gid_t __user *, egid, compat_gid_t __user *, sgid); -COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, compat_uid_t, user, compat_gid_t, group); -COMPAT_SYSCALL_WRAP1(setuid, compat_uid_t, uid); -COMPAT_SYSCALL_WRAP1(setgid, compat_gid_t, gid); -COMPAT_SYSCALL_WRAP1(setfsuid, compat_uid_t, uid); -COMPAT_SYSCALL_WRAP1(setfsgid, compat_gid_t, gid); +COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid); +COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid); +COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid); +COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid); +COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid); +COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid); +COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid); +COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid); COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); -COMPAT_SYSCALL_WRAP3(mincore, compat_ulong_t, start, compat_size_t, len, unsigned char __user *, vec); -COMPAT_SYSCALL_WRAP3(madvise, compat_ulong_t, start, compat_size_t, len, int, behavior); -COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); -COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); -COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, compat_size_t, size, int, flags); +COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec); +COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior); +COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags); COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count); -COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, compat_size_t, size); -COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, compat_size_t, size); -COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, compat_size_t, size); -COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, compat_size_t, size); -COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, compat_size_t, size); -COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, compat_size_t, size); +COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size); COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); @@ -131,19 +147,19 @@ COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id); COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); -COMPAT_SYSCALL_WRAP1(io_destroy, compat_aio_context_t, ctx); -COMPAT_SYSCALL_WRAP3(io_cancel, compat_aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); +COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx); +COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); -COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, compat_size_t, len, key_serial_t, id); +COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id); COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); -COMPAT_SYSCALL_WRAP5(remap_file_pages, u32, start, u32, size, u32, prot, u32, pgoff, u32, flags); +COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags); COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio); COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who); COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); -COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, compat_uid_t, user, compat_gid_t, group, int, flag); +COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag); COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag); COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname); COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags); @@ -151,9 +167,9 @@ COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz); COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode); COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode); -COMPAT_SYSCALL_WRAP1(unshare, compat_ulong_t, unshare_flags); -COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, compat_size_t, len, unsigned int, flags); -COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, compat_size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags); +COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags); COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count); COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags); @@ -164,15 +180,15 @@ COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); -COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, u32, flags); -COMPAT_SYSCALL_WRAP5(clone, u32, newsp, u32, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); +COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); +COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); COMPAT_SYSCALL_WRAP1(syncfs, int, fd); COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype); COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum); -COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, u32, idx1, u32, idx2); +COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2); COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a747a77..1e67b7a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -98,6 +98,8 @@ struct sigaltstack; #define __MAP(n,...) __MAP##n(__VA_ARGS__) #define __SC_DECL(t, a) t a +#define __TYPE_IS_L(t) (__same_type((t)0, 0L)) +#define __TYPE_IS_UL(t) (__same_type((t)0, 0UL)) #define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a #define __SC_CAST(t, a) (t) a -- cgit v0.10.2 From c2e7c3d0ef0d1a3765792a35ae9e6f91a3025214 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 3 Mar 2014 10:06:12 +0100 Subject: s390/compat: partial parameter conversion within syscall wrappers Parameter conversion within the system call wrappers is only needed for parameters which differ in size and have a size of eight bytes on 64 bit. For system call parameters with a size of less than eight byte the called system call itself will perform parameter conversion anyway. So we can save the double conversion of e.g. int parameters. The only types which need to be converted are therefore pointer and (unsigned) long parameters. Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c index d6a2cac..d123f5d 100644 --- a/arch/s390/kernel/compat_wrap.c +++ b/arch/s390/kernel/compat_wrap.c @@ -15,6 +15,9 @@ #define COMPAT_SYSCALL_WRAP6(name, ...) \ COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) +#define __SC_COMPAT_TYPE(t, a) \ + __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a + #define __SC_COMPAT_CAST(t, a) \ ({ \ long __ReS = a; \ @@ -30,10 +33,22 @@ (t)__ReS; \ }) +/* + * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by + * compat tasks. These wrappers will only be used for system calls where only + * the system call arguments need sign or zero extension or zeroing of the upper + * 33 bits of pointers. + * Note: since the wrapper function will afterwards call a system call which + * again performs zero and sign extension for all system call arguments with + * a size of less than eight bytes, these compat wrappers only touch those + * system call arguments with a size of eight bytes ((unsigned) long and + * pointers). Zero and sign extension for e.g. int parameters will be done by + * the regular system call wrappers. + */ #define COMPAT_SYSCALL_WRAPx(x, name, ...) \ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - asmlinkage long compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ { \ return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ } -- cgit v0.10.2 From 3b4467522630b7ea0d65a691007ef0a93d471f8f Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 4 Mar 2014 16:20:18 +0800 Subject: ALSA: hda - add automute fix for another dell AIO model When plugging a headphone or headset, lots of noise is heard from internal speaker, after changing the automute via amp instead of pinctl, the noise disappears. BugLink: https://bugs.launchpad.net/bugs/1268468 Cc: David Henningsson Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7ba7a11f..850296a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5164,7 +5164,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0625, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0626, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0628, "Dell", ALC668_FIXUP_AUTO_MUTE), - SND_PCI_QUIRK(0x1028, 0x064e, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x064e, "Dell", ALC668_FIXUP_AUTO_MUTE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A_CHMAP), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_CHMAP), -- cgit v0.10.2 From eb9cf4e8ec646a553f3b561d7a9e81acf044d876 Mon Sep 17 00:00:00 2001 From: Jason Cooper Date: Tue, 4 Mar 2014 05:32:40 +0000 Subject: Revert irqchip: irq-dove: Add PMU interrupt controller This reverts commit 40b367d95fb3d60fc1edb9ba8f6ef52272e48936. Russell King has raised the idea of creating a proper PMU driver for this SoC that would incorporate the functionality currently in this driver. It would also cover the use case for the graphics subsystem on this SoC. To prevent having to maintain the devicetree ABI for this limited interrupt-handler driver, we revert the driver before it hits a mainline tagged release (eg v3.15). Signed-off-by: Jason Cooper Cc: linux-arm-kernel@lists.infradead.org Cc: Andrew Lunn Cc: Sebastian Hesselbarth Cc: Gregory CLEMENT Cc: Russell King - ARM Linux Link: http://lkml.kernel.org/r/1393911160-7688-1-git-send-email-jason@lakedaemon.net Signed-off-by: Thomas Gleixner diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,dove-pmu-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,dove-pmu-intc.txt deleted file mode 100644 index 1feb582..0000000 --- a/Documentation/devicetree/bindings/interrupt-controller/marvell,dove-pmu-intc.txt +++ /dev/null @@ -1,17 +0,0 @@ -Marvell Dove Power Management Unit interrupt controller - -Required properties: -- compatible: shall be "marvell,dove-pmu-intc" -- reg: base address of PMU interrupt registers starting with CAUSE register -- interrupts: PMU interrupt of the main interrupt controller -- interrupt-controller: identifies the node as an interrupt controller -- #interrupt-cells: number of cells to encode an interrupt source, shall be 1 - -Example: - pmu_intc: pmu-interrupt-ctrl@d0050 { - compatible = "marvell,dove-pmu-intc"; - interrupt-controller; - #interrupt-cells = <1>; - reg = <0xd0050 0x8>; - interrupts = <33>; - }; diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 98589e7..5194afb 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -1,7 +1,6 @@ obj-$(CONFIG_IRQCHIP) += irqchip.o obj-$(CONFIG_ARCH_BCM2835) += irq-bcm2835.o -obj-$(CONFIG_ARCH_DOVE) += irq-dove.o obj-$(CONFIG_ARCH_EXYNOS) += exynos-combiner.o obj-$(CONFIG_ARCH_MMP) += irq-mmp.o obj-$(CONFIG_ARCH_MVEBU) += irq-armada-370-xp.o diff --git a/drivers/irqchip/irq-dove.c b/drivers/irqchip/irq-dove.c deleted file mode 100644 index 788acd8..0000000 --- a/drivers/irqchip/irq-dove.c +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Marvell Dove SoCs PMU IRQ chip driver. - * - * Andrew Lunn - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "irqchip.h" - -#define DOVE_PMU_IRQ_CAUSE 0x00 -#define DOVE_PMU_IRQ_MASK 0x04 - -static void dove_pmu_irq_handler(unsigned int irq, struct irq_desc *desc) -{ - struct irq_domain *d = irq_get_handler_data(irq); - struct irq_chip_generic *gc = irq_get_domain_generic_chip(d, 0); - u32 stat = readl_relaxed(gc->reg_base + DOVE_PMU_IRQ_CAUSE) & - gc->mask_cache; - - while (stat) { - u32 hwirq = ffs(stat) - 1; - - generic_handle_irq(irq_find_mapping(d, gc->irq_base + hwirq)); - stat &= ~(1 << hwirq); - } -} - -static void pmu_irq_ack(struct irq_data *d) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - struct irq_chip_type *ct = irq_data_get_chip_type(d); - u32 mask = ~d->mask; - - /* - * The PMU mask register is not RW0C: it is RW. This means that - * the bits take whatever value is written to them; if you write - * a '1', you will set the interrupt. - * - * Unfortunately this means there is NO race free way to clear - * these interrupts. - * - * So, let's structure the code so that the window is as small as - * possible. - */ - irq_gc_lock(gc); - mask &= irq_reg_readl(gc->reg_base + ct->regs.ack); - irq_reg_writel(mask, gc->reg_base + ct->regs.ack); - irq_gc_unlock(gc); -} - -static int __init dove_pmu_irq_init(struct device_node *np, - struct device_node *parent) -{ - unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; - struct resource r; - struct irq_domain *domain; - struct irq_chip_generic *gc; - int ret, irq, nrirqs = 7; - - domain = irq_domain_add_linear(np, nrirqs, - &irq_generic_chip_ops, NULL); - if (!domain) { - pr_err("%s: unable to add irq domain\n", np->name); - return -ENOMEM; - } - - ret = irq_alloc_domain_generic_chips(domain, nrirqs, 1, np->name, - handle_level_irq, clr, 0, IRQ_GC_INIT_MASK_CACHE); - if (ret) { - pr_err("%s: unable to alloc irq domain gc\n", np->name); - return ret; - } - - ret = of_address_to_resource(np, 0, &r); - if (ret) { - pr_err("%s: unable to get resource\n", np->name); - return ret; - } - - if (!request_mem_region(r.start, resource_size(&r), np->name)) { - pr_err("%s: unable to request mem region\n", np->name); - return -ENOMEM; - } - - /* Map the parent interrupt for the chained handler */ - irq = irq_of_parse_and_map(np, 0); - if (irq <= 0) { - pr_err("%s: unable to parse irq\n", np->name); - return -EINVAL; - } - - gc = irq_get_domain_generic_chip(domain, 0); - gc->reg_base = ioremap(r.start, resource_size(&r)); - if (!gc->reg_base) { - pr_err("%s: unable to map resource\n", np->name); - return -ENOMEM; - } - - gc->chip_types[0].regs.ack = DOVE_PMU_IRQ_CAUSE; - gc->chip_types[0].regs.mask = DOVE_PMU_IRQ_MASK; - gc->chip_types[0].chip.irq_ack = pmu_irq_ack; - gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; - gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; - - /* mask and clear all interrupts */ - writel(0, gc->reg_base + DOVE_PMU_IRQ_MASK); - writel(0, gc->reg_base + DOVE_PMU_IRQ_CAUSE); - - irq_set_handler_data(irq, domain); - irq_set_chained_handler(irq, dove_pmu_irq_handler); - - return 0; -} -IRQCHIP_DECLARE(dove_pmu_intc, - "marvell,dove-pmu-intc", dove_pmu_irq_init); -- cgit v0.10.2 From f6e763b93a6cd3411fd8df925344022719bcba62 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 4 Mar 2014 07:51:17 +0000 Subject: arm64: topology: Implement basic CPU topology support Add basic CPU topology support to arm64, based on the existing pre-v8 code and some work done by Mark Hambleton. This patch does not implement any topology discovery support since that should be based on information from firmware, it merely implements the scaffolding for integration of topology support in the architecture. No locking of the topology data is done since it is only modified during CPU bringup with external serialisation from the SMP code. The goal is to separate the architecture hookup for providing topology information from the DT parsing in order to ease review and avoid blocking the architecture code (which will be built on by other work) with the DT code review by providing something simple and basic. Following patches will implement support for interpreting topology information from MPIDR and for parsing the DT topology bindings for ARM, similar patches will be needed for ACPI. Signed-off-by: Mark Brown Acked-by: Mark Rutland [catalin.marinas@arm.com: removed CONFIG_CPU_TOPOLOGY, always on if SMP] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c2056ca..140cd1a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -165,6 +165,22 @@ config SMP If you don't know what to do here, say N. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + +config SCHED_SMT + bool "SMT scheduler support" + depends on SMP + help + Improves the CPU scheduler's decision making when dealing with + MultiThreading at a cost of slightly increased overhead in some + places. If unsure say N here. + config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h new file mode 100644 index 0000000..0172e6d --- /dev/null +++ b/arch/arm64/include/asm/topology.h @@ -0,0 +1,39 @@ +#ifndef __ASM_TOPOLOGY_H +#define __ASM_TOPOLOGY_H + +#ifdef CONFIG_SMP + +#include + +struct cpu_topology { + int thread_id; + int core_id; + int cluster_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; +}; + +extern struct cpu_topology cpu_topology[NR_CPUS]; + +#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) +#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) + +#define mc_capable() (cpu_topology[0].cluster_id != -1) +#define smt_capable() (cpu_topology[0].thread_id != -1) + +void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); + +#else + +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { } + +#endif + +#include + +#endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index e52bcdc..cfee827 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,7 +14,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o +arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5070dc3..f0a141d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -114,6 +114,11 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } +static void smp_store_cpu_info(unsigned int cpuid) +{ + store_cpu_topology(cpuid); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. @@ -152,6 +157,8 @@ asmlinkage void secondary_start_kernel(void) */ notify_cpu_starting(cpu); + smp_store_cpu_info(cpu); + /* * OK, now it's safe to let the boot CPU continue. Wait for * the CPU migration code to notice that the CPU is online @@ -391,6 +398,10 @@ void __init smp_prepare_cpus(unsigned int max_cpus) int err; unsigned int cpu, ncores = num_possible_cpus(); + init_cpu_topology(); + + smp_store_cpu_info(smp_processor_id()); + /* * are we trying to boot more cores than exist? */ diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 0000000..3e06b0b --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,95 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013,2014 Linaro Limited. + * + * Based on the arm32 version written by Vincent Guittot in turn based on + * arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + if (cpuid_topo->cluster_id == -1) { + /* + * DT does not contain topology information for this cpu + * reset it to default behaviour + */ + pr_debug("CPU%u: No topology information configured\n", cpuid); + cpuid_topo->core_id = 0; + cpumask_set_cpu(cpuid, &cpuid_topo->core_sibling); + cpumask_set_cpu(cpuid, &cpuid_topo->thread_sibling); + return; + } + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } +} + +void store_cpu_topology(unsigned int cpuid) +{ + update_siblings_masks(cpuid); +} + +/* + * init_cpu_topology is called at boot when only one cpu is running + * which prevent simultaneous write access to cpu_topology array + */ +void __init init_cpu_topology(void) +{ + unsigned int cpu; + + /* init core mask and power*/ + for_each_possible_cpu(cpu) { + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpu_topo->thread_id = -1; + cpu_topo->core_id = -1; + cpu_topo->cluster_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + } +} -- cgit v0.10.2 From c24a4a369419c360c323865b91198878275c1481 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Feb 2014 14:15:21 +0530 Subject: timer: Check failure of timer_cpu_notify() before calling init_timer_stats() timer_cpu_notify() should return NOTIFY_OK and nothing else. Anything else would trigger a BUG_ON(). Return value of this routine is already checked correctly but is done after issuing a call to init_timer_stats(). The right order would be to check the error case first and then call init_timer_stats(). Lets do it. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: fweisbec@gmail.com Cc: tj@kernel.org Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/c439f5b6bbc2047e1662f4d523350531425bcf9d.1393576981.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/timer.c b/kernel/timer.c index a71bdfd..31824ef 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1681,9 +1681,9 @@ void __init init_timers(void) err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); - init_timer_stats(); - BUG_ON(err != NOTIFY_OK); + + init_timer_stats(); register_cpu_notifier(&timers_nb); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } -- cgit v0.10.2 From 38edbb0b913d73713c23dcc742669f7e78b52aa7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Feb 2014 14:15:22 +0530 Subject: timer: Make sure TIMER_FLAG_MASK bits are free in allocated base Currently we are using two lowest bit of base for internal purpose and so they both should be zero in the allocated address. The code was doing the right thing before this patch came in: commit c5f66e99b (timer: Implement TIMER_IRQSAFE) Tejun probably forgot to update this piece of code which checks if the lowest 'n' bits are zero or not and so wasn't updated according to the new flag. Lets use TIMER_FLAG_MASK in the calculations here. [ tglx: Massaged changelog ] Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: fweisbec@gmail.com Cc: tj@kernel.org Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/9144e10d7e854a0aa8a673332adec356d81a923c.1393576981.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/timer.c b/kernel/timer.c index 31824ef..949d74e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1555,9 +1555,8 @@ static int init_timers_cpu(int cpu) if (!base) return -ENOMEM; - /* Make sure that tvec_base is 2 byte aligned */ - if (tbase_get_deferrable(base)) { - WARN_ON(1); + /* Make sure tvec_base has TIMER_FLAG_MASK bits free */ + if (WARN_ON(base != tbase_get_base(base))) { kfree(base); return -ENOMEM; } -- cgit v0.10.2 From aa15aa0e869cb218f68faff8f229fa6b6b5cdf08 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 3 Mar 2014 15:38:17 -0800 Subject: MAINTAINERS: EDAC: add Mauro and Borislav as interim patch collectors We're more or less collecting EDAC patches already anyway so let's hold it down so that get_maintainer sees it too. Signed-off-by: Borislav Petkov Acked-by: Mauro Carvalho Chehab Cc: Doug Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index c6d0e93..7abe81e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3095,6 +3095,8 @@ F: fs/ecryptfs/ EDAC-CORE M: Doug Thompson +M: Borislav Petkov +M: Mauro Carvalho Chehab L: linux-edac@vger.kernel.org W: bluesmoke.sourceforge.net S: Supported -- cgit v0.10.2 From 668f9abbd4334e6c29fa8acd71635c4f9101caa7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 3 Mar 2014 15:38:18 -0800 Subject: mm: close PageTail race Commit bf6bddf1924e ("mm: introduce compaction and migration for ballooned pages") introduces page_count(page) into memory compaction which dereferences page->first_page if PageTail(page). This results in a very rare NULL pointer dereference on the aforementioned page_count(page). Indeed, anything that does compound_head(), including page_count() is susceptible to racing with prep_compound_page() and seeing a NULL or dangling page->first_page pointer. This patch uses Andrea's implementation of compound_trans_head() that deals with such a race and makes it the default compound_head() implementation. This includes a read memory barrier that ensures that if PageTail(head) is true that we return a head page that is neither NULL nor dangling. The patch then adds a store memory barrier to prep_compound_page() to ensure page->first_page is set. This is the safest way to ensure we see the head page that we are expecting, PageTail(page) is already in the unlikely() path and the memory barriers are unfortunately required. Hugetlbfs is the exception, we don't enforce a store memory barrier during init since no race is possible. Signed-off-by: David Rientjes Cc: Holger Kiehl Cc: Christoph Lameter Cc: Rafael Aquini Cc: Vlastimil Babka Cc: Michal Hocko Cc: Mel Gorman Cc: Andrea Arcangeli Cc: Rik van Riel Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 8184451..422b7d8 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -874,7 +874,7 @@ bio_pageinc(struct bio *bio) /* Non-zero page count for non-head members of * compound pages is no longer allowed by the kernel. */ - page = compound_trans_head(bv.bv_page); + page = compound_head(bv.bv_page); atomic_inc(&page->_count); } } @@ -887,7 +887,7 @@ bio_pagedec(struct bio *bio) struct bvec_iter iter; bio_for_each_segment(bv, bio, iter) { - page = compound_trans_head(bv.bv_page); + page = compound_head(bv.bv_page); atomic_dec(&page->_count); } } diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 4fb7a8f..54af4e9 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -186,12 +186,12 @@ static bool is_invalid_reserved_pfn(unsigned long pfn) if (pfn_valid(pfn)) { bool reserved; struct page *tail = pfn_to_page(pfn); - struct page *head = compound_trans_head(tail); + struct page *head = compound_head(tail); reserved = !!(PageReserved(head)); if (head != tail) { /* * "head" is not a dangling pointer - * (compound_trans_head takes care of that) + * (compound_head takes care of that) * but the hugepage may have been split * from under us (and we may not hold a * reference count on the head page so it can diff --git a/fs/proc/page.c b/fs/proc/page.c index 02174a6..e647c55 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -121,9 +121,8 @@ u64 stable_page_flags(struct page *page) * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon * to make sure a given page is a thp, not a non-huge compound page. */ - else if (PageTransCompound(page) && - (PageLRU(compound_trans_head(page)) || - PageAnon(compound_trans_head(page)))) + else if (PageTransCompound(page) && (PageLRU(compound_head(page)) || + PageAnon(compound_head(page)))) u |= 1 << KPF_THP; /* diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index db51201..b826239 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -157,46 +157,6 @@ static inline int hpage_nr_pages(struct page *page) return HPAGE_PMD_NR; return 1; } -/* - * compound_trans_head() should be used instead of compound_head(), - * whenever the "page" passed as parameter could be the tail of a - * transparent hugepage that could be undergoing a - * __split_huge_page_refcount(). The page structure layout often - * changes across releases and it makes extensive use of unions. So if - * the page structure layout will change in a way that - * page->first_page gets clobbered by __split_huge_page_refcount, the - * implementation making use of smp_rmb() will be required. - * - * Currently we define compound_trans_head as compound_head, because - * page->private is in the same union with page->first_page, and - * page->private isn't clobbered. However this also means we're - * currently leaving dirt into the page->private field of anonymous - * pages resulting from a THP split, instead of setting page->private - * to zero like for every other page that has PG_private not set. But - * anonymous pages don't use page->private so this is not a problem. - */ -#if 0 -/* This will be needed if page->private will be clobbered in split_huge_page */ -static inline struct page *compound_trans_head(struct page *page) -{ - if (PageTail(page)) { - struct page *head; - head = page->first_page; - smp_rmb(); - /* - * head may be a dangling pointer. - * __split_huge_page_refcount clears PageTail before - * overwriting first_page, so if PageTail is still - * there it means the head pointer isn't dangling. - */ - if (PageTail(page)) - return head; - } - return page; -} -#else -#define compound_trans_head(page) compound_head(page) -#endif extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp); @@ -226,7 +186,6 @@ static inline int split_huge_page(struct page *page) do { } while (0) #define split_huge_page_pmd_mm(__mm, __address, __pmd) \ do { } while (0) -#define compound_trans_head(page) compound_head(page) static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { diff --git a/include/linux/mm.h b/include/linux/mm.h index f28f46e..03ab3e5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -399,8 +399,18 @@ static inline void compound_unlock_irqrestore(struct page *page, static inline struct page *compound_head(struct page *page) { - if (unlikely(PageTail(page))) - return page->first_page; + if (unlikely(PageTail(page))) { + struct page *head = page->first_page; + + /* + * page->first_page may be a dangling pointer to an old + * compound page, so recheck that it is still a tail + * page before returning. + */ + smp_rmb(); + if (likely(PageTail(page))) + return head; + } return page; } diff --git a/mm/ksm.c b/mm/ksm.c index aa4c7c7..68710e8 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -444,7 +444,7 @@ static void break_cow(struct rmap_item *rmap_item) static struct page *page_trans_compound_anon(struct page *page) { if (PageTransCompound(page)) { - struct page *head = compound_trans_head(page); + struct page *head = compound_head(page); /* * head may actually be splitted and freed from under * us but it's ok here. diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 2f2f34a..90002ea 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1651,7 +1651,7 @@ int soft_offline_page(struct page *page, int flags) { int ret; unsigned long pfn = page_to_pfn(page); - struct page *hpage = compound_trans_head(page); + struct page *hpage = compound_head(page); if (PageHWPoison(page)) { pr_info("soft offline: %#lx page already poisoned\n", pfn); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e3758a0..3d1bf88 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -369,9 +369,11 @@ void prep_compound_page(struct page *page, unsigned long order) __SetPageHead(page); for (i = 1; i < nr_pages; i++) { struct page *p = page + i; - __SetPageTail(p); set_page_count(p, 0); p->first_page = page; + /* Make sure p->first_page is always valid for PageTail() */ + smp_wmb(); + __SetPageTail(p); } } diff --git a/mm/swap.c b/mm/swap.c index b31ba67..0092097 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -98,7 +98,7 @@ static void put_compound_page(struct page *page) } /* __split_huge_page_refcount can run under us */ - page_head = compound_trans_head(page); + page_head = compound_head(page); /* * THP can not break up slab pages so avoid taking @@ -253,7 +253,7 @@ bool __get_page_tail(struct page *page) */ unsigned long flags; bool got; - struct page *page_head = compound_trans_head(page); + struct page *page_head = compound_head(page); /* Ref to put_compound_page() comment. */ if (!__compound_tail_refcounted(page_head)) { -- cgit v0.10.2 From 3b7a6418c7494b8bf0bf0537ddee1dedbca10f51 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 3 Mar 2014 15:38:21 -0800 Subject: dma debug: account for cachelines and read-only mappings in overlap tracking While debug_dma_assert_idle() checks if a given *page* is actively undergoing dma the valid granularity of a dma mapping is a *cacheline*. Sander's testing shows that the warning message "DMA-API: exceeded 7 overlapping mappings of pfn..." is falsely triggering. The test is simply mapping multiple cachelines in a given page. Ultimately we want overlap tracking to be valid as it is a real api violation, so we need to track active mappings by cachelines. Update the active dma tracking to use the page-frame-relative cacheline of the mapping as the key, and update debug_dma_assert_idle() to check for all possible mapped cachelines for a given page. However, the need to track active mappings is only relevant when the dma-mapping is writable by the device. In fact it is fairly standard for read-only mappings to have hundreds or thousands of overlapping mappings at once. Limiting the overlap tracking to writable (!DMA_TO_DEVICE) eliminates this class of false-positive overlap reports. Note, the radix gang lookup is sub-optimal. It would be best if it stopped fetching entries once the search passed a page boundary. Nevertheless, this implementation does not perturb the original net_dma failing case. That is to say the extra overhead does not show up in terms of making the failing case pass due to a timing change. References: http://marc.info/?l=linux-netdev&m=139232263419315&w=2 http://marc.info/?l=linux-netdev&m=139217088107122&w=2 Signed-off-by: Dan Williams Reported-by: Sander Eikelenboom Reported-by: Dave Jones Tested-by: Dave Jones Tested-by: Sander Eikelenboom Cc: Konrad Rzeszutek Wilk Cc: Francois Romieu Cc: Eric Dumazet Cc: Wei Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 2defd13..98f2d7e 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -424,111 +424,134 @@ void debug_dma_dump_mappings(struct device *dev) EXPORT_SYMBOL(debug_dma_dump_mappings); /* - * For each page mapped (initial page in the case of - * dma_alloc_coherent/dma_map_{single|page}, or each page in a - * scatterlist) insert into this tree using the pfn as the key. At + * For each mapping (initial cacheline in the case of + * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a + * scatterlist, or the cacheline specified in dma_map_single) insert + * into this tree using the cacheline as the key. At * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If - * the pfn already exists at insertion time add a tag as a reference + * the entry already exists at insertion time add a tag as a reference * count for the overlapping mappings. For now, the overlap tracking - * just ensures that 'unmaps' balance 'maps' before marking the pfn - * idle, but we should also be flagging overlaps as an API violation. + * just ensures that 'unmaps' balance 'maps' before marking the + * cacheline idle, but we should also be flagging overlaps as an API + * violation. * * Memory usage is mostly constrained by the maximum number of available * dma-debug entries in that we need a free dma_debug_entry before - * inserting into the tree. In the case of dma_map_{single|page} and - * dma_alloc_coherent there is only one dma_debug_entry and one pfn to - * track per event. dma_map_sg(), on the other hand, - * consumes a single dma_debug_entry, but inserts 'nents' entries into - * the tree. + * inserting into the tree. In the case of dma_map_page and + * dma_alloc_coherent there is only one dma_debug_entry and one + * dma_active_cacheline entry to track per event. dma_map_sg(), on the + * other hand, consumes a single dma_debug_entry, but inserts 'nents' + * entries into the tree. * * At any time debug_dma_assert_idle() can be called to trigger a - * warning if the given page is in the active set. + * warning if any cachelines in the given page are in the active set. */ -static RADIX_TREE(dma_active_pfn, GFP_NOWAIT); +static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); static DEFINE_SPINLOCK(radix_lock); -#define ACTIVE_PFN_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) +#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) +#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) +#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) -static int active_pfn_read_overlap(unsigned long pfn) +static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) +{ + return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + + (entry->offset >> L1_CACHE_SHIFT); +} + +static int active_cacheline_read_overlap(phys_addr_t cln) { int overlap = 0, i; for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (radix_tree_tag_get(&dma_active_pfn, pfn, i)) + if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) overlap |= 1 << i; return overlap; } -static int active_pfn_set_overlap(unsigned long pfn, int overlap) +static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) { int i; - if (overlap > ACTIVE_PFN_MAX_OVERLAP || overlap < 0) + if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) return overlap; for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) if (overlap & 1 << i) - radix_tree_tag_set(&dma_active_pfn, pfn, i); + radix_tree_tag_set(&dma_active_cacheline, cln, i); else - radix_tree_tag_clear(&dma_active_pfn, pfn, i); + radix_tree_tag_clear(&dma_active_cacheline, cln, i); return overlap; } -static void active_pfn_inc_overlap(unsigned long pfn) +static void active_cacheline_inc_overlap(phys_addr_t cln) { - int overlap = active_pfn_read_overlap(pfn); + int overlap = active_cacheline_read_overlap(cln); - overlap = active_pfn_set_overlap(pfn, ++overlap); + overlap = active_cacheline_set_overlap(cln, ++overlap); /* If we overflowed the overlap counter then we're potentially * leaking dma-mappings. Otherwise, if maps and unmaps are * balanced then this overflow may cause false negatives in - * debug_dma_assert_idle() as the pfn may be marked idle + * debug_dma_assert_idle() as the cacheline may be marked idle * prematurely. */ - WARN_ONCE(overlap > ACTIVE_PFN_MAX_OVERLAP, - "DMA-API: exceeded %d overlapping mappings of pfn %lx\n", - ACTIVE_PFN_MAX_OVERLAP, pfn); + WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, + "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", + ACTIVE_CACHELINE_MAX_OVERLAP, &cln); } -static int active_pfn_dec_overlap(unsigned long pfn) +static int active_cacheline_dec_overlap(phys_addr_t cln) { - int overlap = active_pfn_read_overlap(pfn); + int overlap = active_cacheline_read_overlap(cln); - return active_pfn_set_overlap(pfn, --overlap); + return active_cacheline_set_overlap(cln, --overlap); } -static int active_pfn_insert(struct dma_debug_entry *entry) +static int active_cacheline_insert(struct dma_debug_entry *entry) { + phys_addr_t cln = to_cacheline_number(entry); unsigned long flags; int rc; + /* If the device is not writing memory then we don't have any + * concerns about the cpu consuming stale data. This mitigates + * legitimate usages of overlapping mappings. + */ + if (entry->direction == DMA_TO_DEVICE) + return 0; + spin_lock_irqsave(&radix_lock, flags); - rc = radix_tree_insert(&dma_active_pfn, entry->pfn, entry); + rc = radix_tree_insert(&dma_active_cacheline, cln, entry); if (rc == -EEXIST) - active_pfn_inc_overlap(entry->pfn); + active_cacheline_inc_overlap(cln); spin_unlock_irqrestore(&radix_lock, flags); return rc; } -static void active_pfn_remove(struct dma_debug_entry *entry) +static void active_cacheline_remove(struct dma_debug_entry *entry) { + phys_addr_t cln = to_cacheline_number(entry); unsigned long flags; + /* ...mirror the insert case */ + if (entry->direction == DMA_TO_DEVICE) + return; + spin_lock_irqsave(&radix_lock, flags); /* since we are counting overlaps the final put of the - * entry->pfn will occur when the overlap count is 0. - * active_pfn_dec_overlap() returns -1 in that case + * cacheline will occur when the overlap count is 0. + * active_cacheline_dec_overlap() returns -1 in that case */ - if (active_pfn_dec_overlap(entry->pfn) < 0) - radix_tree_delete(&dma_active_pfn, entry->pfn); + if (active_cacheline_dec_overlap(cln) < 0) + radix_tree_delete(&dma_active_cacheline, cln); spin_unlock_irqrestore(&radix_lock, flags); } /** * debug_dma_assert_idle() - assert that a page is not undergoing dma - * @page: page to lookup in the dma_active_pfn tree + * @page: page to lookup in the dma_active_cacheline tree * * Place a call to this routine in cases where the cpu touching the page * before the dma completes (page is dma_unmapped) will lead to data @@ -536,22 +559,38 @@ static void active_pfn_remove(struct dma_debug_entry *entry) */ void debug_dma_assert_idle(struct page *page) { + static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; + struct dma_debug_entry *entry = NULL; + void **results = (void **) &ents; + unsigned int nents, i; unsigned long flags; - struct dma_debug_entry *entry; + phys_addr_t cln; if (!page) return; + cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; spin_lock_irqsave(&radix_lock, flags); - entry = radix_tree_lookup(&dma_active_pfn, page_to_pfn(page)); + nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, + CACHELINES_PER_PAGE); + for (i = 0; i < nents; i++) { + phys_addr_t ent_cln = to_cacheline_number(ents[i]); + + if (ent_cln == cln) { + entry = ents[i]; + break; + } else if (ent_cln >= cln + CACHELINES_PER_PAGE) + break; + } spin_unlock_irqrestore(&radix_lock, flags); if (!entry) return; + cln = to_cacheline_number(entry); err_printk(entry->dev, entry, - "DMA-API: cpu touching an active dma mapped page " - "[pfn=0x%lx]\n", entry->pfn); + "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", + &cln); } /* @@ -568,9 +607,9 @@ static void add_dma_entry(struct dma_debug_entry *entry) hash_bucket_add(bucket, entry); put_hash_bucket(bucket, &flags); - rc = active_pfn_insert(entry); + rc = active_cacheline_insert(entry); if (rc == -ENOMEM) { - pr_err("DMA-API: pfn tracking ENOMEM, dma-debug disabled\n"); + pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; } @@ -631,7 +670,7 @@ static void dma_entry_free(struct dma_debug_entry *entry) { unsigned long flags; - active_pfn_remove(entry); + active_cacheline_remove(entry); /* * add to beginning of the list - this way the entries are -- cgit v0.10.2 From 5f30fc94ca985974fd54de454c7a6070388443db Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 3 Mar 2014 15:38:23 -0800 Subject: lib/radix-tree.c: swapoff tmpfs radix_tree: remember to rcu_read_unlock Running fsx on tmpfs with concurrent memhog-swapoff-swapon, lots of BUG: sleeping function called from invalid context at kernel/fork.c:606 in_atomic(): 0, irqs_disabled(): 0, pid: 1394, name: swapoff 1 lock held by swapoff/1394: #0: (rcu_read_lock){.+.+.+}, at: [] radix_tree_locate_item+0x1f/0x2b6 followed by ================================================ [ BUG: lock held when returning to user space! ] 3.14.0-rc1 #3 Not tainted ------------------------------------------------ swapoff/1394 is leaving the kernel with locks still held! 1 lock held by swapoff/1394: #0: (rcu_read_lock){.+.+.+}, at: [] radix_tree_locate_item+0x1f/0x2b6 after which the system recovered nicely. Whoops, I long ago forgot the rcu_read_unlock() on one unlikely branch. Fixes e504f3fdd63d ("tmpfs radix_tree: locate_item to speed up swapoff") Signed-off-by: Hugh Dickins Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7811ed3..bd4a8df 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1253,8 +1253,10 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) node = indirect_to_ptr(node); max_index = radix_tree_maxindex(node->height); - if (cur_index > max_index) + if (cur_index > max_index) { + rcu_read_unlock(); break; + } cur_index = __locate(node, item, cur_index, &found_index); rcu_read_unlock(); -- cgit v0.10.2 From ce48225fe3b1b0d1fc9fceb96ac3d8a879e45114 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 3 Mar 2014 15:38:24 -0800 Subject: memcg: fix endless loop in __mem_cgroup_iter_next() Commit 0eef615665ed ("memcg: fix css reference leak and endless loop in mem_cgroup_iter") got the interaction with the commit a few before it d8ad30559715 ("mm/memcg: iteration skip memcgs not yet fully initialized") slightly wrong, and we didn't notice at the time. It's elusive, and harder to get than the original, but for a couple of days before rc1, I several times saw a endless loop similar to that supposedly being fixed. This time it was a tighter loop in __mem_cgroup_iter_next(): because we can get here when our root has already been offlined, and the ordering of conditions was such that we then just cycled around forever. Fixes: 0eef615665ed ("memcg: fix css reference leak and endless loop in mem_cgroup_iter"). Signed-off-by: Hugh Dickins Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Greg Thelen Cc: [3.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ce7a8cc..9d17310 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1127,8 +1127,8 @@ skip_node: * skipping css reference should be safe. */ if (next_css) { - if ((next_css->flags & CSS_ONLINE) && - (next_css == &root->css || css_tryget(next_css))) + if ((next_css == &root->css) || + ((next_css->flags & CSS_ONLINE) && css_tryget(next_css))) return mem_cgroup_from_css(next_css); prev_css = next_css; -- cgit v0.10.2 From 4fb1a86fb5e4209a7d4426d4e586c58e9edc74ac Mon Sep 17 00:00:00 2001 From: Filipe Brandenburger Date: Mon, 3 Mar 2014 15:38:25 -0800 Subject: memcg: reparent charges of children before processing parent Sometimes the cleanup after memcg hierarchy testing gets stuck in mem_cgroup_reparent_charges(), unable to bring non-kmem usage down to 0. There may turn out to be several causes, but a major cause is this: the workitem to offline parent can get run before workitem to offline child; parent's mem_cgroup_reparent_charges() circles around waiting for the child's pages to be reparented to its lrus, but it's holding cgroup_mutex which prevents the child from reaching its mem_cgroup_reparent_charges(). Further testing showed that an ordered workqueue for cgroup_destroy_wq is not always good enough: percpu_ref_kill_and_confirm's call_rcu_sched stage on the way can mess up the order before reaching the workqueue. Instead, when offlining a memcg, call mem_cgroup_reparent_charges() on all its children (and grandchildren, in the correct order) to have their charges reparented first. Fixes: e5fca243abae ("cgroup: use a dedicated workqueue for cgroup destruction") Signed-off-by: Filipe Brandenburger Signed-off-by: Hugh Dickins Reviewed-by: Tejun Heo Acked-by: Michal Hocko Cc: Johannes Weiner Cc: [v3.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9d17310..5b6b003 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6595,6 +6595,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_event *event, *tmp; + struct cgroup_subsys_state *iter; /* * Unregister events and notify userspace. @@ -6611,7 +6612,14 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) kmem_cgroup_css_offline(memcg); mem_cgroup_invalidate_reclaim_iterators(memcg); - mem_cgroup_reparent_charges(memcg); + + /* + * This requires that offlining is serialized. Right now that is + * guaranteed because css_killed_work_fn() holds the cgroup_mutex. + */ + css_for_each_descendant_post(iter, css) + mem_cgroup_reparent_charges(mem_cgroup_from_css(iter)); + mem_cgroup_destroy_all_caches(memcg); vmpressure_cleanup(&memcg->vmpressure); } -- cgit v0.10.2 From 9050d7eba40b3d79551668f54e68fd6f51945ef3 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 3 Mar 2014 15:38:27 -0800 Subject: mm: include VM_MIXEDMAP flag in the VM_SPECIAL list to avoid m(un)locking Daniel Borkmann reported a VM_BUG_ON assertion failing: ------------[ cut here ]------------ kernel BUG at mm/mlock.c:528! invalid opcode: 0000 [#1] SMP Modules linked in: ccm arc4 iwldvm [...] video CPU: 3 PID: 2266 Comm: netsniff-ng Not tainted 3.14.0-rc2+ #8 Hardware name: LENOVO 2429BP3/2429BP3, BIOS G4ET37WW (1.12 ) 05/29/2012 task: ffff8801f87f9820 ti: ffff88002cb44000 task.ti: ffff88002cb44000 RIP: 0010:[] [] munlock_vma_pages_range+0x2e0/0x2f0 Call Trace: do_munmap+0x18f/0x3b0 vm_munmap+0x41/0x60 SyS_munmap+0x22/0x30 system_call_fastpath+0x1a/0x1f RIP munlock_vma_pages_range+0x2e0/0x2f0 ---[ end trace a0088dcf07ae10f2 ]--- because munlock_vma_pages_range() thinks it's unexpectedly in the middle of a THP page. This can be reproduced with default config since 3.11 kernels. A reproducer can be found in the kernel's selftest directory for networking by running ./psock_tpacket. The problem is that an order=2 compound page (allocated by alloc_one_pg_vec_page() is part of the munlocked VM_MIXEDMAP vma (mapped by packet_mmap()) and mistaken for a THP page and assumed to be order=9. The checks for THP in munlock came with commit ff6a6da60b89 ("mm: accelerate munlock() treatment of THP pages"), i.e. since 3.9, but did not trigger a bug. It just makes munlock_vma_pages_range() skip such compound pages until the next 512-pages-aligned page, when it encounters a head page. This is however not a problem for vma's where mlocking has no effect anyway, but it can distort the accounting. Since commit 7225522bb429 ("mm: munlock: batch non-THP page isolation and munlock+putback using pagevec") this can trigger a VM_BUG_ON in PageTransHuge() check. This patch fixes the issue by adding VM_MIXEDMAP flag to VM_SPECIAL, a list of flags that make vma's non-mlockable and non-mergeable. The reasoning is that VM_MIXEDMAP vma's are similar to VM_PFNMAP, which is already on the VM_SPECIAL list, and both are intended for non-LRU pages where mlocking makes no sense anyway. Related Lkml discussion can be found in [2]. [1] tools/testing/selftests/net/psock_tpacket [2] https://lkml.org/lkml/2014/1/10/427 Signed-off-by: Vlastimil Babka Signed-off-by: Daniel Borkmann Reported-by: Daniel Borkmann Tested-by: Daniel Borkmann Cc: Thomas Hellstrom Cc: John David Anglin Cc: HATAYAMA Daisuke Cc: Konstantin Khlebnikov Cc: Carsten Otte Cc: Jared Hulbert Tested-by: Hannes Frederic Sowa Cc: Kirill A. Shutemov Acked-by: Rik van Riel Cc: Andrea Arcangeli Cc: [3.11.x+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index 03ab3e5..a1fe251 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -175,7 +175,7 @@ extern unsigned int kobjsize(const void *objp); * Special vmas that are non-mergable, non-mlock()able. * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ -#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP) +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) /* * mapping from the currently active vm_flags protection bits (the diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 4df39b1..1546655 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1961,7 +1961,7 @@ out: return ret; } -#define VM_NO_THP (VM_SPECIAL|VM_MIXEDMAP|VM_HUGETLB|VM_SHARED|VM_MAYSHARE) +#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE) int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) -- cgit v0.10.2 From 5ec384d45b42fc9274df835be435e964f885b6ed Mon Sep 17 00:00:00 2001 From: "Daniel M. Weeks" Date: Mon, 3 Mar 2014 15:38:28 -0800 Subject: scripts/gen_initramfs_list.sh: fix flags for initramfs LZ4 compression LZ4 as implemented in the kernel differs from the default method now used by the reference implementation of LZ4. Until the in-kernel method is updated to support the new default, passing the legacy flag (-l) to the compressor is necessary. Without this flag the kernel-generated, LZ4-compressed initramfs is junk. Kyungsik said: : It seems that lz4 supports legacy format with the same option as lz4c : does. Just looking at the first few bytes of lz4 compressed image, we can : see whether it is new format or not. : : It shows new format magic number without this patch. New format magic : number is 0x184d2204. : : $ hexdump -C ./initramfs_data.cpio.lz4 |more : 00000000 04 22 4d 18 64 70 b9 69 (Little Endian) : ... : : Currently kernel supports legacy format only. Signed-off-by: Daniel M. Weeks Cc: Michal Marek Acked-by: Kyungsik Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh index ef47409..17fa901 100644 --- a/scripts/gen_initramfs_list.sh +++ b/scripts/gen_initramfs_list.sh @@ -257,7 +257,7 @@ case "$arg" in && compr="lzop -9 -f" echo "$output_file" | grep -q "\.lz4$" \ && [ -x "`which lz4 2> /dev/null`" ] \ - && compr="lz4 -9 -f" + && compr="lz4 -l -9 -f" echo "$output_file" | grep -q "\.cpio$" && compr="cat" shift ;; -- cgit v0.10.2 From 0f55159d091cb1e555ee52abc1b2455f301b99a8 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Mon, 3 Mar 2014 15:38:30 -0800 Subject: kallsyms: fix absolute addresses for kASLR Currently symbols that are absolute addresses are incorrectly displayed in /proc/kallsyms if the kernel is loaded with kASLR. The problem was that the scripts/kallsyms.c file which generates the array of symbol names and addresses uses an relocatable value for all symbols, even absolute symbols. This patch fixes that. Several kallsyms output in different boot states for comparison: $ egrep '_(stext|_per_cpu_(start|end))' /root/kallsyms.nokaslr 0000000000000000 D __per_cpu_start 0000000000014280 D __per_cpu_end ffffffff810001c8 T _stext $ egrep '_(stext|_per_cpu_(start|end))' /root/kallsyms.kaslr1 000000001f200000 D __per_cpu_start 000000001f214280 D __per_cpu_end ffffffffa02001c8 T _stext $ egrep '_(stext|_per_cpu_(start|end))' /root/kallsyms.kaslr2 000000000d400000 D __per_cpu_start 000000000d414280 D __per_cpu_end ffffffff8e4001c8 T _stext $ egrep '_(stext|_per_cpu_(start|end))' /root/kallsyms.kaslr-fixed 0000000000000000 D __per_cpu_start 0000000000014280 D __per_cpu_end ffffffffadc001c8 T _stext Signed-off-by: Andy Honig Signed-off-by: Kees Cook Cc: Michal Marek Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 10085de..276e84b 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -330,8 +330,7 @@ static void write_src(void) printf("\tPTR\t_text + %#llx\n", table[i].addr - _text); else - printf("\tPTR\t_text - %#llx\n", - _text - table[i].addr); + printf("\tPTR\t%#llx\n", table[i].addr); } else { printf("\tPTR\t%#llx\n", table[i].addr); } -- cgit v0.10.2 From 40d2d968e99aa267ac1e5d70deb97faa41a7dfc8 Mon Sep 17 00:00:00 2001 From: Vikas Sajjan Date: Mon, 3 Mar 2014 15:38:31 -0800 Subject: drivers/rtc/rtc-s3c.c: fix incorrect way of save/restore of S3C2410_TICNT for TYPE_S3C64XX On exynos5250, exynos5420 and exynos5260 it was observed that, after 1 cycle of S2R, the rtc-tick occurs at a very fast rate as compared to the rtc-tick occuring before S2R. This patch fixes the above issue by correcting the wrong way of save/restore of S3C2410_TICNT for TYPE_S3C64XX. Signed-off-by: Vikas Sajjan Cc: Grant Likely Cc: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 7afd373..c4cde9c 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -580,10 +580,12 @@ static int s3c_rtc_suspend(struct device *dev) clk_enable(rtc_clk); /* save TICNT for anyone using periodic interrupts */ - ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT); if (s3c_rtc_cpu_type == TYPE_S3C64XX) { ticnt_en_save = readw(s3c_rtc_base + S3C2410_RTCCON); ticnt_en_save &= S3C64XX_RTCCON_TICEN; + ticnt_save = readl(s3c_rtc_base + S3C2410_TICNT); + } else { + ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT); } s3c_rtc_enable(pdev, 0); @@ -605,10 +607,15 @@ static int s3c_rtc_resume(struct device *dev) clk_enable(rtc_clk); s3c_rtc_enable(pdev, 1); - writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT); - if (s3c_rtc_cpu_type == TYPE_S3C64XX && ticnt_en_save) { - tmp = readw(s3c_rtc_base + S3C2410_RTCCON); - writew(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON); + if (s3c_rtc_cpu_type == TYPE_S3C64XX) { + writel(ticnt_save, s3c_rtc_base + S3C2410_TICNT); + if (ticnt_en_save) { + tmp = readw(s3c_rtc_base + S3C2410_RTCCON); + writew(tmp | ticnt_en_save, + s3c_rtc_base + S3C2410_RTCCON); + } + } else { + writeb(ticnt_save, s3c_rtc_base + S3C2410_TICNT); } if (device_may_wakeup(dev) && wake_en) { -- cgit v0.10.2 From 15c34a760630ca2c803848fba90ca0646a9907dd Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 Mar 2014 15:38:32 -0800 Subject: ocfs2: fix quota file corruption Global quota files are accessed from different nodes. Thus we cannot cache offset of quota structure in the quota file after we drop our node reference count to it because after that moment quota structure may be freed and reallocated elsewhere by a different node resulting in corruption of quota file. Fix the problem by clearing dq_off when we are releasing dquot structure. We also remove the DB_READ_B handling because it is useless - DQ_ACTIVE_B is set iff DQ_READ_B is set. Signed-off-by: Jan Kara Cc: Goldwyn Rodrigues Cc: Joel Becker Reviewed-by: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index aaa5061..d7b5108 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot) */ if (status < 0) mlog_errno(status); + /* + * Clear dq_off so that we search for the structure in quota file next + * time we acquire it. The structure might be deleted and reallocated + * elsewhere by another node while our dquot structure is on freelist. + */ + dquot->dq_off = 0; clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_trans: ocfs2_commit_trans(osb, handle); @@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) status = ocfs2_lock_global_qf(info, 1); if (status < 0) goto out; - if (!test_bit(DQ_READ_B, &dquot->dq_flags)) { - status = ocfs2_qinfo_lock(info, 0); - if (status < 0) - goto out_dq; - status = qtree_read_dquot(&info->dqi_gi, dquot); - ocfs2_qinfo_unlock(info, 0); - if (status < 0) - goto out_dq; - } - set_bit(DQ_READ_B, &dquot->dq_flags); + status = ocfs2_qinfo_lock(info, 0); + if (status < 0) + goto out_dq; + /* + * We always want to read dquot structure from disk because we don't + * know what happened with it while it was on freelist. + */ + status = qtree_read_dquot(&info->dqi_gi, dquot); + ocfs2_qinfo_unlock(info, 0); + if (status < 0) + goto out_dq; OCFS2_DQUOT(dquot)->dq_use_count++; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2e4344b..2001862 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot) ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); out: - /* Clear the read bit so that next time someone uses this - * dquot he reads fresh info from disk and allocates local - * dquot structure */ - clear_bit(DQ_READ_B, &dquot->dq_flags); return status; } -- cgit v0.10.2 From a5f6ea29f9a918403629f8369ae55fac6b09cb53 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Mar 2014 15:38:33 -0800 Subject: sh: prefix sh-specific "CCR" and "CCR2" by "SH_" Commit bcf24e1daa94 ("mmc: omap_hsmmc: use the generic config for omap2plus devices"), enabled the build for other platforms for compile testing. sh-allmodconfig now fails with: include/linux/omap-dma.h:171:8: error: expected identifier before numeric constant make[4]: *** [drivers/mmc/host/omap_hsmmc.o] Error 1 This happens because SuperH #defines "CCR", which is one of the enum values in include/linux/omap-dma.h. There's a similar issue with "CCR2" on sh2a. As "CCR" and "CCR2" are too generic names for global #defines, prefix them with "SH_" to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sh/include/cpu-sh2/cpu/cache.h b/arch/sh/include/cpu-sh2/cpu/cache.h index 673515b..aa1b2b9 100644 --- a/arch/sh/include/cpu-sh2/cpu/cache.h +++ b/arch/sh/include/cpu-sh2/cpu/cache.h @@ -18,7 +18,7 @@ #define SH_CACHE_ASSOC 8 #if defined(CONFIG_CPU_SUBTYPE_SH7619) -#define CCR 0xffffffec +#define SH_CCR 0xffffffec #define CCR_CACHE_CE 0x01 /* Cache enable */ #define CCR_CACHE_WT 0x02 /* CCR[bit1=1,bit2=1] */ diff --git a/arch/sh/include/cpu-sh2a/cpu/cache.h b/arch/sh/include/cpu-sh2a/cpu/cache.h index defb0ba..b27ce92 100644 --- a/arch/sh/include/cpu-sh2a/cpu/cache.h +++ b/arch/sh/include/cpu-sh2a/cpu/cache.h @@ -17,8 +17,8 @@ #define SH_CACHE_COMBINED 4 #define SH_CACHE_ASSOC 8 -#define CCR 0xfffc1000 /* CCR1 */ -#define CCR2 0xfffc1004 +#define SH_CCR 0xfffc1000 /* CCR1 */ +#define SH_CCR2 0xfffc1004 /* * Most of the SH-2A CCR1 definitions resemble the SH-4 ones. All others not diff --git a/arch/sh/include/cpu-sh3/cpu/cache.h b/arch/sh/include/cpu-sh3/cpu/cache.h index bee2d81..29700fd 100644 --- a/arch/sh/include/cpu-sh3/cpu/cache.h +++ b/arch/sh/include/cpu-sh3/cpu/cache.h @@ -17,7 +17,7 @@ #define SH_CACHE_COMBINED 4 #define SH_CACHE_ASSOC 8 -#define CCR 0xffffffec /* Address of Cache Control Register */ +#define SH_CCR 0xffffffec /* Address of Cache Control Register */ #define CCR_CACHE_CE 0x01 /* Cache Enable */ #define CCR_CACHE_WT 0x02 /* Write-Through (for P0,U0,P3) (else writeback) */ diff --git a/arch/sh/include/cpu-sh4/cpu/cache.h b/arch/sh/include/cpu-sh4/cpu/cache.h index 7bfb9e8..92c4cd1 100644 --- a/arch/sh/include/cpu-sh4/cpu/cache.h +++ b/arch/sh/include/cpu-sh4/cpu/cache.h @@ -17,7 +17,7 @@ #define SH_CACHE_COMBINED 4 #define SH_CACHE_ASSOC 8 -#define CCR 0xff00001c /* Address of Cache Control Register */ +#define SH_CCR 0xff00001c /* Address of Cache Control Register */ #define CCR_CACHE_OCE 0x0001 /* Operand Cache Enable */ #define CCR_CACHE_WT 0x0002 /* Write-Through (for P0,U0,P3) (else writeback)*/ #define CCR_CACHE_CB 0x0004 /* Copy-Back (for P1) (else writethrough) */ diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index ecf83cd..0d7360d 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -112,7 +112,7 @@ static void cache_init(void) unsigned long ccr, flags; jump_to_uncached(); - ccr = __raw_readl(CCR); + ccr = __raw_readl(SH_CCR); /* * At this point we don't know whether the cache is enabled or not - a @@ -189,7 +189,7 @@ static void cache_init(void) l2_cache_init(); - __raw_writel(flags, CCR); + __raw_writel(flags, SH_CCR); back_to_cached(); } #else diff --git a/arch/sh/mm/cache-debugfs.c b/arch/sh/mm/cache-debugfs.c index 1157251..777e50f 100644 --- a/arch/sh/mm/cache-debugfs.c +++ b/arch/sh/mm/cache-debugfs.c @@ -36,7 +36,7 @@ static int cache_seq_show(struct seq_file *file, void *iter) */ jump_to_uncached(); - ccr = __raw_readl(CCR); + ccr = __raw_readl(SH_CCR); if ((ccr & CCR_CACHE_ENABLE) == 0) { back_to_cached(); diff --git a/arch/sh/mm/cache-sh2.c b/arch/sh/mm/cache-sh2.c index defcf71..a74259f 100644 --- a/arch/sh/mm/cache-sh2.c +++ b/arch/sh/mm/cache-sh2.c @@ -63,9 +63,9 @@ static void sh2__flush_invalidate_region(void *start, int size) local_irq_save(flags); jump_to_uncached(); - ccr = __raw_readl(CCR); + ccr = __raw_readl(SH_CCR); ccr |= CCR_CACHE_INVALIDATE; - __raw_writel(ccr, CCR); + __raw_writel(ccr, SH_CCR); back_to_cached(); local_irq_restore(flags); diff --git a/arch/sh/mm/cache-sh2a.c b/arch/sh/mm/cache-sh2a.c index 949e2d3..ee87d08 100644 --- a/arch/sh/mm/cache-sh2a.c +++ b/arch/sh/mm/cache-sh2a.c @@ -134,7 +134,8 @@ static void sh2a__flush_invalidate_region(void *start, int size) /* If there are too many pages then just blow the cache */ if (((end - begin) >> PAGE_SHIFT) >= MAX_OCACHE_PAGES) { - __raw_writel(__raw_readl(CCR) | CCR_OCACHE_INVALIDATE, CCR); + __raw_writel(__raw_readl(SH_CCR) | CCR_OCACHE_INVALIDATE, + SH_CCR); } else { for (v = begin; v < end; v += L1_CACHE_BYTES) sh2a_invalidate_line(CACHE_OC_ADDRESS_ARRAY, v); @@ -167,7 +168,8 @@ static void sh2a_flush_icache_range(void *args) /* I-Cache invalidate */ /* If there are too many pages then just blow the cache */ if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) { - __raw_writel(__raw_readl(CCR) | CCR_ICACHE_INVALIDATE, CCR); + __raw_writel(__raw_readl(SH_CCR) | CCR_ICACHE_INVALIDATE, + SH_CCR); } else { for (v = start; v < end; v += L1_CACHE_BYTES) sh2a_invalidate_line(CACHE_IC_ADDRESS_ARRAY, v); diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c index 0e52928..51d8f7f 100644 --- a/arch/sh/mm/cache-sh4.c +++ b/arch/sh/mm/cache-sh4.c @@ -133,9 +133,9 @@ static void flush_icache_all(void) jump_to_uncached(); /* Flush I-cache */ - ccr = __raw_readl(CCR); + ccr = __raw_readl(SH_CCR); ccr |= CCR_CACHE_ICI; - __raw_writel(ccr, CCR); + __raw_writel(ccr, SH_CCR); /* * back_to_cached() will take care of the barrier for us, don't add diff --git a/arch/sh/mm/cache-shx3.c b/arch/sh/mm/cache-shx3.c index c0adbee..24c58b7 100644 --- a/arch/sh/mm/cache-shx3.c +++ b/arch/sh/mm/cache-shx3.c @@ -19,7 +19,7 @@ void __init shx3_cache_init(void) { unsigned int ccr; - ccr = __raw_readl(CCR); + ccr = __raw_readl(SH_CCR); /* * If we've got cache aliases, resolve them in hardware. @@ -40,5 +40,5 @@ void __init shx3_cache_init(void) ccr |= CCR_CACHE_IBE; #endif - writel_uncached(ccr, CCR); + writel_uncached(ccr, SH_CCR); } diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c index 616966a..097c2cd 100644 --- a/arch/sh/mm/cache.c +++ b/arch/sh/mm/cache.c @@ -285,8 +285,8 @@ void __init cpu_cache_init(void) { unsigned int cache_disabled = 0; -#ifdef CCR - cache_disabled = !(__raw_readl(CCR) & CCR_CACHE_ENABLE); +#ifdef SH_CCR + cache_disabled = !(__raw_readl(SH_CCR) & CCR_CACHE_ENABLE); #endif compute_alias(&boot_cpu_data.icache); -- cgit v0.10.2 From db5d711e2db776f18219b033e5dc4fb7e4264dd7 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 3 Mar 2014 15:38:34 -0800 Subject: zram: avoid null access when fail to alloc meta zram_meta_alloc could fail so caller should check it. Otherwise, your system will hang. Signed-off-by: Minchan Kim Acked-by: Jerome Marchand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 011e55d..51c557c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -612,6 +612,8 @@ static ssize_t disksize_store(struct device *dev, disksize = PAGE_ALIGN(disksize); meta = zram_meta_alloc(disksize); + if (!meta) + return -ENOMEM; down_write(&zram->init_lock); if (zram->init_done) { up_write(&zram->init_lock); -- cgit v0.10.2 From bd2c00353286d63542a8a0896a8c747f7c880edd Mon Sep 17 00:00:00 2001 From: Vyacheslav Dubeyko Date: Mon, 3 Mar 2014 15:38:35 -0800 Subject: hfsplus: fix remount issue Current implementation of HFS+ driver has small issue with remount option. Namely, for example, you are unable to remount from RO mode into RW mode by means of command "mount -o remount,rw /dev/loop0 /mnt/hfsplus". Trying to execute sequence of commands results in an error message: mount /dev/loop0 /mnt/hfsplus mount -o remount,ro /dev/loop0 /mnt/hfsplus mount -o remount,rw /dev/loop0 /mnt/hfsplus mount: you must specify the filesystem type mount -t hfsplus -o remount,rw /dev/loop0 /mnt/hfsplus mount: /mnt/hfsplus not mounted or bad option The reason of such issue is failure of mount syscall: mount("/dev/loop0", "/mnt/hfsplus", 0x2282a60, MS_MGC_VAL|MS_REMOUNT, NULL) = -1 EINVAL (Invalid argument) Namely, hfsplus_parse_options_remount() method receives empty "input" argument and return false in such case. As a result, hfsplus_remount() returns -EINVAL error code. This patch fixes the issue by means of return true for the case of empty "input" argument in hfsplus_parse_options_remount() method. Signed-off-by: Vyacheslav Dubeyko Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 968eab5..68537e8 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -75,7 +75,7 @@ int hfsplus_parse_options_remount(char *input, int *force) int token; if (!input) - return 0; + return 1; while ((p = strsep(&input, ",")) != NULL) { if (!*p) -- cgit v0.10.2 From 04379dffdd4da820d51a1566ad2e86f3b1ad97ed Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Mon, 3 Mar 2014 15:38:36 -0800 Subject: rapidio/tsi721: fix tasklet termination in dma channel release This patch is a modification of the patch originally proposed by Xiaotian Feng : https://lkml.org/lkml/2012/11/5/413 This new version disables DMA channel interrupts and ensures that the tasklet wil not be scheduled again before calling tasklet_kill(). Unfortunately the updated patch was not released at that time due to planned rework of Tsi721 mport driver to use threaded interrupts (which has yet to happen). Recently the issue was reported again: https://lkml.org/lkml/2014/2/19/762. Description from the original Xiaotian's patch: "Some drivers use tasklet_disable in device remove/release process, tasklet_disable will inc tasklet->count and return. If the tasklet is not handled yet under some softirq pressure, the tasklet will be placed on the tasklet_vec, never have a chance to be excuted. This might lead to a heavy loaded ksoftirqd, wakeup with pending_softirq, but tasklet is disabled. tasklet_kill should be used in this case." This patch is applicable to kernel versions starting from v3.5. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Xiaotian Feng Reviewed-by: Thomas Gleixner Cc: Mike Galbraith Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/devices/tsi721.h b/drivers/rapidio/devices/tsi721.h index b4b0d83..7061ac0 100644 --- a/drivers/rapidio/devices/tsi721.h +++ b/drivers/rapidio/devices/tsi721.h @@ -678,6 +678,7 @@ struct tsi721_bdma_chan { struct list_head free_list; dma_cookie_t completed_cookie; struct tasklet_struct tasklet; + bool active; }; #endif /* CONFIG_RAPIDIO_DMA_ENGINE */ diff --git a/drivers/rapidio/devices/tsi721_dma.c b/drivers/rapidio/devices/tsi721_dma.c index 502663f..91245f5 100644 --- a/drivers/rapidio/devices/tsi721_dma.c +++ b/drivers/rapidio/devices/tsi721_dma.c @@ -206,8 +206,8 @@ void tsi721_bdma_handler(struct tsi721_bdma_chan *bdma_chan) { /* Disable BDMA channel interrupts */ iowrite32(0, bdma_chan->regs + TSI721_DMAC_INTE); - - tasklet_schedule(&bdma_chan->tasklet); + if (bdma_chan->active) + tasklet_schedule(&bdma_chan->tasklet); } #ifdef CONFIG_PCI_MSI @@ -562,7 +562,7 @@ static int tsi721_alloc_chan_resources(struct dma_chan *dchan) } #endif /* CONFIG_PCI_MSI */ - tasklet_enable(&bdma_chan->tasklet); + bdma_chan->active = true; tsi721_bdma_interrupt_enable(bdma_chan, 1); return bdma_chan->bd_num - 1; @@ -576,9 +576,7 @@ err_out: static void tsi721_free_chan_resources(struct dma_chan *dchan) { struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan); -#ifdef CONFIG_PCI_MSI struct tsi721_device *priv = to_tsi721(dchan->device); -#endif LIST_HEAD(list); dev_dbg(dchan->device->dev, "%s: Entry\n", __func__); @@ -589,14 +587,25 @@ static void tsi721_free_chan_resources(struct dma_chan *dchan) BUG_ON(!list_empty(&bdma_chan->active_list)); BUG_ON(!list_empty(&bdma_chan->queue)); - tasklet_disable(&bdma_chan->tasklet); + tsi721_bdma_interrupt_enable(bdma_chan, 0); + bdma_chan->active = false; + +#ifdef CONFIG_PCI_MSI + if (priv->flags & TSI721_USING_MSIX) { + synchronize_irq(priv->msix[TSI721_VECT_DMA0_DONE + + bdma_chan->id].vector); + synchronize_irq(priv->msix[TSI721_VECT_DMA0_INT + + bdma_chan->id].vector); + } else +#endif + synchronize_irq(priv->pdev->irq); + + tasklet_kill(&bdma_chan->tasklet); spin_lock_bh(&bdma_chan->lock); list_splice_init(&bdma_chan->free_list, &list); spin_unlock_bh(&bdma_chan->lock); - tsi721_bdma_interrupt_enable(bdma_chan, 0); - #ifdef CONFIG_PCI_MSI if (priv->flags & TSI721_USING_MSIX) { free_irq(priv->msix[TSI721_VECT_DMA0_DONE + @@ -790,6 +799,7 @@ int tsi721_register_dma(struct tsi721_device *priv) bdma_chan->dchan.cookie = 1; bdma_chan->dchan.chan_id = i; bdma_chan->id = i; + bdma_chan->active = false; spin_lock_init(&bdma_chan->lock); @@ -799,7 +809,6 @@ int tsi721_register_dma(struct tsi721_device *priv) tasklet_init(&bdma_chan->tasklet, tsi721_dma_tasklet, (unsigned long)bdma_chan); - tasklet_disable(&bdma_chan->tasklet); list_add_tail(&bdma_chan->dchan.device_node, &mport->dma.channels); } -- cgit v0.10.2 From b75f005076234311301c6241125cb29d5f124eff Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 3 Mar 2014 15:38:37 -0800 Subject: MAINTAINERS: use tab for separator Convert whitespace to single tab for separators. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 7abe81e..b392889 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1612,11 +1612,11 @@ S: Maintained F: drivers/net/wireless/atmel* ATTO EXPRESSSAS SAS/SATA RAID SCSI DRIVER -M: Bradley Grove -L: linux-scsi@vger.kernel.org -W: http://www.attotech.com -S: Supported -F: drivers/scsi/esas2r +M: Bradley Grove +L: linux-scsi@vger.kernel.org +W: http://www.attotech.com +S: Supported +F: drivers/scsi/esas2r AUDIT SUBSYSTEM M: Eric Paris @@ -2179,9 +2179,9 @@ S: Supported F: drivers/net/ethernet/cisco/enic/ CISCO VIC LOW LATENCY NIC DRIVER -M: Upinder Malhi -S: Supported -F: drivers/infiniband/hw/usnic +M: Upinder Malhi +S: Supported +F: drivers/infiniband/hw/usnic CIRRUS LOGIC EP93XX ETHERNET DRIVER M: Hartley Sweeten @@ -2378,13 +2378,13 @@ F: drivers/cpufreq/arm_big_little.c F: drivers/cpufreq/arm_big_little_dt.c CPUIDLE DRIVER - ARM BIG LITTLE -M: Lorenzo Pieralisi -M: Daniel Lezcano -L: linux-pm@vger.kernel.org -L: linux-arm-kernel@lists.infradead.org -T: git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git -S: Maintained -F: drivers/cpuidle/cpuidle-big_little.c +M: Lorenzo Pieralisi +M: Daniel Lezcano +L: linux-pm@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org +T: git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git +S: Maintained +F: drivers/cpuidle/cpuidle-big_little.c CPUIDLE DRIVERS M: Rafael J. Wysocki @@ -2458,9 +2458,9 @@ S: Maintained F: sound/pci/cs5535audio/ CW1200 WLAN driver -M: Solomon Peachy -S: Maintained -F: drivers/net/wireless/cw1200/ +M: Solomon Peachy +S: Maintained +F: drivers/net/wireless/cw1200/ CX18 VIDEO4LINUX DRIVER M: Andy Walls @@ -5473,11 +5473,11 @@ S: Maintained F: drivers/media/tuners/m88ts2022* MA901 MASTERKIT USB FM RADIO DRIVER -M: Alexey Klimov -L: linux-media@vger.kernel.org -T: git git://linuxtv.org/media_tree.git -S: Maintained -F: drivers/media/radio/radio-ma901.c +M: Alexey Klimov +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/radio/radio-ma901.c MAC80211 M: Johannes Berg @@ -5638,7 +5638,7 @@ F: drivers/scsi/megaraid/ MELLANOX ETHERNET DRIVER (mlx4_en) M: Amir Vadai -L: netdev@vger.kernel.org +L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com Q: http://patchwork.ozlabs.org/project/netdev/list/ @@ -5679,7 +5679,7 @@ F: include/linux/mtd/ F: include/uapi/mtd/ MEN A21 WATCHDOG DRIVER -M: Johannes Thumshirn +M: Johannes Thumshirn L: linux-watchdog@vger.kernel.org S: Supported F: drivers/watchdog/mena21_wdt.c @@ -5741,14 +5741,14 @@ F: drivers/net/ethernet/mellanox/mlx5/core/ F: include/linux/mlx5/ Mellanox MLX5 IB driver -M: Eli Cohen -L: linux-rdma@vger.kernel.org -W: http://www.mellanox.com -Q: http://patchwork.kernel.org/project/linux-rdma/list/ -T: git://openfabrics.org/~eli/connect-ib.git -S: Supported -F: include/linux/mlx5/ -F: drivers/infiniband/hw/mlx5/ +M: Eli Cohen +L: linux-rdma@vger.kernel.org +W: http://www.mellanox.com +Q: http://patchwork.kernel.org/project/linux-rdma/list/ +T: git://openfabrics.org/~eli/connect-ib.git +S: Supported +F: include/linux/mlx5/ +F: drivers/infiniband/hw/mlx5/ MODULE SUPPORT M: Rusty Russell @@ -8702,17 +8702,17 @@ S: Maintained F: drivers/media/radio/radio-raremono.c THERMAL -M: Zhang Rui -M: Eduardo Valentin -L: linux-pm@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git -Q: https://patchwork.kernel.org/project/linux-pm/list/ -S: Supported -F: drivers/thermal/ -F: include/linux/thermal.h -F: include/linux/cpu_cooling.h -F: Documentation/devicetree/bindings/thermal/ +M: Zhang Rui +M: Eduardo Valentin +L: linux-pm@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git +Q: https://patchwork.kernel.org/project/linux-pm/list/ +S: Supported +F: drivers/thermal/ +F: include/linux/thermal.h +F: include/linux/cpu_cooling.h +F: Documentation/devicetree/bindings/thermal/ THINGM BLINK(1) USB RGB LED DRIVER M: Vivien Didelot -- cgit v0.10.2 From cea8321cd7f730f96a419cc6a8e9f763df69a7f2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 3 Mar 2014 15:38:38 -0800 Subject: MAINTAINERS: add and correct types of some "T:" entries Tree location entries should start with the appropriate type. Add git to some, hg to another. Neaten tree type description. Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index b392889..99d5b4f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -73,7 +73,8 @@ Descriptions of section entries: L: Mailing list that is relevant to this area W: Web-page with status/info Q: Patchwork web based patch tracking system site - T: SCM tree type and location. Type is one of: git, hg, quilt, stgit, topgit. + T: SCM tree type and location. + Type is one of: git, hg, quilt, stgit, topgit S: Status, one of the following: Supported: Someone is actually paid to look after this. Maintained: Someone actually looks after it. @@ -2159,7 +2160,7 @@ F: Documentation/zh_CN/ CHIPIDEA USB HIGH SPEED DUAL ROLE CONTROLLER M: Peter Chen -T: git://github.com/hzpeterchen/linux-usb.git +T: git git://github.com/hzpeterchen/linux-usb.git L: linux-usb@vger.kernel.org S: Maintained F: drivers/usb/chipidea/ @@ -2382,7 +2383,7 @@ M: Lorenzo Pieralisi M: Daniel Lezcano L: linux-pm@vger.kernel.org L: linux-arm-kernel@lists.infradead.org -T: git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git S: Maintained F: drivers/cpuidle/cpuidle-big_little.c @@ -2391,7 +2392,7 @@ M: Rafael J. Wysocki M: Daniel Lezcano L: linux-pm@vger.kernel.org S: Maintained -T: git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git F: drivers/cpuidle/* F: include/linux/cpuidle.h @@ -4916,7 +4917,7 @@ F: drivers/staging/ktap/ KCONFIG M: "Yann E. MORIN" L: linux-kbuild@vger.kernel.org -T: git://gitorious.org/linux-kconfig/linux-kconfig +T: git git://gitorious.org/linux-kconfig/linux-kconfig S: Maintained F: Documentation/kbuild/kconfig-language.txt F: scripts/kconfig/ @@ -5735,7 +5736,7 @@ L: linux-rdma@vger.kernel.org W: http://www.mellanox.com Q: http://patchwork.ozlabs.org/project/netdev/list/ Q: http://patchwork.kernel.org/project/linux-rdma/list/ -T: git://openfabrics.org/~eli/connect-ib.git +T: git git://openfabrics.org/~eli/connect-ib.git S: Supported F: drivers/net/ethernet/mellanox/mlx5/core/ F: include/linux/mlx5/ @@ -5745,7 +5746,7 @@ M: Eli Cohen L: linux-rdma@vger.kernel.org W: http://www.mellanox.com Q: http://patchwork.kernel.org/project/linux-rdma/list/ -T: git://openfabrics.org/~eli/connect-ib.git +T: git git://openfabrics.org/~eli/connect-ib.git S: Supported F: include/linux/mlx5/ F: drivers/infiniband/hw/mlx5/ @@ -9814,7 +9815,7 @@ ZR36067 VIDEO FOR LINUX DRIVER L: mjpeg-users@lists.sourceforge.net L: linux-media@vger.kernel.org W: http://mjpeg.sourceforge.net/driver-zoran/ -T: Mercurial http://linuxtv.org/hg/v4l-dvb +T: hg http://linuxtv.org/hg/v4l-dvb S: Odd Fixes F: drivers/media/pci/zoran/ -- cgit v0.10.2 From 1ae71d03194ea7424cbd14e449581f67c463d20d Mon Sep 17 00:00:00 2001 From: Liu Ping Fan Date: Mon, 3 Mar 2014 15:38:39 -0800 Subject: mm: numa: bugfix for LAST_CPUPID_NOT_IN_PAGE_FLAGS When doing some numa tests on powerpc, I triggered an oops bug. I find it is caused by using page->_last_cpupid. It should be initialized as "-1 & LAST_CPUPID_MASK", but not "-1". Otherwise, in task_numa_fault(), we will miss the checking (last_cpupid == (-1 & LAST_CPUPID_MASK)). And finally cause an oops bug in task_numa_group(), since the online cpu is less than possible cpu. This happen with CONFIG_SPARSE_VMEMMAP disabled Call trace: SMP NR_CPUS=64 NUMA PowerNV Modules linked in: CPU: 24 PID: 804 Comm: systemd-udevd Not tainted3.13.0-rc1+ #32 task: c000001e2746aa80 ti: c000001e32c50000 task.ti:c000001e32c50000 REGS: c000001e32c53510 TRAP: 0300 Not tainted(3.13.0-rc1+) MSR: 9000000000009032 CR:28024424 XER: 20000000 CFAR: c000000000009324 DAR: 7265717569726857 DSISR:40000000 SOFTE: 1 NIP .task_numa_fault+0x1470/0x2370 LR .task_numa_fault+0x1468/0x2370 Call Trace: .task_numa_fault+0x1468/0x2370 (unreliable) .do_numa_page+0x480/0x4a0 .handle_mm_fault+0x4ec/0xc90 .do_page_fault+0x3a8/0x890 handle_page_fault+0x10/0x30 Instruction dump: 3c82fefb 3884b138 48d9cff1 60000000 48000574 3c62fefb3863af78 3c82fefb 3884b138 48d9cfd5 60000000 e93f0100 <812902e4> 7d2907b45529063e 7d2a07b4 ---[ end trace 15f2510da5ae07cf ]--- Signed-off-by: Liu Ping Fan Signed-off-by: Aneesh Kumar K.V Acked-by: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/mm.h b/include/linux/mm.h index a1fe251..c1b7414 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -767,7 +767,7 @@ static inline bool __cpupid_match_pid(pid_t task_pid, int cpupid) #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS static inline int page_cpupid_xchg_last(struct page *page, int cpupid) { - return xchg(&page->_last_cpupid, cpupid); + return xchg(&page->_last_cpupid, cpupid & LAST_CPUPID_MASK); } static inline int page_cpupid_last(struct page *page) @@ -776,7 +776,7 @@ static inline int page_cpupid_last(struct page *page) } static inline void page_cpupid_reset_last(struct page *page) { - page->_last_cpupid = -1; + page->_last_cpupid = -1 & LAST_CPUPID_MASK; } #else static inline int page_cpupid_last(struct page *page) -- cgit v0.10.2 From 27329369c9ecf37771b2a65202cbf5578cff3331 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 3 Mar 2014 15:38:41 -0800 Subject: mm: page_alloc: exempt GFP_THISNODE allocations from zone fairness Jan Stancek reports manual page migration encountering allocation failures after some pages when there is still plenty of memory free, and bisected the problem down to commit 81c0a2bb515f ("mm: page_alloc: fair zone allocator policy"). The problem is that GFP_THISNODE obeys the zone fairness allocation batches on one hand, but doesn't reset them and wake kswapd on the other hand. After a few of those allocations, the batches are exhausted and the allocations fail. Fixing this means either having GFP_THISNODE wake up kswapd, or GFP_THISNODE not participating in zone fairness at all. The latter seems safer as an acute bugfix, we can clean up later. Reported-by: Jan Stancek Signed-off-by: Johannes Weiner Acked-by: Rik van Riel Acked-by: Mel Gorman Cc: [3.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3d1bf88..3bac76a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1238,6 +1238,15 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) } local_irq_restore(flags); } +static bool gfp_thisnode_allocation(gfp_t gfp_mask) +{ + return (gfp_mask & GFP_THISNODE) == GFP_THISNODE; +} +#else +static bool gfp_thisnode_allocation(gfp_t gfp_mask) +{ + return false; +} #endif /* @@ -1574,7 +1583,13 @@ again: get_pageblock_migratetype(page)); } - __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); + /* + * NOTE: GFP_THISNODE allocations do not partake in the kswapd + * aging protocol, so they can't be fair. + */ + if (!gfp_thisnode_allocation(gfp_flags)) + __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order)); + __count_zone_vm_events(PGALLOC, zone, 1 << order); zone_statistics(preferred_zone, zone, gfp_flags); local_irq_restore(flags); @@ -1946,8 +1961,12 @@ zonelist_scan: * ultimately fall back to remote zones that do not * partake in the fairness round-robin cycle of this * zonelist. + * + * NOTE: GFP_THISNODE allocations do not partake in + * the kswapd aging protocol, so they can't be fair. */ - if (alloc_flags & ALLOC_WMARK_LOW) { + if ((alloc_flags & ALLOC_WMARK_LOW) && + !gfp_thisnode_allocation(gfp_mask)) { if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0) continue; if (!zone_local(preferred_zone, zone)) @@ -2503,8 +2522,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * allowed per node queues are empty and that nodes are * over allocated. */ - if (IS_ENABLED(CONFIG_NUMA) && - (gfp_mask & GFP_THISNODE) == GFP_THISNODE) + if (gfp_thisnode_allocation(gfp_mask)) goto nopage; restart: -- cgit v0.10.2 From 3e909599215456928e6b42a04f11c2517881570b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 15 Jan 2014 13:21:22 +0000 Subject: efi: Move facility flags to struct efi As we grow support for more EFI architectures they're going to want the ability to query which EFI features are available on the running system. Instead of storing this information in an architecture-specific place, stick it in the global 'struct efi', which is already the central location for EFI state. While we're at it, let's change the return value of efi_enabled() to be bool and replace all references to 'facility' with 'feature', which is the usual word used to describe the attributes of the running system. Signed-off-by: Matt Fleming diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3d6b9f8..a7a8bd8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -119,7 +119,6 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ extern int add_efi_memmap; -extern unsigned long x86_efi_facility; extern struct efi_scratch efi_scratch; extern void efi_set_executable(efi_memory_desc_t *md, bool executable); extern int efi_memblock_x86_reserve_range(void); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 06853e6..3b5d278 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -926,11 +926,11 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, "EL32", 4)) { - set_bit(EFI_BOOT, &x86_efi_facility); + set_bit(EFI_BOOT, &efi.flags); } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, "EL64", 4)) { - set_bit(EFI_BOOT, &x86_efi_facility); - set_bit(EFI_64BIT, &x86_efi_facility); + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); } if (efi_enabled(EFI_BOOT)) diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1a201ac..8215629 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -67,8 +67,6 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; -unsigned long x86_efi_facility; - static __initdata efi_config_table_type_t arch_tables[] = { #ifdef CONFIG_X86_UV {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, @@ -78,15 +76,6 @@ static __initdata efi_config_table_type_t arch_tables[] = { u64 efi_setup; /* efi setup_data physical address */ -/* - * Returns 1 if 'facility' is enabled, 0 otherwise. - */ -int efi_enabled(int facility) -{ - return test_bit(facility, &x86_efi_facility) != 0; -} -EXPORT_SYMBOL(efi_enabled); - static bool __initdata disable_runtime = false; static int __init setup_noefi(char *arg) { @@ -455,7 +444,7 @@ void __init efi_reserve_boot_services(void) void __init efi_unmap_memmap(void) { - clear_bit(EFI_MEMMAP, &x86_efi_facility); + clear_bit(EFI_MEMMAP, &efi.flags); if (memmap.map) { early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; @@ -722,7 +711,7 @@ void __init efi_init(void) if (efi_systab_init(efi_phys.systab)) return; - set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); efi.config_table = (unsigned long)efi.systab->tables; efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; @@ -750,7 +739,7 @@ void __init efi_init(void) if (efi_config_init(arch_tables)) return; - set_bit(EFI_CONFIG_TABLES, &x86_efi_facility); + set_bit(EFI_CONFIG_TABLES, &efi.flags); /* * Note: We currently don't support runtime services on an EFI @@ -762,12 +751,12 @@ void __init efi_init(void) else { if (disable_runtime || efi_runtime_init()) return; - set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } if (efi_memmap_init()) return; - set_bit(EFI_MEMMAP, &x86_efi_facility); + set_bit(EFI_MEMMAP, &efi.flags); print_efi_memmap(); } diff --git a/include/linux/efi.h b/include/linux/efi.h index 0a819e7..214833b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -573,6 +573,7 @@ extern struct efi { efi_reset_system_t *reset_system; efi_set_virtual_address_map_t *set_virtual_address_map; struct efi_memory_map *memmap; + unsigned long flags; } efi; static inline int @@ -660,17 +661,24 @@ extern int __init efi_setup_pcdp_console(char *); #ifdef CONFIG_EFI # ifdef CONFIG_X86 -extern int efi_enabled(int facility); + +/* + * Test whether the above EFI_* bits are enabled. + */ +static inline bool efi_enabled(int feature) +{ + return test_bit(feature, &efi.flags) != 0; +} # else -static inline int efi_enabled(int facility) +static inline bool efi_enabled(int feature) { - return 1; + return true; } # endif #else -static inline int efi_enabled(int facility) +static inline bool efi_enabled(int feature) { - return 0; + return false; } #endif -- cgit v0.10.2 From 0f8093a92da77afe24fd258a5a1037bb2b97a870 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 15 Jan 2014 13:36:33 +0000 Subject: efi: Set feature flags inside feature init functions It makes more sense to set the feature flag in the success path of the detection function than it does to rely on the caller doing it. Apart from it being more logical to group the code and data together, it sets a much better example for new EFI architectures. Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 8215629..541e7db 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -577,6 +577,8 @@ static int __init efi_systab_init(void *phys) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + return 0; } @@ -612,6 +614,8 @@ static int __init efi_runtime_init(void) efi.get_time = phys_efi_get_time; early_iounmap(runtime, sizeof(efi_runtime_services_t)); + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return 0; } @@ -629,6 +633,8 @@ static int __init efi_memmap_init(void) if (add_efi_memmap) do_add_efi_memmap(); + set_bit(EFI_MEMMAP, &efi.flags); + return 0; } @@ -739,8 +745,6 @@ void __init efi_init(void) if (efi_config_init(arch_tables)) return; - set_bit(EFI_CONFIG_TABLES, &efi.flags); - /* * Note: We currently don't support runtime services on an EFI * that doesn't match the kernel 32/64-bit mode. @@ -751,7 +755,6 @@ void __init efi_init(void) else { if (disable_runtime || efi_runtime_init()) return; - set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } if (efi_memmap_init()) return; @@ -1194,7 +1197,7 @@ static int __init parse_efi_cmdline(char *str) str++; if (!strncmp(str, "old_map", 7)) - set_bit(EFI_OLD_MEMMAP, &x86_efi_facility); + set_bit(EFI_OLD_MEMMAP, &efi.flags); return 0; } diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4753bac..b25b36b 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -313,5 +313,8 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables) } pr_cont("\n"); early_iounmap(config_tables, efi.systab->nr_tables * sz); + + set_bit(EFI_CONFIG_TABLES, &efi.flags); + return 0; } -- cgit v0.10.2 From aca32b5768bb314f5f3f42e26822424468412f5a Mon Sep 17 00:00:00 2001 From: "Geyslan G. Bem" Date: Wed, 30 Oct 2013 15:57:41 -0300 Subject: efivarfs: 'efivarfs_file_write' function reorganization This reorganization removes useless 'bytes' prior assignment and uses 'memdup_user' instead 'kmalloc' + 'copy_from_user'. Signed-off-by: Geyslan G. Bem Signed-off-by: Matt Fleming diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 8dd524f..cdb2971 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -21,7 +21,7 @@ static ssize_t efivarfs_file_write(struct file *file, u32 attributes; struct inode *inode = file->f_mapping->host; unsigned long datasize = count - sizeof(attributes); - ssize_t bytes = 0; + ssize_t bytes; bool set = false; if (count < sizeof(attributes)) @@ -33,14 +33,9 @@ static ssize_t efivarfs_file_write(struct file *file, if (attributes & ~(EFI_VARIABLE_MASK)) return -EINVAL; - data = kmalloc(datasize, GFP_KERNEL); - if (!data) - return -ENOMEM; - - if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) { - bytes = -EFAULT; - goto out; - } + data = memdup_user(userbuf + sizeof(attributes), datasize); + if (IS_ERR(data)) + return PTR_ERR(data); bytes = efivar_entry_set_get_size(var, attributes, &datasize, data, &set); -- cgit v0.10.2 From 5db80c65140c6502d5c2345d0b0fa166eecedc61 Mon Sep 17 00:00:00 2001 From: Madper Xie Date: Sun, 10 Nov 2013 22:15:11 +0800 Subject: x86/efi: Delete out-of-date comments of efi_query_variable_store For now we only ensure about 5kb free space for avoiding our board refusing boot. But the comment lies that we retain 50% space. Signed-off-by: Madper Xie Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 541e7db..dc1dd6d 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1110,9 +1110,8 @@ u64 efi_mem_attributes(unsigned long phys_addr) } /* - * Some firmware has serious problems when using more than 50% of the EFI - * variable store, i.e. it triggers bugs that can brick machines. Ensure that - * we never use more than this safe limit. + * Some firmware implementations refuse to boot if there's insufficient space + * in the variable store. Ensure that we never use more than a safe limit. * * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable * store. @@ -1131,10 +1130,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) return status; /* - * Some firmware implementations refuse to boot if there's insufficient - * space in the variable store. We account for that by refusing the - * write if permitting it would reduce the available space to under - * 5KB. This figure was provided by Samsung, so should be safe. + * We account for that by refusing the write if permitting it would + * reduce the available space to under 5KB. This figure was provided by + * Samsung, so should be safe. */ if ((remaining_size - size < EFI_MIN_RESERVE) && !efi_no_storage_paranoia) { -- cgit v0.10.2 From 9b7d204982cf6e4e5b9b0f8bff151e48250cce58 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 3 Jan 2014 16:08:48 -0800 Subject: x86/efi: Style neatening Coalesce formats and remove spaces before tabs. Move __initdata after the variable declaration. Signed-off-by: Joe Perches Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index dc1dd6d..876dc5b 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -67,7 +67,7 @@ struct efi_memory_map memmap; static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; -static __initdata efi_config_table_type_t arch_tables[] = { +static efi_config_table_type_t arch_tables[] __initdata = { #ifdef CONFIG_X86_UV {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, #endif @@ -76,7 +76,7 @@ static __initdata efi_config_table_type_t arch_tables[] = { u64 efi_setup; /* efi setup_data physical address */ -static bool __initdata disable_runtime = false; +static bool disable_runtime __initdata = false; static int __init setup_noefi(char *arg) { disable_runtime = true; @@ -263,9 +263,9 @@ static efi_status_t __init phys_efi_get_time(efi_time_t *tm, int efi_set_rtc_mmss(const struct timespec *now) { unsigned long nowtime = now->tv_sec; - efi_status_t status; - efi_time_t eft; - efi_time_cap_t cap; + efi_status_t status; + efi_time_t eft; + efi_time_cap_t cap; struct rtc_time tm; status = efi.get_time(&eft, &cap); @@ -283,9 +283,8 @@ int efi_set_rtc_mmss(const struct timespec *now) eft.second = tm.tm_sec; eft.nanosecond = 0; } else { - printk(KERN_ERR - "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", - __FUNCTION__, nowtime); + pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", + __func__, nowtime); return -1; } @@ -401,8 +400,7 @@ static void __init print_efi_memmap(void) p < memmap.map_end; p += memmap.desc_size, i++) { md = p; - pr_info("mem%02u: type=%u, attr=0x%llx, " - "range=[0x%016llx-0x%016llx) (%lluMB)\n", + pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n", i, md->type, md->attribute, md->phys_addr, md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), (md->num_pages >> (20 - EFI_PAGE_SHIFT))); @@ -434,9 +432,8 @@ void __init efi_reserve_boot_services(void) memblock_is_region_reserved(start, size)) { /* Could not reserve, skip it */ md->num_pages = 0; - memblock_dbg("Could not reserve boot range " - "[0x%010llx-0x%010llx]\n", - start, start+size-1); + memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", + start, start+size-1); } else memblock_reserve(start, size); } @@ -572,8 +569,7 @@ static int __init efi_systab_init(void *phys) return -EINVAL; } if ((efi.systab->hdr.revision >> 16) == 0) - pr_err("Warning: System table version " - "%d.%02d, expected 1.00 or greater!\n", + pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n", efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff); -- cgit v0.10.2 From ef6bea6ddf0e76077d0798e57b374015b23a837e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Jan 2014 12:48:14 +0100 Subject: x86, ptdump: Add the functionality to dump an arbitrary pagetable With reusing the ->trampoline_pgd page table for mapping EFI regions in order to use them after having switched to EFI virtual mode, it is very useful to be able to dump aforementioned page table in dmesg. This adds that functionality through the walk_pgd_level() interface which can be called from somewhere else. The original functionality of dumping to debugfs remains untouched. Cc: Arjan van de Ven Signed-off-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5ad38ad..938ef1d 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,9 +15,10 @@ : (prot)) #ifndef __ASSEMBLY__ - #include +void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0002a3a..20621d7 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -30,6 +30,7 @@ struct pg_state { unsigned long start_address; unsigned long current_address; const struct addr_marker *marker; + bool to_dmesg; }; struct addr_marker { @@ -88,10 +89,28 @@ static struct addr_marker address_markers[] = { #define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) #define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) +#define pt_dump_seq_printf(m, to_dmesg, fmt, args...) \ +({ \ + if (to_dmesg) \ + printk(KERN_INFO fmt, ##args); \ + else \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_cont_printf(m, to_dmesg, fmt, args...) \ +({ \ + if (to_dmesg) \ + printk(KERN_CONT fmt, ##args); \ + else \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + /* * Print a readable form of a pgprot_t to the seq_file */ -static void printk_prot(struct seq_file *m, pgprot_t prot, int level) +static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) { pgprotval_t pr = pgprot_val(prot); static const char * const level_name[] = @@ -99,47 +118,47 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level) if (!pgprot_val(prot)) { /* Not present */ - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); } else { if (pr & _PAGE_USER) - seq_printf(m, "USR "); + pt_dump_cont_printf(m, dmsg, "USR "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); if (pr & _PAGE_RW) - seq_printf(m, "RW "); + pt_dump_cont_printf(m, dmsg, "RW "); else - seq_printf(m, "ro "); + pt_dump_cont_printf(m, dmsg, "ro "); if (pr & _PAGE_PWT) - seq_printf(m, "PWT "); + pt_dump_cont_printf(m, dmsg, "PWT "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); if (pr & _PAGE_PCD) - seq_printf(m, "PCD "); + pt_dump_cont_printf(m, dmsg, "PCD "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); /* Bit 9 has a different meaning on level 3 vs 4 */ if (level <= 3) { if (pr & _PAGE_PSE) - seq_printf(m, "PSE "); + pt_dump_cont_printf(m, dmsg, "PSE "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); } else { if (pr & _PAGE_PAT) - seq_printf(m, "pat "); + pt_dump_cont_printf(m, dmsg, "pat "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); } if (pr & _PAGE_GLOBAL) - seq_printf(m, "GLB "); + pt_dump_cont_printf(m, dmsg, "GLB "); else - seq_printf(m, " "); + pt_dump_cont_printf(m, dmsg, " "); if (pr & _PAGE_NX) - seq_printf(m, "NX "); + pt_dump_cont_printf(m, dmsg, "NX "); else - seq_printf(m, "x "); + pt_dump_cont_printf(m, dmsg, "x "); } - seq_printf(m, "%s\n", level_name[level]); + pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); } /* @@ -178,7 +197,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, st->current_prot = new_prot; st->level = level; st->marker = address_markers; - seq_printf(m, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", + st->marker->name); } else if (prot != cur || level != st->level || st->current_address >= st->marker[1].start_address) { const char *unit = units; @@ -188,17 +208,17 @@ static void note_page(struct seq_file *m, struct pg_state *st, /* * Now print the actual finished series */ - seq_printf(m, "0x%0*lx-0x%0*lx ", - width, st->start_address, - width, st->current_address); + pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", + width, st->start_address, + width, st->current_address); delta = (st->current_address - st->start_address) >> 10; while (!(delta & 1023) && unit[1]) { delta >>= 10; unit++; } - seq_printf(m, "%9lu%c ", delta, *unit); - printk_prot(m, st->current_prot, st->level); + pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit); + printk_prot(m, st->current_prot, st->level, st->to_dmesg); /* * We print markers for special areas of address space, @@ -207,7 +227,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, */ if (st->current_address >= st->marker[1].start_address) { st->marker++; - seq_printf(m, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", + st->marker->name); } st->start_address = st->current_address; @@ -296,7 +317,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, #define pgd_none(a) pud_none(__pud(pgd_val(a))) #endif -static void walk_pgd_level(struct seq_file *m) +void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) { #ifdef CONFIG_X86_64 pgd_t *start = (pgd_t *) &init_level4_pgt; @@ -304,9 +325,12 @@ static void walk_pgd_level(struct seq_file *m) pgd_t *start = swapper_pg_dir; #endif int i; - struct pg_state st; + struct pg_state st = {}; - memset(&st, 0, sizeof(st)); + if (pgd) { + start = pgd; + st.to_dmesg = true; + } for (i = 0; i < PTRS_PER_PGD; i++) { st.current_address = normalize_addr(i * PGD_LEVEL_MULT); @@ -331,7 +355,7 @@ static void walk_pgd_level(struct seq_file *m) static int ptdump_show(struct seq_file *m, void *v) { - walk_pgd_level(m); + ptdump_walk_pgd_level(m, NULL); return 0; } -- cgit v0.10.2 From 11cc851254b4bc3bd4430be8db2a41469303a427 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Jan 2014 12:48:15 +0100 Subject: x86/efi: Dump the EFI page table This is very useful for debugging issues with the recently added pagetable switching code for EFI virtual mode. Signed-off-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 321a52c..61bd2ad 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -81,6 +81,15 @@ config X86_PTDUMP kernel. If in doubt, say "N" +config EFI_PGT_DUMP + bool "Dump the EFI pagetable" + depends on EFI && X86_PTDUMP + ---help--- + Enable this if you want to dump the EFI page table before + enabling virtual mode. This can be used to debug miscellaneous + issues with the mapping of the EFI runtime regions into that + table. + config DEBUG_RODATA bool "Write protect kernel read-only data structures" default y diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a7a8bd8..4afd3b3 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -133,6 +133,7 @@ extern void efi_setup_page_tables(void); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_mkexec(void); +extern void __init efi_dump_pagetable(void); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 876dc5b..37f20d7 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -1021,6 +1021,7 @@ void __init efi_enter_virtual_mode(void) efi_setup_page_tables(); efi_sync_low_kernel_mappings(); + efi_dump_pagetable(); if (!efi_setup) { status = phys_efi_set_virtual_address_map( diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 0b74cdf..39496ae 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -41,6 +41,7 @@ static unsigned long efi_rt_eflags; void efi_sync_low_kernel_mappings(void) {} void efi_setup_page_tables(void) {} +void __init efi_dump_pagetable(void) {} void __init efi_map_region(efi_memory_desc_t *md) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 0c2a234..e05c69b 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -242,3 +242,12 @@ void __init efi_runtime_mkexec(void) if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); } + +void __init efi_dump_pagetable(void) +{ +#ifdef CONFIG_EFI_PGT_DUMP + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + ptdump_walk_pgd_level(NULL, pgd); +#endif +} -- cgit v0.10.2 From 42a5477251f0e0f33ad5f6a95c48d685ec03191e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Jan 2014 12:48:16 +0100 Subject: x86, pageattr: Export page unmapping interface We will use it in efi so expose it. Signed-off-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 1aa9ccd..94e40f1 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -385,6 +385,8 @@ extern pte_t *lookup_address(unsigned long address, unsigned int *level); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); +void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, + unsigned numpages); #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_DEFS_H */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b3b19f4..a348868 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -692,6 +692,18 @@ static bool try_to_free_pmd_page(pmd_t *pmd) return true; } +static bool try_to_free_pud_page(pud_t *pud) +{ + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) + if (!pud_none(pud[i])) + return false; + + free_page((unsigned long)pud); + return true; +} + static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) { pte_t *pte = pte_offset_kernel(pmd, start); @@ -805,6 +817,16 @@ static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) */ } +static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end) +{ + pgd_t *pgd_entry = root + pgd_index(addr); + + unmap_pud_range(pgd_entry, addr, end); + + if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry))) + pgd_clear(pgd_entry); +} + static int alloc_pte_page(pmd_t *pmd) { pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); @@ -999,9 +1021,8 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, static int populate_pgd(struct cpa_data *cpa, unsigned long addr) { pgprot_t pgprot = __pgprot(_KERNPG_TABLE); - bool allocd_pgd = false; - pgd_t *pgd_entry; pud_t *pud = NULL; /* shut up gcc */ + pgd_t *pgd_entry; int ret; pgd_entry = cpa->pgd + pgd_index(addr); @@ -1015,7 +1036,6 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) return -1; set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); - allocd_pgd = true; } pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); @@ -1023,19 +1043,11 @@ static int populate_pgd(struct cpa_data *cpa, unsigned long addr) ret = populate_pud(cpa, addr, pgd_entry, pgprot); if (ret < 0) { - unmap_pud_range(pgd_entry, addr, + unmap_pgd_range(cpa->pgd, addr, addr + (cpa->numpages << PAGE_SHIFT)); - - if (allocd_pgd) { - /* - * If I allocated this PUD page, I can just as well - * free it in this error path. - */ - pgd_clear(pgd_entry); - free_page((unsigned long)pud); - } return ret; } + cpa->numpages = ret; return 0; } @@ -1861,6 +1873,12 @@ out: return retval; } +void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, + unsigned numpages) +{ + unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT)); +} + /* * The testcases use internal knowledge of the implementation that shouldn't * be exposed to the rest of the kernel. Include these directly here. -- cgit v0.10.2 From b7b898ae0c0a82489511a1ce1b35f26215e6beb5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Jan 2014 12:48:17 +0100 Subject: x86/efi: Make efi virtual runtime map passing more robust Currently, running SetVirtualAddressMap() and passing the physical address of the virtual map array was working only by a lucky coincidence because the memory was present in the EFI page table too. Until Toshi went and booted this on a big HP box - the krealloc() manner of resizing the memmap we're doing did allocate from such physical addresses which were not mapped anymore and boom: http://lkml.kernel.org/r/1386806463.1791.295.camel@misato.fc.hp.com One way to take care of that issue is to reimplement the krealloc thing but with pages. We start with contiguous pages of order 1, i.e. 2 pages, and when we deplete that memory (shouldn't happen all that often but you know firmware) we realloc the next power-of-two pages. Having the pages, it is much more handy and easy to map them into the EFI page table with the already existing mapping code which we're using for building the virtual mappings. Thanks to Toshi Kani and Matt for the great debugging help. Reported-by: Toshi Kani Signed-off-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 4afd3b3..e985d6b 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -129,7 +129,8 @@ extern void efi_memory_uc(u64 addr, unsigned long size); extern void __init efi_map_region(efi_memory_desc_t *md); extern void __init efi_map_region_fixed(efi_memory_desc_t *md); extern void efi_sync_low_kernel_mappings(void); -extern void efi_setup_page_tables(void); +extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); +extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_mkexec(void); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 37f20d7..576bb12 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -926,14 +926,36 @@ static void __init efi_map_regions_fixed(void) } +static void *realloc_pages(void *old_memmap, int old_shift) +{ + void *ret; + + ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); + if (!ret) + goto out; + + /* + * A first-time allocation doesn't have anything to copy. + */ + if (!old_memmap) + return ret; + + memcpy(ret, old_memmap, PAGE_SIZE << old_shift); + +out: + free_pages((unsigned long)old_memmap, old_shift); + return ret; +} + /* - * Map efi memory ranges for runtime serivce and update new_memmap with virtual - * addresses. + * Map the efi memory ranges of the runtime services and update new_mmap with + * virtual addresses. */ -static void * __init efi_map_regions(int *count) +static void * __init efi_map_regions(int *count, int *pg_shift) { + void *p, *new_memmap = NULL; + unsigned long left = 0; efi_memory_desc_t *md; - void *p, *tmp, *new_memmap = NULL; for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; @@ -948,20 +970,23 @@ static void * __init efi_map_regions(int *count) efi_map_region(md); get_systab_virt_addr(md); - tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size, - GFP_KERNEL); - if (!tmp) - goto out; - new_memmap = tmp; + if (left < memmap.desc_size) { + new_memmap = realloc_pages(new_memmap, *pg_shift); + if (!new_memmap) + return NULL; + + left += PAGE_SIZE << *pg_shift; + (*pg_shift)++; + } + memcpy(new_memmap + (*count * memmap.desc_size), md, memmap.desc_size); + + left -= memmap.desc_size; (*count)++; } return new_memmap; -out: - kfree(new_memmap); - return NULL; } /* @@ -987,9 +1012,9 @@ out: */ void __init efi_enter_virtual_mode(void) { - efi_status_t status; + int err, count = 0, pg_shift = 0; void *new_memmap = NULL; - int err, count = 0; + efi_status_t status; efi.systab = NULL; @@ -1006,20 +1031,24 @@ void __init efi_enter_virtual_mode(void) efi_map_regions_fixed(); } else { efi_merge_regions(); - new_memmap = efi_map_regions(&count); + new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); return; } - } - err = save_runtime_map(); - if (err) - pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); + err = save_runtime_map(); + if (err) + pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); + } BUG_ON(!efi.systab); - efi_setup_page_tables(); + if (!efi_setup) { + if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) + return; + } + efi_sync_low_kernel_mappings(); efi_dump_pagetable(); @@ -1060,7 +1089,35 @@ void __init efi_enter_virtual_mode(void) efi_runtime_mkexec(); - kfree(new_memmap); + + /* + * We mapped the descriptor array into the EFI pagetable above but we're + * not unmapping it here. Here's why: + * + * We're copying select PGDs from the kernel page table to the EFI page + * table and when we do so and make changes to those PGDs like unmapping + * stuff from them, those changes appear in the kernel page table and we + * go boom. + * + * From setup_real_mode(): + * + * ... + * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + * + * In this particular case, our allocation is in PGD 0 of the EFI page + * table but we've copied that PGD from PGD[272] of the EFI page table: + * + * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 + * + * where the direct memory mapping in kernel space is. + * + * new_memmap's VA comes from that direct mapping and thus clearing it, + * it would get cleared in the kernel page table too. + * + * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); + */ + if (!efi_setup) + free_pages((unsigned long)new_memmap, pg_shift); /* clean DUMMY object */ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 39496ae..9ee3491 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c @@ -40,8 +40,12 @@ static unsigned long efi_rt_eflags; void efi_sync_low_kernel_mappings(void) {} -void efi_setup_page_tables(void) {} void __init efi_dump_pagetable(void) {} +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + return 0; +} +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} void __init efi_map_region(efi_memory_desc_t *md) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e05c69b..1928090 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -137,12 +137,38 @@ void efi_sync_low_kernel_mappings(void) sizeof(pgd_t) * num_pgds); } -void efi_setup_page_tables(void) +int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) { + pgd_t *pgd; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; + pgd = __va(efi_scratch.efi_pgt); - if (!efi_enabled(EFI_OLD_MEMMAP)) - efi_scratch.use_pgd = true; + /* + * It can happen that the physical address of new_memmap lands in memory + * which is not mapped in the EFI page table. Therefore we need to go + * and ident-map those pages containing the map before calling + * phys_efi_set_virtual_address_map(). + */ + if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { + pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); + return 1; + } + + efi_scratch.use_pgd = true; + + + return 0; +} + +void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) +{ + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + + kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); } static void __init __map_region(efi_memory_desc_t *md, u64 va) -- cgit v0.10.2 From fabb37c736f9f688fe3eec98550a5c032a07cfaa Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Jan 2014 12:48:18 +0100 Subject: x86/efi: Split efi_enter_virtual_mode ... into a kexec flavor for better code readability and simplicity. The original one was getting ugly with ifdeffery. Signed-off-by: Borislav Petkov Tested-by: Toshi Kani Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 576bb12..6f0a467 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -879,8 +879,9 @@ static void __init get_systab_virt_addr(efi_memory_desc_t *md) } } -static int __init save_runtime_map(void) +static void __init save_runtime_map(void) { +#ifdef CONFIG_KEXEC efi_memory_desc_t *md; void *tmp, *p, *q = NULL; int count = 0; @@ -902,28 +903,12 @@ static int __init save_runtime_map(void) } efi_runtime_map_setup(q, count, memmap.desc_size); + return; - return 0; out: kfree(q); - return -ENOMEM; -} - -/* - * Map efi regions which were passed via setup_data. The virt_addr is a fixed - * addr which was used in first kernel of a kexec boot. - */ -static void __init efi_map_regions_fixed(void) -{ - void *p; - efi_memory_desc_t *md; - - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { - md = p; - efi_map_region_fixed(md); /* FIXME: add error handling */ - get_systab_virt_addr(md); - } - + pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); +#endif } static void *realloc_pages(void *old_memmap, int old_shift) @@ -989,6 +974,72 @@ static void * __init efi_map_regions(int *count, int *pg_shift) return new_memmap; } +static void __init kexec_enter_virtual_mode(void) +{ +#ifdef CONFIG_KEXEC + efi_memory_desc_t *md; + void *p; + + efi.systab = NULL; + + /* + * We don't do virtual mode, since we don't do runtime services, on + * non-native EFI + */ + if (!efi_is_native()) { + efi_unmap_memmap(); + return; + } + + /* + * Map efi regions which were passed via setup_data. The virt_addr is a + * fixed addr which was used in first kernel of a kexec boot. + */ + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; + efi_map_region_fixed(md); /* FIXME: add error handling */ + get_systab_virt_addr(md); + } + + save_runtime_map(); + + BUG_ON(!efi.systab); + + efi_sync_low_kernel_mappings(); + + /* + * Now that EFI is in virtual mode, update the function + * pointers in the runtime service table to the new virtual addresses. + * + * Call EFI services through wrapper functions. + */ + efi.runtime_version = efi_systab.hdr.revision; + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; + efi.set_wakeup_time = virt_efi_set_wakeup_time; + efi.get_variable = virt_efi_get_variable; + efi.get_next_variable = virt_efi_get_next_variable; + efi.set_variable = virt_efi_set_variable; + efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; + efi.reset_system = virt_efi_reset_system; + efi.set_virtual_address_map = NULL; + efi.query_variable_info = virt_efi_query_variable_info; + efi.update_capsule = virt_efi_update_capsule; + efi.query_capsule_caps = virt_efi_query_capsule_caps; + + if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) + runtime_code_page_mkexec(); + + /* clean DUMMY object */ + efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, + EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS, + 0, NULL); +#endif +} + /* * This function will switch the EFI runtime services to virtual mode. * Essentially, we look through the EFI memmap and map every region that @@ -1008,11 +1059,12 @@ static void * __init efi_map_regions(int *count, int *pg_shift) * * Specially for kexec boot, efi runtime maps in previous kernel should * be passed in via setup_data. In that case runtime ranges will be mapped - * to the same virtual addresses as the first kernel. + * to the same virtual addresses as the first kernel, see + * kexec_enter_virtual_mode(). */ -void __init efi_enter_virtual_mode(void) +static void __init __efi_enter_virtual_mode(void) { - int err, count = 0, pg_shift = 0; + int count = 0, pg_shift = 0; void *new_memmap = NULL; efi_status_t status; @@ -1027,43 +1079,33 @@ void __init efi_enter_virtual_mode(void) return; } - if (efi_setup) { - efi_map_regions_fixed(); - } else { - efi_merge_regions(); - new_memmap = efi_map_regions(&count, &pg_shift); - if (!new_memmap) { - pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - return; - } - - err = save_runtime_map(); - if (err) - pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); + efi_merge_regions(); + new_memmap = efi_map_regions(&count, &pg_shift); + if (!new_memmap) { + pr_err("Error reallocating memory, EFI runtime non-functional!\n"); + return; } + save_runtime_map(); + BUG_ON(!efi.systab); - if (!efi_setup) { - if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) - return; - } + if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) + return; efi_sync_low_kernel_mappings(); efi_dump_pagetable(); - if (!efi_setup) { - status = phys_efi_set_virtual_address_map( + status = phys_efi_set_virtual_address_map( memmap.desc_size * count, memmap.desc_size, memmap.desc_version, (efi_memory_desc_t *)__pa(new_memmap)); - if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); - } + if (status != EFI_SUCCESS) { + pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + panic("EFI call to SetVirtualAddressMap() failed!"); } /* @@ -1089,7 +1131,6 @@ void __init efi_enter_virtual_mode(void) efi_runtime_mkexec(); - /* * We mapped the descriptor array into the EFI pagetable above but we're * not unmapping it here. Here's why: @@ -1116,8 +1157,7 @@ void __init efi_enter_virtual_mode(void) * * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); */ - if (!efi_setup) - free_pages((unsigned long)new_memmap, pg_shift); + free_pages((unsigned long)new_memmap, pg_shift); /* clean DUMMY object */ efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, @@ -1127,6 +1167,14 @@ void __init efi_enter_virtual_mode(void) 0, NULL); } +void __init efi_enter_virtual_mode(void) +{ + if (efi_setup) + kexec_enter_virtual_mode(); + else + __efi_enter_virtual_mode(); +} + /* * Convenience functions to obtain memory types and attributes */ -- cgit v0.10.2 From 092063808c498eccac8e891973bf143e7b60d723 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 15 Jan 2014 13:49:51 +0000 Subject: ia64/efi: Implement efi_enabled() There's no good reason to keep efi_enabled() under CONFIG_X86 anymore, since nothing about the implementation is specific to x86. Set EFI feature flags in the ia64 boot path instead of claiming to support all features. The old behaviour was actually buggy since efi.memmap never points to a valid memory map, so we shouldn't be claiming to support EFI_MEMMAP. Fortunately, this bug was never triggered because EFI_MEMMAP isn't used outside of arch/x86 currently, but that may not always be the case. Reviewed-and-tested-by: Tony Luck Signed-off-by: Matt Fleming diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index da5b462..741b99c 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -477,6 +477,9 @@ efi_init (void) char *cp, vendor[100] = "unknown"; int i; + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + /* * It's too early to be able to use the standard kernel command line * support... @@ -529,6 +532,8 @@ efi_init (void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + palo_phys = EFI_INVALID_TABLE_ADDR; if (efi_config_init(arch_tables) != 0) @@ -657,6 +662,8 @@ efi_enter_virtual_mode (void) return; } + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + /* * Now that EFI is in virtual mode, we call the EFI functions more * efficiently: diff --git a/include/linux/efi.h b/include/linux/efi.h index 214833b..64d532c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -660,8 +660,6 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_ARCH_1 6 /* First arch-specific bit */ #ifdef CONFIG_EFI -# ifdef CONFIG_X86 - /* * Test whether the above EFI_* bits are enabled. */ @@ -669,12 +667,6 @@ static inline bool efi_enabled(int feature) { return test_bit(feature, &efi.flags) != 0; } -# else -static inline bool efi_enabled(int feature) -{ - return true; -} -# endif #else static inline bool efi_enabled(int feature) { -- cgit v0.10.2 From 69e608411473ac56358ef35277563982d0565381 Mon Sep 17 00:00:00 2001 From: Daeseok Youn Date: Thu, 13 Feb 2014 17:16:36 +0900 Subject: efi: Use NULL instead of 0 for pointer Fix following sparse warnings: drivers/firmware/efi/efivars.c:230:66: warning: Using plain integer as NULL pointer drivers/firmware/efi/efi.c:236:27: warning: Using plain integer as NULL pointer Signed-off-by: Daeseok Youn Signed-off-by: Matt Fleming diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index b25b36b..af20f17 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -233,7 +233,7 @@ static __initdata efi_config_table_type_t common_tables[] = { {SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab}, {SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios}, {UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga}, - {NULL_GUID, NULL, 0}, + {NULL_GUID, NULL, NULL}, }; static __init int match_config_table(efi_guid_t *guid, diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 3dc2482..50ea412 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -227,7 +227,7 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) memcpy(&entry->var, new_var, count); err = efivar_entry_set(entry, new_var->Attributes, - new_var->DataSize, new_var->Data, false); + new_var->DataSize, new_var->Data, NULL); if (err) { printk(KERN_WARNING "efivars: set_variable() failed: status=%d\n", err); return -EIO; -- cgit v0.10.2 From cdc2b4158405f1975f9d5205096f08430eda1c0e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 14 Feb 2014 18:10:55 -0500 Subject: dm thin: synchronize the pool mode during suspend Commit b5330655 ("dm thin: handle metadata failures more consistently") increased potential for the pool's mode to be changed in response to metadata operation failures. When the pool mode is changed it isn't synchronized with the mode in pool_features stored in the target's context (ti->private) that is used as the basis for (re)establishing the pool mode during resume via bind_control_target. It is important that we synchronize the pool mode when it is changed otherwise the pool may experience and unexpected mode transition on the next resume (especially if there was no new table load). Signed-off-by: Mike Snitzer Acked-by: Joe Thornber diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 7e84bac..dfa48ec 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1402,6 +1402,7 @@ static enum pool_mode get_pool_mode(struct pool *pool) static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) { int r; + struct pool_c *pt = pool->ti->private; enum pool_mode old_mode = pool->pf.mode; switch (new_mode) { @@ -1448,6 +1449,11 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) } pool->pf.mode = new_mode; + /* + * The pool mode may have changed, sync it so bind_control_target() + * doesn't cause an unexpected mode transition on resume. + */ + pt->adjusted_pf.mode = new_mode; } /* -- cgit v0.10.2 From c866cda47f2c6c8abb929933b7794e9a92d7c924 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:08 +0000 Subject: powerpc:eVh_pic: Kill irq_desc abuse I'm really grumpy about this one. The line: #include "../../../kernel/irq/settings.h" should have been an alarm sign for all people who added their SOB to this trainwreck. When I cleaned up the mess people made with interrupt descriptors a few years ago, I warned that I'm going to hunt down new offenders and treat them with stinking trouts. In this case I'll use frozen shark for a better educational value. The whole idiocy which was done there could have been avoided with two lines of perfectly fine code. And do not complain about the lack of correct examples in tree. The solution is simple: Remove the brainfart and use the proper functions, which should have been used in the first place Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Ashish Kalra Cc: Timur Tabi Cc: Kumar Gala Cc: ppc Link: http://lkml.kernel.org/r/20140223212736.451970660@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index b74085c..2d20f10 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -28,8 +28,6 @@ #include #include -#include "../../../kernel/irq/settings.h" - static struct ehv_pic *global_ehv_pic; static DEFINE_SPINLOCK(ehv_pic_lock); @@ -113,17 +111,13 @@ static unsigned int ehv_pic_type_to_vecpri(unsigned int type) int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type) { unsigned int src = virq_to_hw(d->irq); - struct irq_desc *desc = irq_to_desc(d->irq); unsigned int vecpri, vold, vnew, prio, cpu_dest; unsigned long flags; if (flow_type == IRQ_TYPE_NONE) flow_type = IRQ_TYPE_LEVEL_LOW; - irq_settings_clr_level(desc); - irq_settings_set_trigger_mask(desc, flow_type); - if (flow_type & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW)) - irq_settings_set_level(desc); + irqd_set_trigger_type(d, flow_type); vecpri = ehv_pic_type_to_vecpri(flow_type); @@ -144,7 +138,7 @@ int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type) ev_int_set_config(src, vecpri, prio, cpu_dest); spin_unlock_irqrestore(&ehv_pic_lock, flags); - return 0; + return IRQ_SET_MASK_OK_NOCOPY; } static struct irq_chip ehv_pic_irq_chip = { -- cgit v0.10.2 From a4e04c9f219d2c00764ffa7ba45500411815879d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:08 +0000 Subject: powerpc: Irq: Use generic_handle_irq No functional change Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: ppc Link: http://lkml.kernel.org/r/20140223212736.333718121@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 1d0848b..ca1cd74 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -465,7 +465,6 @@ static inline void check_stack_overflow(void) void __do_irq(struct pt_regs *regs) { - struct irq_desc *desc; unsigned int irq; irq_enter(); @@ -487,11 +486,8 @@ void __do_irq(struct pt_regs *regs) /* And finally process it */ if (unlikely(irq == NO_IRQ)) __get_cpu_var(irq_stat).spurious_irqs++; - else { - desc = irq_to_desc(irq); - if (likely(desc)) - desc->handle_irq(irq, desc); - } + else + generic_handle_irq(irq); trace_irq_exit(regs); -- cgit v0.10.2 From b8a9a11b976810ba12a43c4fe699a14892c97e52 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:09 +0000 Subject: powerpc: Eeh: Kill another abuse of irq_desc commit 91150af3a (powerpc/eeh: Fix unbalanced enable for IRQ) is another brilliant example of trainwreck engineering. The patch "fixes" the issue of an unbalanced call to irq_enable() which causes a prominent warning by checking the disabled state of the interrupt line and call conditionally into the core code. This is wrong in two aspects: 1) The warning is there to tell users, that they need to fix their asymetric enable/disable patterns by finding the root cause and solving it there. It's definitely not meant to work around it by conditionally calling into the core code depending on the random state of the irq line. Asymetric irq_disable/enable calls are a clear sign of wrong usage of the interfaces which have to be cured at the root and not by somehow hacking around it. 2) The abuse of core internal data structure instead of using the proper interfaces for retrieving the information for the 'hack around' irq_desc is core internal and it's clear enough stated. Replace at least the irq_desc abuse with the proper functions and add a big fat comment why this is absurd and completely wrong. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Gavin Shan Cc: Benjamin Herrenschmidt Cc: ppc Link: http://lkml.kernel.org/r/20140223212736.562906212@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index fdc679d..3e1d7de 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -143,15 +143,31 @@ static void eeh_disable_irq(struct pci_dev *dev) static void eeh_enable_irq(struct pci_dev *dev) { struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); - struct irq_desc *desc; if ((edev->mode) & EEH_DEV_IRQ_DISABLED) { edev->mode &= ~EEH_DEV_IRQ_DISABLED; - - desc = irq_to_desc(dev->irq); - if (desc && desc->depth > 0) + /* + * FIXME !!!!! + * + * This is just ass backwards. This maze has + * unbalanced irq_enable/disable calls. So instead of + * finding the root cause it works around the warning + * in the irq_enable code by conditionally calling + * into it. + * + * That's just wrong.The warning in the core code is + * there to tell people to fix their assymetries in + * their own code, not by abusing the core information + * to avoid it. + * + * I so wish that the assymetry would be the other way + * round and a few more irq_disable calls render that + * shit unusable forever. + * + * tglx + */ + if (irqd_irq_disabled(irq_get_irq_data(dev->irq)) enable_irq(dev->irq); - } } /** -- cgit v0.10.2 From f7bfca6db60a6ca0a73126918b2fb6f851065947 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:11 +0000 Subject: pci: pcie-designware: Remove irq_desc abuse There is no reason to care about irq_desc in that context, escpecially as irq_data for that interrupt is retrieved as well. Use the proper accessor for the msi descriptor Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Acked-by: Bjorn Helgaas Acked-by: Jingoo Han Cc: Mohit Kumar Cc: pci Link: http://lkml.kernel.org/r/20140223212736.987803648@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index 17ce88f..2e48ecf 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -294,14 +294,12 @@ no_valid_irq: static void clear_irq(unsigned int irq) { unsigned int pos, nvec; - struct irq_desc *desc; struct msi_desc *msi; struct pcie_port *pp; struct irq_data *data = irq_get_irq_data(irq); /* get the port structure */ - desc = irq_to_desc(irq); - msi = irq_desc_get_msi_desc(desc); + msi = irq_data_get_msi(data); pp = sys_to_pcie(msi->dev->bus->sysdata); if (!pp) { BUG(); -- cgit v0.10.2 From 8435cf757632a5559fbbf1cb79299716c8d5b651 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:12 +0000 Subject: arm: Replace various irq_desc accesses Use the proper functions. There is no need to fiddle with irq_desc. Signed-off-by: Thomas Gleixner Acked-by: Shawn Guo C Acked-by: Tony Lindgren Cc: Peter Zijlstra Cc: arm Cc: Russell King Link: http://lkml.kernel.org/r/20140223212737.099151500@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/arm/mach-imx/pm-imx6q.c b/arch/arm/mach-imx/pm-imx6q.c index 7a9b985..29e3fe6 100644 --- a/arch/arm/mach-imx/pm-imx6q.c +++ b/arch/arm/mach-imx/pm-imx6q.c @@ -120,7 +120,7 @@ static void imx6q_enable_wb(bool enable) int imx6q_set_lpm(enum mxc_cpu_pwr_mode mode) { - struct irq_desc *iomuxc_irq_desc; + struct irq_data *iomuxc_irq_data = irq_get_irq_data(32); u32 val = readl_relaxed(ccm_base + CLPCR); val &= ~BM_CLPCR_LPM; @@ -167,10 +167,9 @@ int imx6q_set_lpm(enum mxc_cpu_pwr_mode mode) * 3) Software should mask IRQ #32 right after CCM Low-Power mode * is set (set bits 0-1 of CCM_CLPCR). */ - iomuxc_irq_desc = irq_to_desc(32); - imx_gpc_irq_unmask(&iomuxc_irq_desc->irq_data); + imx_gpc_irq_unmask(iomuxc_irq_data); writel_relaxed(val, ccm_base + CLPCR); - imx_gpc_irq_mask(&iomuxc_irq_desc->irq_data); + imx_gpc_irq_mask(iomuxc_irq_data); return 0; } diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index f12a12a..2ebc514 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -44,13 +44,10 @@ static unsigned int irq_counter[16]; static irqreturn_t deferred_fiq(int irq, void *dev_id) { - struct irq_desc *irq_desc; - struct irq_chip *irq_chip = NULL; int gpio, irq_num, fiq_count; + struct irq_chip *irq_chip; - irq_desc = irq_to_desc(gpio_to_irq(AMS_DELTA_GPIO_PIN_KEYBRD_CLK)); - if (irq_desc) - irq_chip = irq_desc->irq_data.chip; + irq_chip = irq_get_irq_chip(gpio_to_irq(AMS_DELTA_GPIO_PIN_KEYBRD_CLK)); /* * For each handled GPIO interrupt, keep calling its interrupt handler -- cgit v0.10.2 From 49f3fbc7b8576fd578f3504c65aae9b8a212d0f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:13 +0000 Subject: arm: mmp: Remove pointless fiddling with irq internals The pm-mmp2 and pm-pxa910 power management related irq_set_wake callbacks fiddle pointlessly with the irq actions for no reason except for lack of understanding how the wakeup mechanism works. On supsend the core disables all interrupts lazily, i.e. it does not mask them at the irq controller level. So any interrupt which is firing during suspend will mark the corresponding interrupt line as pending. Just before the core powers down it checks whether there are interrupts pending from interrupt lines which are marked as wakeup sources and if so it aborts the suspend and resends the interrupts. If there was no interrupt at this point, the cpu goes into suspend with these interrupts unmasked. The IRQF_NO_SUSPEND flag for interrupt actions is a totally different mechanism. That allows the device driver to prevent the core from disabling the interrupt despite the fact that it is not marked as a wakeup source. This has nothing to do with the case at hand. It was introduced for special cases where lazy disable is not possible. Remove the nonsense along with the braindamaged boundary check. The core code does NOT call these functions out of boundary. Add a FIXME comment to an unhandled error path which merily printks some useless blurb instead of returning a proper error code. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: arm Cc: Eric Miao Cc: Haojian Zhuang Cc: Russell King Link: http://lkml.kernel.org/r/20140223212737.214342433@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/arm/mach-mmp/pm-mmp2.c b/arch/arm/mach-mmp/pm-mmp2.c index 461a191..43b1a51 100644 --- a/arch/arm/mach-mmp/pm-mmp2.c +++ b/arch/arm/mach-mmp/pm-mmp2.c @@ -27,22 +27,8 @@ int mmp2_set_wake(struct irq_data *d, unsigned int on) { - int irq = d->irq; - struct irq_desc *desc = irq_to_desc(irq); unsigned long data = 0; - - if (unlikely(irq >= nr_irqs)) { - pr_err("IRQ nubmers are out of boundary!\n"); - return -EINVAL; - } - - if (on) { - if (desc->action) - desc->action->flags |= IRQF_NO_SUSPEND; - } else { - if (desc->action) - desc->action->flags &= ~IRQF_NO_SUSPEND; - } + int irq = d->irq; /* enable wakeup sources */ switch (irq) { diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c index 48981ca..04c9daf 100644 --- a/arch/arm/mach-mmp/pm-pxa910.c +++ b/arch/arm/mach-mmp/pm-pxa910.c @@ -27,22 +27,8 @@ int pxa910_set_wake(struct irq_data *data, unsigned int on) { - int irq = data->irq; - struct irq_desc *desc = irq_to_desc(data->irq); uint32_t awucrm = 0, apcr = 0; - - if (unlikely(irq >= nr_irqs)) { - pr_err("IRQ nubmers are out of boundary!\n"); - return -EINVAL; - } - - if (on) { - if (desc->action) - desc->action->flags |= IRQF_NO_SUSPEND; - } else { - if (desc->action) - desc->action->flags &= ~IRQF_NO_SUSPEND; - } + int irq = data->irq; /* setting wakeup sources */ switch (irq) { @@ -115,9 +101,11 @@ int pxa910_set_wake(struct irq_data *data, unsigned int on) if (irq >= IRQ_GPIO_START && irq < IRQ_BOARD_START) { awucrm = MPMU_AWUCRM_WAKEUP(2); apcr |= MPMU_APCR_SLPWP2; - } else + } else { + /* FIXME: This should return a proper error code ! */ printk(KERN_ERR "Error: no defined wake up source irq: %d\n", irq); + } } if (on) { -- cgit v0.10.2 From 589d03e93f6cd595f68891e48f0804f2c8f38aae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:18 +0000 Subject: xen: Use the proper irq functions generic_handler_irq() already tests for !desc so use this instead of generic_handle_irq_desc(). Use irq_get_irq_data() instead of desc->irq_data. Signed-off-by: Thomas Gleixner Reviewed-by: David Vrabel Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Cc: Xen Link: http://lkml.kernel.org/r/20140223212738.222412125@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index d7ff917..5db43fc 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -166,7 +166,6 @@ static void evtchn_2l_handle_events(unsigned cpu) int start_word_idx, start_bit_idx; int word_idx, bit_idx; int i; - struct irq_desc *desc; struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); @@ -176,11 +175,8 @@ static void evtchn_2l_handle_events(unsigned cpu) unsigned int evtchn = evtchn_from_irq(irq); word_idx = evtchn / BITS_PER_LONG; bit_idx = evtchn % BITS_PER_LONG; - if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) { - desc = irq_to_desc(irq); - if (desc) - generic_handle_irq_desc(irq, desc); - } + if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) + generic_handle_irq(irq); } /* @@ -245,11 +241,8 @@ static void evtchn_2l_handle_events(unsigned cpu) port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx; irq = get_evtchn_to_irq(port); - if (irq != -1) { - desc = irq_to_desc(irq); - if (desc) - generic_handle_irq_desc(irq, desc); - } + if (irq != -1) + generic_handle_irq(irq); bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD; diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index f4a9e33..7fea02c 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -336,9 +336,8 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) BUG_ON(irq == -1); #ifdef CONFIG_SMP - cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); + cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(cpu)); #endif - xen_evtchn_port_bind_to_cpu(info, cpu); info->cpu = cpu; @@ -373,10 +372,8 @@ static void xen_irq_init(unsigned irq) { struct irq_info *info; #ifdef CONFIG_SMP - struct irq_desc *desc = irq_to_desc(irq); - /* By default all event channels notify CPU#0. */ - cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); + cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(0)); #endif info = kzalloc(sizeof(*info), GFP_KERNEL); diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index 1de2a19..96109a9 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -235,14 +235,10 @@ static uint32_t clear_linked(volatile event_word_t *word) static void handle_irq_for_port(unsigned port) { int irq; - struct irq_desc *desc; irq = get_evtchn_to_irq(port); - if (irq != -1) { - desc = irq_to_desc(irq); - if (desc) - generic_handle_irq_desc(irq, desc); - } + if (irq != -1) + generic_handle_irq(irq); } static void consume_one_event(unsigned cpu, -- cgit v0.10.2 From 02893afdd310fab8f41f6afbe5233c6609ec19ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:20 +0000 Subject: xen: Get rid of the last irq_desc abuse Warn if any PIRQ cannot be bound to an event channel. Remove the check for irq_desc->action. This hypercall never fails in practice so we can emit a warning unconditionally. Remove a check for a valid irq desc. The only caller of xen_destroy_irq() will only do so if the irq was previously fully setup, which means the descriptor has been allocated as well. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Cc: Xen Cc: David Vrabel Link: http://lkml.kernel.org/r/20140223212738.579581220@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 7fea02c..5dd2ddf 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -487,13 +487,6 @@ static void pirq_query_unmask(int irq) info->u.pirq.flags |= PIRQ_NEEDS_EOI; } -static bool probing_irq(int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - - return desc && desc->action == NULL; -} - static void eoi_pirq(struct irq_data *data) { int evtchn = evtchn_from_irq(data->irq); @@ -535,8 +528,7 @@ static unsigned int __startup_pirq(unsigned int irq) BIND_PIRQ__WILL_SHARE : 0; rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq); if (rc != 0) { - if (!probing_irq(irq)) - pr_info("Failed to obtain physical IRQ %d\n", irq); + pr_warn("Failed to obtain physical IRQ %d\n", irq); return 0; } evtchn = bind_pirq.port; @@ -769,17 +761,12 @@ error_irq: int xen_destroy_irq(int irq) { - struct irq_desc *desc; struct physdev_unmap_pirq unmap_irq; struct irq_info *info = info_for_irq(irq); int rc = -ENOENT; mutex_lock(&irq_mapping_update_lock); - desc = irq_to_desc(irq); - if (!desc) - goto out; - if (xen_initial_domain()) { unmap_irq.pirq = info->u.pirq.pirq; unmap_irq.domid = info->u.pirq.domid; -- cgit v0.10.2 From 792d0018a5fe31ef8ef9d07a7a02081d4abdf6b7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:14 +0000 Subject: genirq: Add a kstat helper to increment irq stats There is a common pattern all over the place: kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); This results in a call to core code anyway. So provide a function which does the same thing in core. While at it, replace the butt ugly macro with an inline. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20140223212737.422068876@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 51c72be..54ec7e0 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -54,11 +54,13 @@ extern unsigned long long nr_context_switches(void); #include extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); -#define kstat_incr_irqs_this_cpu(irqno, DESC) \ -do { \ - __this_cpu_inc(*(DESC)->kstat_irqs); \ - __this_cpu_inc(kstat.irqs_sum); \ -} while (0) +static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) +{ + __this_cpu_inc(*desc->kstat_irqs); + __this_cpu_inc(kstat.irqs_sum); +} + +extern void kstat_incr_irq_this_cpu(unsigned int irq); static inline void kstat_incr_softirqs_this_cpu(unsigned int irq) { diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 8ab8e93..a7174617 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -489,6 +489,11 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +void kstat_incr_irq_this_cpu(unsigned int irq) +{ + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); +} + unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { struct irq_desc *desc = irq_to_desc(irq); -- cgit v0.10.2 From 310ff2c87e72208e4f04f33687abe9d208ffefe3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:14 +0000 Subject: mips: Use the core irq stats function Let the core do the irq_desc resolution. No functional change. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Ralf Baechle Cc: mips Link: http://lkml.kernel.org/r/20140223212737.517340416@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index dfc1b91..c1681d6 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -1007,7 +1007,7 @@ static void __irq_entry smtc_clock_tick_interrupt(void) int irq = MIPS_CPU_IRQ_BASE + 1; irq_enter(); - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + kstat_incr_irq_this_cpu(irq); cd = &per_cpu(mips_clockevent_device, cpu); cd->event_handler(cd); irq_exit(); diff --git a/arch/mips/sgi-ip22/ip22-int.c b/arch/mips/sgi-ip22/ip22-int.c index 3db64d5..58b40ae 100644 --- a/arch/mips/sgi-ip22/ip22-int.c +++ b/arch/mips/sgi-ip22/ip22-int.c @@ -148,7 +148,7 @@ static void __irq_entry indy_buserror_irq(void) int irq = SGI_BUSERR_IRQ; irq_enter(); - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + kstat_incr_irq_this_cpu(irq); ip22_be_interrupt(irq); irq_exit(); } diff --git a/arch/mips/sgi-ip22/ip22-time.c b/arch/mips/sgi-ip22/ip22-time.c index 6071924..045aa89 100644 --- a/arch/mips/sgi-ip22/ip22-time.c +++ b/arch/mips/sgi-ip22/ip22-time.c @@ -123,7 +123,7 @@ void __irq_entry indy_8254timer_irq(void) char c; irq_enter(); - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + kstat_incr_irq_this_cpu(irq); printk(KERN_ALERT "Oops, got 8254 interrupt.\n"); ArcRead(0, &c, 1, &cnt); ArcEnterInteractiveMode(); diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 54e2c4d..70d9182 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -182,7 +182,7 @@ void bcm1480_mailbox_interrupt(void) int irq = K_BCM1480_INT_MBOX_0_0; unsigned int action; - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + kstat_incr_irq_this_cpu(irq); /* Load the mailbox register to figure out what we're supposed to do */ action = (__raw_readq(mailbox_0_regs[cpu]) >> 48) & 0xffff; diff --git a/arch/mips/sibyte/sb1250/smp.c b/arch/mips/sibyte/sb1250/smp.c index d7b942d..db97611 100644 --- a/arch/mips/sibyte/sb1250/smp.c +++ b/arch/mips/sibyte/sb1250/smp.c @@ -170,7 +170,7 @@ void sb1250_mailbox_interrupt(void) int irq = K_INT_MBOX_0; unsigned int action; - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + kstat_incr_irq_this_cpu(irq); /* Load the mailbox register to figure out what we're supposed to do */ action = (____raw_readq(mailbox_regs[cpu]) >> 48) & 0xffff; -- cgit v0.10.2 From 87a69ad6409b2c7a95e2e6e4ddbc380046cb7730 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:15 +0000 Subject: sparc: Use the core irq stats function Let the core do the irq_desc resolution. No functional change. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: David S. Miller Link: http://lkml.kernel.org/r/20140223212737.635609567@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index c3d82b5..24e8b87 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -733,7 +733,7 @@ void __irq_entry timer_interrupt(int irq, struct pt_regs *regs) irq_enter(); local_cpu_data().irq0_irqs++; - kstat_incr_irqs_this_cpu(0, irq_to_desc(0)); + kstat_incr_irq_this_cpu(0); if (unlikely(!evt->event_handler)) { printk(KERN_WARNING -- cgit v0.10.2 From 770144ea7beb3e0537277a96b104ba1daa965f76 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:16 +0000 Subject: x86: Xen: Use the core irq stats function Let the core do the irq_desc resolution. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: David Vrabel Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Cc: Xen Cc: x86 Link: http://lkml.kernel.org/r/20140223212737.869264085@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 581521c..4d3acc3 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -183,7 +183,7 @@ __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) local_irq_save(flags); - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + kstat_incr_irq_this_cpu(irq); out: cpumask_clear_cpu(cpu, &waiting_cpus); w->lock = NULL; -- cgit v0.10.2 From a21748c93544901448777b5fa2abe1194c02c6dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:16 +0000 Subject: mn10300: Use the core irq stats function Let the core do the irq_desc resolution. No functional change. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: David Howells Cc: mn10300 Link: http://lkml.kernel.org/r/20140223212737.751487689@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c index db64a71..a2d8e69 100644 --- a/arch/mn10300/kernel/mn10300-watchdog.c +++ b/arch/mn10300/kernel/mn10300-watchdog.c @@ -142,7 +142,7 @@ void watchdog_interrupt(struct pt_regs *regs, enum exception_code excep) NMICR = NMICR_WDIF; nmi_count(smp_processor_id())++; - kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + kstat_incr_irq_this_cpu(irq); for_each_online_cpu(cpu) { -- cgit v0.10.2 From bc5dfcff65f24f15567f766d8bd081d594ef8cc2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:17 +0000 Subject: s390: Cio: Use the core irq stats function Let the core do the irq_desc resolution. No functional change. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Martin Schwidefsky Cc: s390 Link: http://lkml.kernel.org/r/20140223212737.983433636@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 8ee88c4..f711f0b 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -584,8 +584,6 @@ static irqreturn_t do_cio_interrupt(int irq, void *dummy) return IRQ_HANDLED; } -static struct irq_desc *irq_desc_io; - static struct irqaction io_interrupt = { .name = "IO", .handler = do_cio_interrupt, @@ -596,7 +594,6 @@ void __init init_cio_interrupts(void) irq_set_chip_and_handler(IO_INTERRUPT, &dummy_irq_chip, handle_percpu_irq); setup_irq(IO_INTERRUPT, &io_interrupt); - irq_desc_io = irq_to_desc(IO_INTERRUPT); } #ifdef CONFIG_CCW_CONSOLE @@ -623,7 +620,7 @@ void cio_tsch(struct subchannel *sch) local_bh_disable(); irq_enter(); } - kstat_incr_irqs_this_cpu(IO_INTERRUPT, irq_desc_io); + kstat_incr_irq_this_cpu(IO_INTERRUPT); if (sch->driver && sch->driver->irq) sch->driver->irq(sch); else -- cgit v0.10.2 From 3611587aa15f4ff75a2af3483d6a02accada8ec8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:17 +0000 Subject: ia64: Use the core irq stats function Let the core do the irq_desc resolution. No functional change. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Tony Luck Cc: Fenghua Yu Cc: ia64 Link: http://lkml.kernel.org/r/20140223212738.099977064@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 1034884..c8a576b 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -489,14 +489,13 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) ia64_srlz_d(); while (vector != IA64_SPURIOUS_INT_VECTOR) { int irq = local_vector_to_irq(vector); - struct irq_desc *desc = irq_to_desc(irq); if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irq_this_cpu(irq); } else if (unlikely(IS_RESCHEDULE(vector))) { scheduler_ipi(); - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irq_this_cpu(irq); } else { ia64_setreg(_IA64_REG_CR_TPR, vector); ia64_srlz_d(); @@ -549,13 +548,12 @@ void ia64_process_pending_intr(void) */ while (vector != IA64_SPURIOUS_INT_VECTOR) { int irq = local_vector_to_irq(vector); - struct irq_desc *desc = irq_to_desc(irq); if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { smp_local_flush_tlb(); - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irq_this_cpu(irq); } else if (unlikely(IS_RESCHEDULE(vector))) { - kstat_incr_irqs_this_cpu(irq, desc); + kstat_incr_irq_this_cpu(irq); } else { struct pt_regs *old_regs = set_irq_regs(NULL); -- cgit v0.10.2 From 929320e4b4c10708d3477d7e395f0ce7b0cc8744 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:20 +0000 Subject: x86: Add proper vector accounting for HYPERVISOR_CALLBACK_VECTOR HyperV abuses a device interrupt to account for the HYPERVISOR_CALLBACK_VECTOR. Provide proper accounting as we have for the other vectors as well. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Cc: K. Y. Srinivasan Cc: x86 Link: http://lkml.kernel.org/r/20140223212738.681855582@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index ab0ae1a..afb6536 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -33,6 +33,9 @@ typedef struct { #ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; #endif +#if defined(CONFIG_HYPERV) || defined(CONFIG_XEN) + unsigned int irq_hv_callback_count; +#endif } ____cacheline_aligned irq_cpustat_t; DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d99f31d..42805fa 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -125,6 +125,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); seq_printf(p, " Machine check polls\n"); #endif +#if defined(CONFIG_HYPERV) || defined(CONFIG_XEN) + seq_printf(p, "%*s: ", prec, "THR"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); + seq_printf(p, " Hypervisor callback interrupts\n"); +#endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); -- cgit v0.10.2 From 99c8b79d3c165f8e2a6247c14bfa1429e7efe51f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:21 +0000 Subject: xen: Add proper irq accounting for HYPERCALL vector Signed-off-by: Thomas Gleixner Reviewed-by: David Vrabel Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Cc: Xen Link: http://lkml.kernel.org/r/20140223212738.808648133@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 5dd2ddf..8b91c256 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1235,6 +1235,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) #ifdef CONFIG_X86 exit_idle(); #endif + inc_irq_stat(irq_hv_callback_count); __xen_evtchn_do_upcall(); -- cgit v0.10.2 From 1aec169673d7db113c37367bbc371c2ba8109f06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:22 +0000 Subject: x86: Hyperv: Cleanup the irq mess The vmbus/hyperv interrupt handling is another complete trainwreck and probably the worst of all currently in tree. If CONFIG_HYPERV=y then the interrupt delivery to the vmbus happens via the direct HYPERVISOR_CALLBACK_VECTOR. So far so good, but: The driver requests first a normal device interrupt. The only reason to do so is to increment the interrupt stats of that device interrupt. For no reason it also installs a private flow handler. We have proper accounting mechanisms for direct vectors, but of course it's too much effort to add that 5 lines of code. Aside of that the alloc_intr_gate() is not protected against reallocation which makes module reload impossible. Solution to the problem is simple to rip out the whole mess and implement it correctly. First of all move all that code to arch/x86/kernel/cpu/mshyperv.c and merily install the HYPERVISOR_CALLBACK_VECTOR with proper reallocation protection and use the proper direct vector accounting mechanism. Signed-off-by: Thomas Gleixner Acked-by: K. Y. Srinivasan Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: linuxdrivers Cc: x86 Link: http://lkml.kernel.org/r/20140223212739.028307673@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cd9c419..e98f66f 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -2,6 +2,7 @@ #define _ASM_X86_MSHYPER_H #include +#include #include struct ms_hyperv_info { @@ -16,6 +17,7 @@ void hyperv_callback_vector(void); #define trace_hyperv_callback_vector hyperv_callback_vector #endif void hyperv_vector_handler(struct pt_regs *regs); -void hv_register_vmbus_handler(int irq, irq_handler_t handler); +int hv_setup_vmbus_irq(int irq, irq_handler_t handler, void *dev_id); +void hv_remove_vmbus_irq(int irq, void *dev_id); #endif diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9f7ca26..1bd316c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -30,6 +31,45 @@ struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); +#ifdef CONFIG_HYPERV +static irq_handler_t *vmbus_handler; + +void hyperv_vector_handler(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + irq_enter(); + exit_idle(); + + inc_irq_stat(irq_hv_callback_count); + if (vmbus_handler) + vmbus_handler(); + + irq_exit(); + set_irq_regs(old_regs); +} + +int hv_setup_vmbus_irq(int irq, irq_handler_t *handler, void *dev_id) +{ + vmbus_handler = handler; + /* + * Setup the IDT for hypervisor callback. Prevent reallocation + * at module reload. + */ + if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, + hyperv_callback_vector); +} + +void hv_remove_vmbus_irq(unsigned int irq, void *dev_id) +{ + /* We have no way to deallocate the interrupt gate */ + vmbus_handler = NULL; +} +EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq); +EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); +#endif + static uint32_t __init ms_hyperv_platform(void) { u32 eax; @@ -113,41 +153,3 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init_platform = ms_hyperv_init_platform, }; EXPORT_SYMBOL(x86_hyper_ms_hyperv); - -#if IS_ENABLED(CONFIG_HYPERV) -static int vmbus_irq = -1; -static irq_handler_t vmbus_isr; - -void hv_register_vmbus_handler(int irq, irq_handler_t handler) -{ - /* - * Setup the IDT for hypervisor callback. - */ - alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); - - vmbus_irq = irq; - vmbus_isr = handler; -} - -void hyperv_vector_handler(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - struct irq_desc *desc; - - irq_enter(); - exit_idle(); - - desc = irq_to_desc(vmbus_irq); - - if (desc) - generic_handle_irq_desc(vmbus_irq, desc); - - irq_exit(); - set_irq_regs(old_regs); -} -#else -void hv_register_vmbus_handler(int irq, irq_handler_t handler) -{ -} -#endif -EXPORT_SYMBOL_GPL(hv_register_vmbus_handler); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 077bb1b..5a6909f 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -558,9 +557,6 @@ static struct bus_type hv_bus = { .dev_groups = vmbus_groups, }; -static const char *driver_name = "hyperv"; - - struct onmessage_work_context { struct work_struct work; struct hv_message msg; @@ -677,19 +673,6 @@ static irqreturn_t vmbus_isr(int irq, void *dev_id) } /* - * vmbus interrupt flow handler: - * vmbus interrupts can concurrently occur on multiple CPUs and - * can be handled concurrently. - */ - -static void vmbus_flow_handler(unsigned int irq, struct irq_desc *desc) -{ - kstat_incr_irqs_this_cpu(irq, desc); - - desc->action->handler(irq, desc->action->dev_id); -} - -/* * vmbus_bus_init -Main vmbus driver initialization routine. * * Here, we @@ -715,26 +698,13 @@ static int vmbus_bus_init(int irq) if (ret) goto err_cleanup; - ret = request_irq(irq, vmbus_isr, 0, driver_name, hv_acpi_dev); + ret = hv_setup_vmbus_irq(irq, vmbus_isr, hv_acpi_dev); if (ret != 0) { - pr_err("Unable to request IRQ %d\n", - irq); + pr_err("Unable to request IRQ %d\n", irq); goto err_unregister; } - /* - * Vmbus interrupts can be handled concurrently on - * different CPUs. Establish an appropriate interrupt flow - * handler that can support this model. - */ - irq_set_handler(irq, vmbus_flow_handler); - - /* - * Register our interrupt handler. - */ - hv_register_vmbus_handler(irq, vmbus_isr); - ret = hv_synic_alloc(); if (ret) goto err_alloc; @@ -753,7 +723,7 @@ static int vmbus_bus_init(int irq) err_alloc: hv_synic_free(); - free_irq(irq, hv_acpi_dev); + hv_remove_vmbus_irq(irq, hv_acpi_dev); err_unregister: bus_unregister(&hv_bus); @@ -978,8 +948,7 @@ cleanup: static void __exit vmbus_exit(void) { - - free_irq(irq, hv_acpi_dev); + hv_remove_vmbus_irq(irq, hv_acpi_dev); vmbus_free_channels(); bus_unregister(&hv_bus); hv_cleanup(); -- cgit v0.10.2 From 8f945a3325bbe0dd651e2f496a53df9b06fc6d07 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:23 +0000 Subject: genirq: Move kstat_incr_irqs_this_cpu() to core No more users outside the core code. Put it into the poison cabinet. That also gets rid of the linux/irq.h include in kernel_stat.h Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20140223212739.124207133@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 54ec7e0..c3fbda7 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -51,15 +51,7 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); extern unsigned long long nr_context_switches(void); -#include extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); - -static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) -{ - __this_cpu_inc(*desc->kstat_irqs); - __this_cpu_inc(kstat.irqs_sum); -} - extern void kstat_incr_irq_this_cpu(unsigned int irq); static inline void kstat_incr_softirqs_this_cpu(unsigned int irq) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index d61ac29..17b6717 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -6,6 +6,7 @@ * of this file for your non core code. */ #include +#include #ifdef CONFIG_SPARSE_IRQ # define IRQ_BITMAP_BITS (NR_IRQS + 8196) @@ -180,3 +181,9 @@ static inline bool irqd_has_set(struct irq_data *d, unsigned int mask) { return d->state_use_accessors & mask; } + +static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *desc) +{ + __this_cpu_inc(*desc->kstat_irqs); + __this_cpu_inc(kstat.irqs_sum); +} -- cgit v0.10.2 From 6decf1a33c386d4addc2ed9d269c3868f08c70bb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:10 +0000 Subject: sh: Use irq_set_affinity instead of homebrewn code There is no point in having an incomplete copy of irq_set_affinity() for the hotplug irq migration code. Use the core function instead. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Paul Mundt Cc: sh Link: http://lkml.kernel.org/r/20140223212736.774961401@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 0833736..65a1ecd 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -217,19 +217,6 @@ void __init init_IRQ(void) } #ifdef CONFIG_HOTPLUG_CPU -static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu) -{ - struct irq_desc *desc = irq_to_desc(irq); - struct irq_chip *chip = irq_data_get_irq_chip(data); - - printk(KERN_INFO "IRQ%u: moving from cpu%u to cpu%u\n", - irq, data->node, cpu); - - raw_spin_lock_irq(&desc->lock); - chip->irq_set_affinity(data, cpumask_of(cpu), false); - raw_spin_unlock_irq(&desc->lock); -} - /* * The CPU has been marked offline. Migrate IRQs off this CPU. If * the affinity settings do not allow other CPUs, force them onto any @@ -250,11 +237,8 @@ void migrate_irqs(void) irq, cpu); cpumask_setall(data->affinity); - newcpu = cpumask_any_and(data->affinity, - cpu_online_mask); } - - route_irq(data, irq, newcpu); + irq_set_affinity(irq, data->affinity); } } } -- cgit v0.10.2 From 5c331c8626f5d39722d07101c699c8e794f5629d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2014 15:19:16 +0100 Subject: irqchip: xtensa: Select only an online cpu The user space interface does not filter out offline cpus. It merily verifies that the mask contains at least one online cpu. So the selector in the irq chip implementation needs to make sure to pick only an online cpu because otherwise: Offline Core 1 Set affinity to 0xe Selector will pick first set bit, i.e. core 1 Signed-off-by: Thomas Gleixner Cc: Chris Zankel Cc: xtensa diff --git a/drivers/irqchip/irq-xtensa-mx.c b/drivers/irqchip/irq-xtensa-mx.c index f693f1b..e1c2f96 100644 --- a/drivers/irqchip/irq-xtensa-mx.c +++ b/drivers/irqchip/irq-xtensa-mx.c @@ -122,7 +122,7 @@ static int xtensa_mx_irq_retrigger(struct irq_data *d) static int xtensa_mx_irq_set_affinity(struct irq_data *d, const struct cpumask *dest, bool force) { - unsigned mask = 1u << cpumask_any(dest); + unsigned mask = 1u << cpumask_any_and(dest, cpu_online_mask); set_er(mask, MIROUT(d->hwirq - HW_IRQ_MX_BASE)); return 0; -- cgit v0.10.2 From b58d971da3433654787a88c40c6fca00f5bbaa04 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Feb 2014 21:40:10 +0000 Subject: xtensa: Use irq_set_affinity instead of homebrewn code There is no point in having an incomplete copy of irq_set_affinity() for the hotplug irq migration code. Use the core function instead and while at it switch to for_each_active_irq() Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Chris Zankel Cc: xtensa Link: http://lkml.kernel.org/r/20140223212736.664624945@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 482868a..3eee94f 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -155,18 +155,6 @@ void __init init_IRQ(void) } #ifdef CONFIG_HOTPLUG_CPU -static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu) -{ - struct irq_desc *desc = irq_to_desc(irq); - struct irq_chip *chip = irq_data_get_irq_chip(data); - unsigned long flags; - - raw_spin_lock_irqsave(&desc->lock, flags); - if (chip->irq_set_affinity) - chip->irq_set_affinity(data, cpumask_of(cpu), false); - raw_spin_unlock_irqrestore(&desc->lock, flags); -} - /* * The CPU has been marked offline. Migrate IRQs off this CPU. If * the affinity settings do not allow other CPUs, force them onto any @@ -175,10 +163,9 @@ static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu) void migrate_irqs(void) { unsigned int i, cpu = smp_processor_id(); - struct irq_desc *desc; - for_each_irq_desc(i, desc) { - struct irq_data *data = irq_desc_get_irq_data(desc); + for_each_active_irq(i) { + struct irq_data *data = irq_get_irq_data(i); unsigned int newcpu; if (irqd_is_per_cpu(data)) @@ -194,11 +181,8 @@ void migrate_irqs(void) i, cpu); cpumask_setall(data->affinity); - newcpu = cpumask_any_and(data->affinity, - cpu_online_mask); } - - route_irq(data, i, newcpu); + irq_set_affinity(i, data->affinity); } } #endif /* CONFIG_HOTPLUG_CPU */ -- cgit v0.10.2 From 1e9291996c4eedf79883f47ec635235e39d3d6cd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 4 Mar 2014 10:28:23 +0200 Subject: iwlwifi: mvm: don't WARN when statistics are handled late Since the statistics handler is asynchrous, it can very well be that we will handle the statistics (hence the RSSI fluctuation) when we already disassociated. Don't WARN on this case. This solves: https://bugzilla.redhat.com/show_bug.cgi?id=1071998 Cc: [3.10+] Fixes: 2b76ef13086f ("iwlwifi: mvm: implement reduced Tx power") Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/mvm/bt-coex.c b/drivers/net/wireless/iwlwifi/mvm/bt-coex.c index 76cde6c..18a895a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/bt-coex.c +++ b/drivers/net/wireless/iwlwifi/mvm/bt-coex.c @@ -872,8 +872,11 @@ void iwl_mvm_bt_rssi_event(struct iwl_mvm *mvm, struct ieee80211_vif *vif, lockdep_assert_held(&mvm->mutex); - /* Rssi update while not associated ?! */ - if (WARN_ON_ONCE(mvmvif->ap_sta_id == IWL_MVM_STATION_COUNT)) + /* + * Rssi update while not associated - can happen since the statistics + * are handled asynchronously + */ + if (mvmvif->ap_sta_id == IWL_MVM_STATION_COUNT) return; /* No BT - reports should be disabled */ -- cgit v0.10.2 From acfcd9ed588465e48efe5ceb67177096d51c95a9 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Tue, 4 Mar 2014 14:47:39 +0200 Subject: iwlwifi: fix and add 7265 series HW IDs Update of the HW IDs for the 7265 series. Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index f47bcbe..3872ead 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -359,13 +359,12 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { /* 7265 Series */ {IWL_PCI_DEVICE(0x095A, 0x5010, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5110, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x5112, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5100, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x510A, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5310, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_n_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5210, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5012, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5412, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5410, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5400, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x1010, iwl7265_2ac_cfg)}, -- cgit v0.10.2 From b6251243ac0a3cb63afe748133f6600f77526078 Mon Sep 17 00:00:00 2001 From: Ivaylo Dimitrov Date: Sat, 1 Mar 2014 14:52:06 +0200 Subject: wl1251: use skb_trim to make skb shorter the current code is directly setting skb->len, which is not correct and brings problems with HAVE_EFFICIENT_UNALIGNED_ACCESS enabled in config Signed-off-by: Ivaylo Dimitrov Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ti/wl1251/rx.c b/drivers/net/wireless/ti/wl1251/rx.c index 123c4bb..cde0eaf 100644 --- a/drivers/net/wireless/ti/wl1251/rx.c +++ b/drivers/net/wireless/ti/wl1251/rx.c @@ -180,7 +180,7 @@ static void wl1251_rx_body(struct wl1251 *wl, wl1251_mem_read(wl, rx_packet_ring_addr, rx_buffer, length); /* The actual length doesn't include the target's alignment */ - skb->len = desc->length - PLCP_HEADER_LENGTH; + skb_trim(skb, desc->length - PLCP_HEADER_LENGTH); fc = (u16 *)skb->data; -- cgit v0.10.2 From 2958a489d7d31552fd1a0a8f54a5005c278d4606 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Tue, 4 Mar 2014 21:29:13 +0100 Subject: ia64: Remove deprecated IRQF_DISABLED This patch removes the IRQF_DISABLED flag from ia64 architecture code. It's a NOOP since 2.6.35 and it will be removed one day. Signed-off-by: Michael Opdenacker Cc: paul.gortmaker@windriver.com Cc: viro@zeniv.linux.org.uk Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: andriy.shevchenko@linux.intel.com Cc: fenghua.yu@intel.com Cc: tony.luck@intel.com Link: http://lkml.kernel.org/r/1393964953-17002-1-git-send-email-michael.opdenacker@free-electrons.com Signed-off-by: Thomas Gleixner diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index c8a576b..0884f5e 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -364,7 +364,6 @@ static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) static struct irqaction irq_move_irqaction = { .handler = smp_irq_move_cleanup_interrupt, - .flags = IRQF_DISABLED, .name = "irq_move" }; @@ -600,7 +599,6 @@ static irqreturn_t dummy_handler (int irq, void *dev_id) static struct irqaction ipi_irqaction = { .handler = handle_IPI, - .flags = IRQF_DISABLED, .name = "IPI" }; @@ -609,13 +607,11 @@ static struct irqaction ipi_irqaction = { */ static struct irqaction resched_irqaction = { .handler = dummy_handler, - .flags = IRQF_DISABLED, .name = "resched" }; static struct irqaction tlb_irqaction = { .handler = dummy_handler, - .flags = IRQF_DISABLED, .name = "tlb_flush" }; diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index b8edfa7..33f1462 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1772,38 +1772,32 @@ __setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); static struct irqaction cmci_irqaction = { .handler = ia64_mca_cmc_int_handler, - .flags = IRQF_DISABLED, .name = "cmc_hndlr" }; static struct irqaction cmcp_irqaction = { .handler = ia64_mca_cmc_int_caller, - .flags = IRQF_DISABLED, .name = "cmc_poll" }; static struct irqaction mca_rdzv_irqaction = { .handler = ia64_mca_rendez_int_handler, - .flags = IRQF_DISABLED, .name = "mca_rdzv" }; static struct irqaction mca_wkup_irqaction = { .handler = ia64_mca_wakeup_int_handler, - .flags = IRQF_DISABLED, .name = "mca_wkup" }; #ifdef CONFIG_ACPI static struct irqaction mca_cpe_irqaction = { .handler = ia64_mca_cpe_int_handler, - .flags = IRQF_DISABLED, .name = "cpe_hndlr" }; static struct irqaction mca_cpep_irqaction = { .handler = ia64_mca_cpe_int_caller, - .flags = IRQF_DISABLED, .name = "cpe_poll" }; #endif /* CONFIG_ACPI */ diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index cb59277..d841c4b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -6387,7 +6387,6 @@ pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) static struct irqaction perfmon_irqaction = { .handler = pfm_interrupt_handler, - .flags = IRQF_DISABLED, .name = "perfmon" }; diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index fbaac1a..71c52bc 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -380,7 +380,7 @@ static cycle_t itc_get_cycles(struct clocksource *cs) static struct irqaction timer_irqaction = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_IRQPOLL, + .flags = IRQF_IRQPOLL, .name = "timer" }; -- cgit v0.10.2 From 322a126a8fa8af80d9dc06e7168db11d1aabdba7 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Tue, 4 Mar 2014 21:31:51 +0100 Subject: mn10300: Remove deprecated IRQF_DISABLED This patch removes the IRQF_DISABLED flag from mn10300 architecture code. It's a NOOP since 2.6.35 and it will be removed one day. Signed-off-by: Michael Opdenacker Cc: srivatsa.bhat@linux.vnet.ibm.com Cc: linux-am33-list@redhat.com Cc: dhowells@redhat.com Cc: yasutake.koichi@jp.panasonic.com Link: http://lkml.kernel.org/r/1393965111-17092-1-git-send-email-michael.opdenacker@free-electrons.com Signed-off-by: Thomas Gleixner diff --git a/arch/mn10300/kernel/cevt-mn10300.c b/arch/mn10300/kernel/cevt-mn10300.c index ccce35e..60f64ca 100644 --- a/arch/mn10300/kernel/cevt-mn10300.c +++ b/arch/mn10300/kernel/cevt-mn10300.c @@ -113,7 +113,7 @@ int __init init_clockevents(void) cd->set_next_event = next_event; iact = &per_cpu(timer_irq, cpu); - iact->flags = IRQF_DISABLED | IRQF_SHARED | IRQF_TIMER; + iact->flags = IRQF_SHARED | IRQF_TIMER; iact->handler = timer_interrupt; clockevents_register_device(cd); diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index bf6e949..7ecf698 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -985,17 +985,17 @@ static int mn10300_serial_startup(struct uart_port *_port) irq_set_chip(port->tm_irq, &mn10300_serial_pic); if (request_irq(port->rx_irq, mn10300_serial_interrupt, - IRQF_DISABLED | IRQF_NOBALANCING, + IRQF_NOBALANCING, port->rx_name, port) < 0) goto error; if (request_irq(port->tx_irq, mn10300_serial_interrupt, - IRQF_DISABLED | IRQF_NOBALANCING, + IRQF_NOBALANCING, port->tx_name, port) < 0) goto error2; if (request_irq(port->tm_irq, mn10300_serial_interrupt, - IRQF_DISABLED | IRQF_NOBALANCING, + IRQF_NOBALANCING, port->tm_name, port) < 0) goto error3; mn10300_serial_mask_ack(port->tm_irq); diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index a17f9c9..f984193 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -143,7 +143,7 @@ static struct irqaction call_function_ipi = { static irqreturn_t smp_ipi_timer_interrupt(int irq, void *dev_id); static struct irqaction local_timer_ipi = { .handler = smp_ipi_timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING, + .flags = IRQF_NOBALANCING, .name = "smp local timer IPI" }; #endif diff --git a/arch/mn10300/unit-asb2364/irq-fpga.c b/arch/mn10300/unit-asb2364/irq-fpga.c index e16c216..073e2cc 100644 --- a/arch/mn10300/unit-asb2364/irq-fpga.c +++ b/arch/mn10300/unit-asb2364/irq-fpga.c @@ -76,7 +76,7 @@ static irqreturn_t fpga_interrupt(int irq, void *_mask) static struct irqaction fpga_irq[] = { [0] = { .handler = fpga_interrupt, - .flags = IRQF_DISABLED | IRQF_SHARED, + .flags = IRQF_SHARED, .name = "fpga", }, }; -- cgit v0.10.2 From d20d2efbf227042920d386b8eda878815f63c987 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Tue, 4 Mar 2014 21:35:05 +0100 Subject: x86: Remove deprecated IRQF_DISABLED This patch removes the IRQF_DISABLED flag from x86 architecture code. It's a NOOP since 2.6.35 and it will be removed one day. Signed-off-by: Michael Opdenacker Cc: venki@google.com Link: http://lkml.kernel.org/r/1393965305-17248-1-git-send-email-michael.opdenacker@free-electrons.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index d3d7469..1c7eefe 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -145,10 +145,10 @@ static int fd_request_irq(void) { if (can_use_virtual_dma) return request_irq(FLOPPY_IRQ, floppy_hardint, - IRQF_DISABLED, "floppy", NULL); + 0, "floppy", NULL); else return request_irq(FLOPPY_IRQ, floppy_interrupt, - IRQF_DISABLED, "floppy", NULL); + 0, "floppy", NULL); } static unsigned long dma_mem_alloc(unsigned long size) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index da85a8e..45d2ded 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -521,7 +521,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) { if (request_irq(dev->irq, hpet_interrupt_handler, - IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, + IRQF_TIMER | IRQF_NOBALANCING, dev->name, dev)) return -1; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 24d3c91..36b2cee 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -62,7 +62,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) static struct irqaction irq0 = { .handler = timer_interrupt, - .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, + .flags = IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, .name = "timer" }; -- cgit v0.10.2 From 86134a1b39fd056a7b6778608a5882f0ea1bc705 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 1 Aug 2013 13:33:26 +0100 Subject: x86/boot: Cleanup header.S by removing some #ifdefs handover_offset is now filled out by build.c. Don't set a default value as it will be overwritten anyway. Acked-by: Borislav Petkov Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index ec3b8ba..d69d966 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -426,13 +426,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr #define INIT_SIZE VO_INIT_SIZE #endif init_size: .long INIT_SIZE # kernel initialization size -handover_offset: -#ifdef CONFIG_EFI_STUB - .long 0x30 # offset to the handover - # protocol entry point -#else - .long 0 -#endif +handover_offset: .long 0 # Filled in by build.c # End of setup header ##################################################### -- cgit v0.10.2 From 993c30a04e205fb239c0875b25b43ddef0499845 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 1 Aug 2013 14:18:49 +0100 Subject: x86, tools: Consolidate #ifdef code Instead of littering main() with #ifdef CONFIG_EFI_STUB, move the logic into separate functions that do nothing if the config option isn't set. This makes main() much easier to read. Acked-by: Borislav Petkov Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 8e15b22..bf26207 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -219,6 +219,45 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) update_pecoff_section_header(".text", text_start, text_sz); } +static int reserve_pecoff_reloc_section(int c) +{ + /* Reserve 0x20 bytes for .reloc section */ + memset(buf+c, 0, PECOFF_RELOC_RESERVE); + return PECOFF_RELOC_RESERVE; +} + +static void efi_stub_defaults(void) +{ + /* Defaults for old kernel */ +#ifdef CONFIG_X86_32 + efi_pe_entry = 0x10; + efi_stub_entry = 0x30; +#else + efi_pe_entry = 0x210; + efi_stub_entry = 0x230; + startup_64 = 0x200; +#endif +} + +static void efi_stub_entry_update(void) +{ +#ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ + efi_stub_entry -= 0x200; +#endif + put_unaligned_le32(efi_stub_entry, &buf[0x264]); +} + +#else + +static inline void update_pecoff_setup_and_reloc(unsigned int) {} +static inline void update_pecoff_text(unsigned int, unsigned int) {} +static inline void efi_stub_defaults(void) {} +static inline void efi_stup_entry_update(void) {} + +static inline int reserve_pecoff_reloc_section(int c) +{ + return 0; +} #endif /* CONFIG_EFI_STUB */ @@ -271,15 +310,7 @@ int main(int argc, char ** argv) void *kernel; u32 crc = 0xffffffffUL; - /* Defaults for old kernel */ -#ifdef CONFIG_X86_32 - efi_pe_entry = 0x10; - efi_stub_entry = 0x30; -#else - efi_pe_entry = 0x210; - efi_stub_entry = 0x230; - startup_64 = 0x200; -#endif + efi_stub_defaults(); if (argc != 5) usage(); @@ -302,11 +333,7 @@ int main(int argc, char ** argv) die("Boot block hasn't got boot flag (0xAA55)"); fclose(file); -#ifdef CONFIG_EFI_STUB - /* Reserve 0x20 bytes for .reloc section */ - memset(buf+c, 0, PECOFF_RELOC_RESERVE); - c += PECOFF_RELOC_RESERVE; -#endif + c += reserve_pecoff_reloc_section(c); /* Pad unused space with zeros */ setup_sectors = (c + 511) / 512; @@ -315,9 +342,7 @@ int main(int argc, char ** argv) i = setup_sectors*512; memset(buf+c, 0, i-c); -#ifdef CONFIG_EFI_STUB update_pecoff_setup_and_reloc(i); -#endif /* Set the default root device */ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); @@ -342,14 +367,9 @@ int main(int argc, char ** argv) buf[0x1f1] = setup_sectors-1; put_unaligned_le32(sys_size, &buf[0x1f4]); -#ifdef CONFIG_EFI_STUB update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); -#ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ - efi_stub_entry -= 0x200; -#endif - put_unaligned_le32(efi_stub_entry, &buf[0x264]); -#endif + efi_stub_entry_update(); crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, dest) != i) -- cgit v0.10.2 From 426e34cc4f6094cefe4f3175764cdf583128e7cd Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 6 Dec 2013 21:13:04 +0000 Subject: x86/mm/pageattr: Always dump the right page table in an oops Now that we have EFI-specific page tables we need to lookup the pgd when dumping those page tables, rather than assuming that swapper_pgdir is the current pgdir. Remove the double underscore prefix, which is usually reserved for static functions. Acked-by: Borislav Petkov Signed-off-by: Matt Fleming diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 1aa9ccd..24ae421 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -382,6 +382,8 @@ static inline void update_page_count(int level, unsigned long pages) { } * as a pte too. */ extern pte_t *lookup_address(unsigned long address, unsigned int *level); +extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + unsigned int *level); extern phys_addr_t slow_virt_to_phys(void *__address); extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, unsigned numpages, unsigned long page_flags); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6dea040..6055a20 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -584,8 +584,13 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, if (error_code & PF_INSTR) { unsigned int level; + pgd_t *pgd; + pte_t *pte; - pte_t *pte = lookup_address(address, &level); + pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); + pgd += pgd_index(address); + + pte = lookup_address_in_pgd(pgd, address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) printk(nx_warning, from_kuid(&init_user_ns, current_uid())); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b3b19f4..14c656f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -323,8 +323,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, return prot; } -static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address, - unsigned int *level) +/* + * Lookup the page table entry for a virtual address in a specific pgd. + * Return a pointer to the entry and the level of the mapping. + */ +pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, + unsigned int *level) { pud_t *pud; pmd_t *pmd; @@ -365,7 +369,7 @@ static pte_t *__lookup_address_in_pgd(pgd_t *pgd, unsigned long address, */ pte_t *lookup_address(unsigned long address, unsigned int *level) { - return __lookup_address_in_pgd(pgd_offset_k(address), address, level); + return lookup_address_in_pgd(pgd_offset_k(address), address, level); } EXPORT_SYMBOL_GPL(lookup_address); @@ -373,7 +377,7 @@ static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, unsigned int *level) { if (cpa->pgd) - return __lookup_address_in_pgd(cpa->pgd + pgd_index(address), + return lookup_address_in_pgd(cpa->pgd + pgd_index(address), address, level); return lookup_address(address, level); -- cgit v0.10.2 From 099240ac111aac454962e6399c0cc51d1511504a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 13:01:39 +0000 Subject: x86/efi: Delete dead code when checking for non-native Both efi_free_boot_services() and efi_enter_virtual_mode() are invoked from init/main.c, but only if the EFI runtime services are available. This is not the case for non-native boots, e.g. where a 64-bit kernel is booted with 32-bit EFI firmware. Delete the dead code. Acked-by: Borislav Petkov Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1a201ac..b96ae79 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -466,9 +466,6 @@ void __init efi_free_boot_services(void) { void *p; - if (!efi_is_native()) - return; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { efi_memory_desc_t *md = p; unsigned long long start = md->phys_addr; @@ -1005,15 +1002,6 @@ void __init efi_enter_virtual_mode(void) efi.systab = NULL; - /* - * We don't do virtual mode, since we don't do runtime services, on - * non-native EFI - */ - if (!efi_is_native()) { - efi_unmap_memmap(); - return; - } - if (efi_setup) { efi_map_regions_fixed(); } else { -- cgit v0.10.2 From 677703cef0a148ba07d37ced649ad25b1cda2f78 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 13:47:37 +0000 Subject: efi: Add separate 32-bit/64-bit definitions The traditional approach of using machine-specific types such as 'unsigned long' does not allow the kernel to interact with firmware running in a different CPU mode, e.g. 64-bit kernel with 32-bit EFI. Add distinct EFI structure definitions for both 32-bit and 64-bit so that we can use them in the 32-bit and 64-bit code paths. Acked-by: Borislav Petkov Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 81b6b65..d487e72 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -37,6 +37,24 @@ struct efi_graphics_output_mode_info { u32 pixels_per_scan_line; } __packed; +struct efi_graphics_output_protocol_mode_32 { + u32 max_mode; + u32 mode; + u32 info; + u32 size_of_info; + u64 frame_buffer_base; + u32 frame_buffer_size; +} __packed; + +struct efi_graphics_output_protocol_mode_64 { + u32 max_mode; + u32 mode; + u64 info; + u64 size_of_info; + u64 frame_buffer_base; + u64 frame_buffer_size; +} __packed; + struct efi_graphics_output_protocol_mode { u32 max_mode; u32 mode; @@ -46,6 +64,20 @@ struct efi_graphics_output_protocol_mode { unsigned long frame_buffer_size; } __packed; +struct efi_graphics_output_protocol_32 { + u32 query_mode; + u32 set_mode; + u32 blt; + u32 mode; +}; + +struct efi_graphics_output_protocol_64 { + u64 query_mode; + u64 set_mode; + u64 blt; + u64 mode; +}; + struct efi_graphics_output_protocol { void *query_mode; unsigned long set_mode; @@ -53,6 +85,18 @@ struct efi_graphics_output_protocol { struct efi_graphics_output_protocol_mode *mode; }; +struct efi_uga_draw_protocol_32 { + u32 get_mode; + u32 set_mode; + u32 blt; +}; + +struct efi_uga_draw_protocol_64 { + u64 get_mode; + u64 set_mode; + u64 blt; +}; + struct efi_uga_draw_protocol { void *get_mode; void *set_mode; diff --git a/include/linux/efi.h b/include/linux/efi.h index 0a819e7..42c6627 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -153,6 +153,102 @@ typedef struct { u8 sets_to_zero; } efi_time_cap_t; +typedef struct { + efi_table_hdr_t hdr; + u32 raise_tpl; + u32 restore_tpl; + u32 allocate_pages; + u32 free_pages; + u32 get_memory_map; + u32 allocate_pool; + u32 free_pool; + u32 create_event; + u32 set_timer; + u32 wait_for_event; + u32 signal_event; + u32 close_event; + u32 check_event; + u32 install_protocol_interface; + u32 reinstall_protocol_interface; + u32 uninstall_protocol_interface; + u32 handle_protocol; + u32 __reserved; + u32 register_protocol_notify; + u32 locate_handle; + u32 locate_device_path; + u32 install_configuration_table; + u32 load_image; + u32 start_image; + u32 exit; + u32 unload_image; + u32 exit_boot_services; + u32 get_next_monotonic_count; + u32 stall; + u32 set_watchdog_timer; + u32 connect_controller; + u32 disconnect_controller; + u32 open_protocol; + u32 close_protocol; + u32 open_protocol_information; + u32 protocols_per_handle; + u32 locate_handle_buffer; + u32 locate_protocol; + u32 install_multiple_protocol_interfaces; + u32 uninstall_multiple_protocol_interfaces; + u32 calculate_crc32; + u32 copy_mem; + u32 set_mem; + u32 create_event_ex; +} __packed efi_boot_services_32_t; + +typedef struct { + efi_table_hdr_t hdr; + u64 raise_tpl; + u64 restore_tpl; + u64 allocate_pages; + u64 free_pages; + u64 get_memory_map; + u64 allocate_pool; + u64 free_pool; + u64 create_event; + u64 set_timer; + u64 wait_for_event; + u64 signal_event; + u64 close_event; + u64 check_event; + u64 install_protocol_interface; + u64 reinstall_protocol_interface; + u64 uninstall_protocol_interface; + u64 handle_protocol; + u64 __reserved; + u64 register_protocol_notify; + u64 locate_handle; + u64 locate_device_path; + u64 install_configuration_table; + u64 load_image; + u64 start_image; + u64 exit; + u64 unload_image; + u64 exit_boot_services; + u64 get_next_monotonic_count; + u64 stall; + u64 set_watchdog_timer; + u64 connect_controller; + u64 disconnect_controller; + u64 open_protocol; + u64 close_protocol; + u64 open_protocol_information; + u64 protocols_per_handle; + u64 locate_handle_buffer; + u64 locate_protocol; + u64 install_multiple_protocol_interfaces; + u64 uninstall_multiple_protocol_interfaces; + u64 calculate_crc32; + u64 copy_mem; + u64 set_mem; + u64 create_event_ex; +} __packed efi_boot_services_64_t; + /* * EFI Boot Services table */ @@ -231,6 +327,15 @@ typedef enum { EfiPciIoAttributeOperationMaximum } EFI_PCI_IO_PROTOCOL_ATTRIBUTE_OPERATION; +typedef struct { + u32 read; + u32 write; +} efi_pci_io_protocol_access_32_t; + +typedef struct { + u64 read; + u64 write; +} efi_pci_io_protocol_access_64_t; typedef struct { void *read; @@ -238,6 +343,46 @@ typedef struct { } efi_pci_io_protocol_access_t; typedef struct { + u32 poll_mem; + u32 poll_io; + efi_pci_io_protocol_access_32_t mem; + efi_pci_io_protocol_access_32_t io; + efi_pci_io_protocol_access_32_t pci; + u32 copy_mem; + u32 map; + u32 unmap; + u32 allocate_buffer; + u32 free_buffer; + u32 flush; + u32 get_location; + u32 attributes; + u32 get_bar_attributes; + u32 set_bar_attributes; + uint64_t romsize; + void *romimage; +} efi_pci_io_protocol_32; + +typedef struct { + u64 poll_mem; + u64 poll_io; + efi_pci_io_protocol_access_64_t mem; + efi_pci_io_protocol_access_64_t io; + efi_pci_io_protocol_access_64_t pci; + u64 copy_mem; + u64 map; + u64 unmap; + u64 allocate_buffer; + u64 free_buffer; + u64 flush; + u64 get_location; + u64 attributes; + u64 get_bar_attributes; + u64 set_bar_attributes; + uint64_t romsize; + void *romimage; +} efi_pci_io_protocol_64; + +typedef struct { void *poll_mem; void *poll_io; efi_pci_io_protocol_access_t mem; @@ -292,6 +437,42 @@ typedef struct { typedef struct { efi_table_hdr_t hdr; + u32 get_time; + u32 set_time; + u32 get_wakeup_time; + u32 set_wakeup_time; + u32 set_virtual_address_map; + u32 convert_pointer; + u32 get_variable; + u32 get_next_variable; + u32 set_variable; + u32 get_next_high_mono_count; + u32 reset_system; + u32 update_capsule; + u32 query_capsule_caps; + u32 query_variable_info; +} efi_runtime_services_32_t; + +typedef struct { + efi_table_hdr_t hdr; + u64 get_time; + u64 set_time; + u64 get_wakeup_time; + u64 set_wakeup_time; + u64 set_virtual_address_map; + u64 convert_pointer; + u64 get_variable; + u64 get_next_variable; + u64 set_variable; + u64 get_next_high_mono_count; + u64 reset_system; + u64 update_capsule; + u64 query_capsule_caps; + u64 query_variable_info; +} efi_runtime_services_64_t; + +typedef struct { + efi_table_hdr_t hdr; void *get_time; void *set_time; void *get_wakeup_time; @@ -485,6 +666,38 @@ struct efi_memory_map { typedef struct { u32 revision; + u32 parent_handle; + u32 system_table; + u32 device_handle; + u32 file_path; + u32 reserved; + u32 load_options_size; + u32 load_options; + u32 image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + unsigned long unload; +} efi_loaded_image_32_t; + +typedef struct { + u32 revision; + u64 parent_handle; + u64 system_table; + u64 device_handle; + u64 file_path; + u64 reserved; + u32 load_options_size; + u64 load_options; + u64 image_base; + __aligned_u64 image_size; + unsigned int image_code_type; + unsigned int image_data_type; + unsigned long unload; +} efi_loaded_image_64_t; + +typedef struct { + u32 revision; void *parent_handle; efi_system_table_t *system_table; void *device_handle; @@ -511,6 +724,34 @@ typedef struct { efi_char16_t filename[1]; } efi_file_info_t; +typedef struct { + u64 revision; + u32 open; + u32 close; + u32 delete; + u32 read; + u32 write; + u32 get_position; + u32 set_position; + u32 get_info; + u32 set_info; + u32 flush; +} efi_file_handle_32_t; + +typedef struct { + u64 revision; + u64 open; + u64 close; + u64 delete; + u64 read; + u64 write; + u64 get_position; + u64 set_position; + u64 get_info; + u64 set_info; + u64 flush; +} efi_file_handle_64_t; + typedef struct _efi_file_handle { u64 revision; efi_status_t (*open)(struct _efi_file_handle *, @@ -809,6 +1050,17 @@ struct efivar_entry { bool deleting; }; +struct efi_simple_text_output_protocol_32 { + u32 reset; + u32 output_string; + u32 test_string; +}; + +struct efi_simple_text_output_protocol_64 { + u64 reset; + u64 output_string; + u64 test_string; +}; struct efi_simple_text_output_protocol { void *reset; -- cgit v0.10.2 From 54b52d87268034859191d671505bb1cfce6bd74d Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 15:27:14 +0000 Subject: x86/efi: Build our own EFI services pointer table It's not possible to dereference the EFI System table directly when booting a 64-bit kernel on a 32-bit EFI firmware because the size of pointers don't match. In preparation for supporting the above use case, build a list of function pointers on boot so that callers don't have to worry about converting pointer sizes through multiple levels of indirection. Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index a7677ba..4254816 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -19,10 +19,145 @@ static efi_system_table_t *sys_table; +static struct efi_config *efi_early; + +#define BOOT_SERVICES(bits) \ +static void setup_boot_services##bits(struct efi_config *c) \ +{ \ + efi_system_table_##bits##_t *table; \ + efi_boot_services_##bits##_t *bt; \ + \ + table = (typeof(table))sys_table; \ + \ + c->text_output = table->con_out; \ + \ + bt = (typeof(bt))(unsigned long)(table->boottime); \ + \ + c->allocate_pool = bt->allocate_pool; \ + c->allocate_pages = bt->allocate_pages; \ + c->get_memory_map = bt->get_memory_map; \ + c->free_pool = bt->free_pool; \ + c->free_pages = bt->free_pages; \ + c->locate_handle = bt->locate_handle; \ + c->handle_protocol = bt->handle_protocol; \ + c->exit_boot_services = bt->exit_boot_services; \ +} +BOOT_SERVICES(32); +BOOT_SERVICES(64); -#include "../../../../drivers/firmware/efi/efi-stub-helper.c" +static void efi_printk(efi_system_table_t *, char *); +static void efi_char16_printk(efi_system_table_t *, efi_char16_t *); + +static efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, + efi_char16_t *filename_16, void **handle, u64 *file_sz) +{ + efi_file_handle_t *h, *fh = __fh; + efi_file_info_t *info; + efi_status_t status; + efi_guid_t info_guid = EFI_FILE_INFO_ID; + u32 info_sz; + + status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, + EFI_FILE_MODE_READ, (u64)0); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to open file: "); + efi_char16_printk(sys_table, filename_16); + efi_printk(sys_table, "\n"); + return status; + } + + *handle = h; + + info_sz = 0; + status = efi_early->call((unsigned long)h->get_info, h, &info_guid, + &info_sz, NULL); + if (status != EFI_BUFFER_TOO_SMALL) { + efi_printk(sys_table, "Failed to get file info size\n"); + return status; + } + +grow: + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + info_sz, (void **)&info); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for file info\n"); + return status; + } + + status = efi_early->call((unsigned long)h->get_info, h, &info_guid, + &info_sz, info); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_early->call(efi_early->free_pool, info); + goto grow; + } + + *file_sz = info->file_size; + efi_early->call(efi_early->free_pool, info); + + if (status != EFI_SUCCESS) + efi_printk(sys_table, "Failed to get initrd info\n"); + + return status; +} + +static inline efi_status_t +efi_file_read(void *__fh, void *handle, unsigned long *size, void *addr) +{ + efi_file_handle_t *fh = __fh; + return efi_early->call((unsigned long)fh->read, handle, size, addr); +} + +static inline efi_status_t efi_file_close(void *__fh, void *handle) +{ + efi_file_handle_t *fh = __fh; + return efi_early->call((unsigned long)fh->close, handle); +} + +static inline efi_status_t +efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) +{ + efi_file_io_interface_t *io; + efi_loaded_image_t *image = __image; + efi_file_handle_t *fh; + efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; + efi_status_t status; + void *handle = (void *)(unsigned long)image->device_handle; + u32 func; + + status = efi_early->call(efi_early->handle_protocol, handle, + &fs_proto, (void **)&io); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to handle fs_proto\n"); + return status; + } + func = (unsigned long)io->open_volume; + status = efi_early->call(func, io, &fh); + if (status != EFI_SUCCESS) + efi_printk(sys_table, "Failed to open volume\n"); + + *__fh = fh; + return status; +} + +static inline void +efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +{ + struct efi_simple_text_output_protocol *out; + unsigned long output_string; + size_t offset; + unsigned long *func; + + offset = offsetof(typeof(*out), output_string); + output_string = efi_early->text_output + offset; + func = (unsigned long *)output_string; + + efi_early->call(*func, efi_early->text_output, str); +} + +#include "../../../../drivers/firmware/efi/efi-stub-helper.c" static void find_bits(unsigned long mask, u8 *pos, u8 *size) { @@ -51,7 +186,7 @@ static efi_status_t setup_efi_pci(struct boot_params *params) { efi_pci_io_protocol *pci; efi_status_t status; - void **pci_handle; + void **pci_handle = NULL; efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; unsigned long nr_pci, size = 0; int i; @@ -62,20 +197,21 @@ static efi_status_t setup_efi_pci(struct boot_params *params) while (data && data->next) data = (struct setup_data *)(unsigned long)data->next; - status = efi_call_phys5(sys_table->boottime->locate_handle, - EFI_LOCATE_BY_PROTOCOL, &pci_proto, - NULL, &size, pci_handle); + status = efi_early->call(efi_early->locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); if (status == EFI_BUFFER_TOO_SMALL) { - status = efi_call_phys3(sys_table->boottime->allocate_pool, - EFI_LOADER_DATA, size, &pci_handle); + status = efi_early->call(efi_early->allocate_pool, + EFI_LOADER_DATA, + size, (void **)&pci_handle); if (status != EFI_SUCCESS) return status; - status = efi_call_phys5(sys_table->boottime->locate_handle, - EFI_LOCATE_BY_PROTOCOL, &pci_proto, - NULL, &size, pci_handle); + status = efi_early->call(efi_early->locate_handle, + EFI_LOCATE_BY_PROTOCOL, &pci_proto, + NULL, &size, pci_handle); } if (status != EFI_SUCCESS) @@ -87,8 +223,8 @@ static efi_status_t setup_efi_pci(struct boot_params *params) uint64_t attributes; struct pci_setup_rom *rom; - status = efi_call_phys3(sys_table->boottime->handle_protocol, - h, &pci_proto, &pci); + status = efi_early->call(efi_early->handle_protocol, h, + &pci_proto, (void **)&pci); if (status != EFI_SUCCESS) continue; @@ -97,13 +233,13 @@ static efi_status_t setup_efi_pci(struct boot_params *params) continue; #ifdef CONFIG_X86_64 - status = efi_call_phys4(pci->attributes, pci, - EfiPciIoAttributeOperationGet, 0, - &attributes); + status = efi_early->call((unsigned long)pci->attributes, pci, + EfiPciIoAttributeOperationGet, 0, + &attributes); #else - status = efi_call_phys5(pci->attributes, pci, - EfiPciIoAttributeOperationGet, 0, 0, - &attributes); + status = efi_early->call((unsigned long)pci->attributes, pci, + EfiPciIoAttributeOperationGet, 0, 0, + &attributes); #endif if (status != EFI_SUCCESS) continue; @@ -113,8 +249,8 @@ static efi_status_t setup_efi_pci(struct boot_params *params) size = pci->romsize + sizeof(*rom); - status = efi_call_phys3(sys_table->boottime->allocate_pool, - EFI_LOADER_DATA, size, &rom); + status = efi_early->call(efi_early->allocate_pool, + EFI_LOADER_DATA, size, &rom); if (status != EFI_SUCCESS) continue; @@ -124,23 +260,23 @@ static efi_status_t setup_efi_pci(struct boot_params *params) rom->data.next = 0; rom->pcilen = pci->romsize; - status = efi_call_phys5(pci->pci.read, pci, - EfiPciIoWidthUint16, PCI_VENDOR_ID, - 1, &(rom->vendor)); + status = efi_early->call((unsigned long)pci->pci.read, pci, + EfiPciIoWidthUint16, PCI_VENDOR_ID, + 1, &(rom->vendor)); if (status != EFI_SUCCESS) goto free_struct; - status = efi_call_phys5(pci->pci.read, pci, - EfiPciIoWidthUint16, PCI_DEVICE_ID, - 1, &(rom->devid)); + status = efi_early->call((unsigned long)pci->pci.read, pci, + EfiPciIoWidthUint16, PCI_DEVICE_ID, + 1, &(rom->devid)); if (status != EFI_SUCCESS) goto free_struct; - status = efi_call_phys5(pci->get_location, pci, - &(rom->segment), &(rom->bus), - &(rom->device), &(rom->function)); + status = efi_early->call((unsigned long)pci->get_location, pci, + &(rom->segment), &(rom->bus), + &(rom->device), &(rom->function)); if (status != EFI_SUCCESS) goto free_struct; @@ -156,11 +292,11 @@ static efi_status_t setup_efi_pci(struct boot_params *params) continue; free_struct: - efi_call_phys1(sys_table->boottime->free_pool, rom); + efi_early->call(efi_early->free_pool, rom); } free_handle: - efi_call_phys1(sys_table->boottime->free_pool, pci_handle); + efi_early->call(efi_early->free_pool, pci_handle); return status; } @@ -174,21 +310,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, struct efi_pixel_bitmask pixel_info; unsigned long nr_gops; efi_status_t status; - void **gop_handle; + void **gop_handle = NULL; u16 width, height; u32 fb_base, fb_size; u32 pixels_per_scan_line; int pixel_format; int i; - status = efi_call_phys3(sys_table->boottime->allocate_pool, - EFI_LOADER_DATA, size, &gop_handle); + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + size, (void **)&gop_handle); if (status != EFI_SUCCESS) return status; - status = efi_call_phys5(sys_table->boottime->locate_handle, - EFI_LOCATE_BY_PROTOCOL, proto, - NULL, &size, gop_handle); + status = efi_early->call(efi_early->locate_handle, + EFI_LOCATE_BY_PROTOCOL, + proto, NULL, &size, gop_handle); if (status != EFI_SUCCESS) goto free_handle; @@ -202,19 +338,18 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, void *dummy; void *h = gop_handle[i]; - status = efi_call_phys3(sys_table->boottime->handle_protocol, - h, proto, &gop); + status = efi_early->call(efi_early->handle_protocol, h, + proto, (void **)&gop); if (status != EFI_SUCCESS) continue; - status = efi_call_phys3(sys_table->boottime->handle_protocol, - h, &conout_proto, &dummy); - + status = efi_early->call(efi_early->handle_protocol, h, + &conout_proto, &dummy); if (status == EFI_SUCCESS) conout_found = true; - status = efi_call_phys4(gop->query_mode, gop, - gop->mode->mode, &size, &info); + status = efi_early->call((unsigned long)gop->query_mode, gop, + gop->mode->mode, &size, &info); if (status == EFI_SUCCESS && (!first_gop || conout_found)) { /* * Systems that use the UEFI Console Splitter may @@ -303,7 +438,7 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; free_handle: - efi_call_phys1(sys_table->boottime->free_pool, gop_handle); + efi_early->call(efi_early->free_pool, gop_handle); return status; } @@ -320,14 +455,14 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, void **uga_handle = NULL; int i; - status = efi_call_phys3(sys_table->boottime->allocate_pool, - EFI_LOADER_DATA, size, &uga_handle); + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + size, (void **)&uga_handle); if (status != EFI_SUCCESS) return status; - status = efi_call_phys5(sys_table->boottime->locate_handle, - EFI_LOCATE_BY_PROTOCOL, uga_proto, - NULL, &size, uga_handle); + status = efi_early->call(efi_early->locate_handle, + EFI_LOCATE_BY_PROTOCOL, + uga_proto, NULL, &size, uga_handle); if (status != EFI_SUCCESS) goto free_handle; @@ -340,16 +475,16 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, u32 w, h, depth, refresh; void *pciio; - status = efi_call_phys3(sys_table->boottime->handle_protocol, - handle, uga_proto, &uga); + status = efi_early->call(efi_early->handle_protocol, handle, + uga_proto, (void **)&uga); if (status != EFI_SUCCESS) continue; - efi_call_phys3(sys_table->boottime->handle_protocol, - handle, &pciio_proto, &pciio); + efi_early->call(efi_early->handle_protocol, handle, + &pciio_proto, &pciio); - status = efi_call_phys5(uga->get_mode, uga, &w, &h, - &depth, &refresh); + status = efi_early->call((unsigned long)uga->get_mode, uga, + &w, &h, &depth, &refresh); if (status == EFI_SUCCESS && (!first_uga || pciio)) { width = w; height = h; @@ -386,7 +521,7 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, free_handle: - efi_call_phys1(sys_table->boottime->free_pool, uga_handle); + efi_early->call(efi_early->free_pool, uga_handle); return status; } @@ -404,29 +539,28 @@ void setup_graphics(struct boot_params *boot_params) memset(si, 0, sizeof(*si)); size = 0; - status = efi_call_phys5(sys_table->boottime->locate_handle, - EFI_LOCATE_BY_PROTOCOL, &graphics_proto, - NULL, &size, gop_handle); + status = efi_early->call(efi_early->locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &graphics_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) status = setup_gop(si, &graphics_proto, size); if (status != EFI_SUCCESS) { size = 0; - status = efi_call_phys5(sys_table->boottime->locate_handle, - EFI_LOCATE_BY_PROTOCOL, &uga_proto, - NULL, &size, uga_handle); + status = efi_early->call(efi_early->locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &uga_proto, NULL, &size, uga_handle); if (status == EFI_BUFFER_TOO_SMALL) setup_uga(si, &uga_proto, size); } } - /* * Because the x86 boot code expects to be passed a boot_params we * need to create one ourselves (usually the bootloader would create * one for us). */ -struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) +struct boot_params *make_boot_params(struct efi_config *c) { struct boot_params *boot_params; struct sys_desc_table *sdt; @@ -434,7 +568,7 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) struct setup_header *hdr; struct efi_info *efi; efi_loaded_image_t *image; - void *options; + void *options, *handle; efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; int options_size = 0; efi_status_t status; @@ -445,14 +579,21 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) unsigned long ramdisk_addr; unsigned long ramdisk_size; - sys_table = _table; + efi_early = c; + sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; + handle = (void *)(unsigned long)efi_early->image_handle; /* Check if we were booted by the EFI firmware */ if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) return NULL; - status = efi_call_phys3(sys_table->boottime->handle_protocol, - handle, &proto, (void *)&image); + if (efi_early->is64) + setup_boot_services64(efi_early); + else + setup_boot_services32(efi_early); + + status = efi_early->call(efi_early->handle_protocol, handle, + &proto, (void *)&image); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); return NULL; @@ -641,14 +782,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, sizeof(struct e820entry) * nr_desc; if (*e820ext) { - efi_call_phys1(sys_table->boottime->free_pool, *e820ext); + efi_early->call(efi_early->free_pool, *e820ext); *e820ext = NULL; *e820ext_size = 0; } - status = efi_call_phys3(sys_table->boottime->allocate_pool, - EFI_LOADER_DATA, size, e820ext); - + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + size, (void **)e820ext); if (status == EFI_SUCCESS) *e820ext_size = size; @@ -691,7 +831,7 @@ get_map: if (status != EFI_SUCCESS) goto free_mem_map; - efi_call_phys1(sys_table->boottime->free_pool, mem_map); + efi_early->call(efi_early->free_pool, mem_map); goto get_map; /* Allocated memory, get map again */ } @@ -708,8 +848,7 @@ get_map: #endif /* Might as well exit boot services now */ - status = efi_call_phys2(sys_table->boottime->exit_boot_services, - handle, key); + status = efi_early->call(efi_early->exit_boot_services, handle, key); if (status != EFI_SUCCESS) { /* * ExitBootServices() will fail if any of the event @@ -722,7 +861,7 @@ get_map: goto free_mem_map; called_exit = true; - efi_call_phys1(sys_table->boottime->free_pool, mem_map); + efi_early->call(efi_early->free_pool, mem_map); goto get_map; } @@ -736,23 +875,31 @@ get_map: return EFI_SUCCESS; free_mem_map: - efi_call_phys1(sys_table->boottime->free_pool, mem_map); + efi_early->call(efi_early->free_pool, mem_map); return status; } - /* * On success we return a pointer to a boot_params structure, and NULL * on failure. */ -struct boot_params *efi_main(void *handle, efi_system_table_t *_table, +struct boot_params *efi_main(struct efi_config *c, struct boot_params *boot_params) { - struct desc_ptr *gdt; + struct desc_ptr *gdt = NULL; efi_loaded_image_t *image; struct setup_header *hdr = &boot_params->hdr; efi_status_t status; struct desc_struct *desc; + void *handle; + efi_system_table_t *_table; + bool is64; + + efi_early = c; + + _table = (efi_system_table_t *)(unsigned long)efi_early->table; + handle = (void *)(unsigned long)efi_early->image_handle; + is64 = efi_early->is64; sys_table = _table; @@ -760,13 +907,17 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table, if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) goto fail; + if (is64) + setup_boot_services64(efi_early); + else + setup_boot_services32(efi_early); + setup_graphics(boot_params); setup_efi_pci(boot_params); - status = efi_call_phys3(sys_table->boottime->allocate_pool, - EFI_LOADER_DATA, sizeof(*gdt), - (void **)&gdt); + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + sizeof(*gdt), (void **)&gdt); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to alloc mem for gdt structure\n"); goto fail; diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index d487e72..c88c31e 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h @@ -103,4 +103,20 @@ struct efi_uga_draw_protocol { void *blt; }; +struct efi_config { + u64 image_handle; + u64 table; + u64 allocate_pool; + u64 allocate_pages; + u64 get_memory_map; + u64 free_pool; + u64 free_pages; + u64 locate_handle; + u64 handle_protocol; + u64 exit_boot_services; + u64 text_output; + efi_status_t (*call)(unsigned long, ...); + bool is64; +} __packed; + #endif /* BOOT_COMPRESSED_EBOOT_H */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 9116aac..eed23c0 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -42,26 +42,53 @@ ENTRY(startup_32) ENTRY(efi_pe_entry) add $0x4, %esp + call 1f +1: popl %esi + subl $1b, %esi + + popl %ecx + movl %ecx, efi32_config(%esi) /* Handle */ + popl %ecx + movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */ + + /* Relocate efi_config->call() */ + leal efi32_config(%esi), %eax + add %esi, 88(%eax) + pushl %eax + call make_boot_params cmpl $0, %eax - je 1f - movl 0x4(%esp), %esi - movl (%esp), %ecx + je fail + popl %ecx pushl %eax - pushl %esi pushl %ecx - sub $0x4, %esp + jmp 2f /* Skip efi_config initialization */ ENTRY(efi_stub_entry) add $0x4, %esp + popl %ecx + popl %edx + + call 1f +1: popl %esi + subl $1b, %esi + + movl %ecx, efi32_config(%esi) /* Handle */ + movl %edx, efi32_config+8(%esi) /* EFI System table pointer */ + + /* Relocate efi_config->call() */ + leal efi32_config(%esi), %eax + add %esi, 88(%eax) + pushl %eax +2: call efi_main cmpl $0, %eax movl %eax, %esi jne 2f -1: +fail: /* EFI init failed, so hang. */ hlt - jmp 1b + jmp fail 2: call 3f 3: @@ -202,6 +229,13 @@ relocated: xorl %ebx, %ebx jmp *%eax + .data +efi32_config: + .fill 11,8,0 + .long efi_call_phys + .long 0 + .byte 0 + /* * Stack and heap for uncompression */ diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c5c1ae0..1bc206f 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -209,26 +209,55 @@ ENTRY(startup_64) jmp preferred_addr ENTRY(efi_pe_entry) - mov %rcx, %rdi - mov %rdx, %rsi - pushq %rdi - pushq %rsi + movq %rcx, efi64_config(%rip) /* Handle */ + movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */ + + leaq efi64_config(%rip), %rax + movq %rax, efi_config(%rip) + + call 1f +1: popq %rbp + subq $1b, %rbp + + /* + * Relocate efi_config->call(). + */ + addq %rbp, efi64_config+88(%rip) + + movq %rax, %rdi call make_boot_params cmpq $0,%rax - je 1f - mov %rax, %rdx - popq %rsi - popq %rdi + je fail + mov %rax, %rsi + jmp 2f /* Skip the relocation */ ENTRY(efi_stub_entry) + movq %rdi, efi64_config(%rip) /* Handle */ + movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ + + leaq efi64_config(%rip), %rax + movq %rax, efi_config(%rip) + + call 1f +1: popq %rbp + subq $1b, %rbp + + /* + * Relocate efi_config->call(). + */ + movq efi_config(%rip), %rax + addq %rbp, 88(%rax) + movq %rdx, %rsi +2: + movq efi_config(%rip), %rdi call efi_main movq %rax,%rsi cmpq $0,%rax jne 2f -1: +fail: /* EFI init failed, so hang. */ hlt - jmp 1b + jmp fail 2: call 3f 3: @@ -372,6 +401,14 @@ gdt: .quad 0x0000000000000000 /* TS continued */ gdt_end: +efi_config: + .quad 0 + + .global efi64_config +efi64_config: + .fill 11,8,0 + .quad efi_call6 + .byte 1 /* * Stack and heap for uncompression */ diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index b6bffbf..a028287 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -16,18 +16,6 @@ struct file_info { u64 size; }; - - - -static void efi_char16_printk(efi_system_table_t *sys_table_arg, - efi_char16_t *str) -{ - struct efi_simple_text_output_protocol *out; - - out = (struct efi_simple_text_output_protocol *)sys_table_arg->con_out; - efi_call_phys2(out->output_string, out, str); -} - static void efi_printk(efi_system_table_t *sys_table_arg, char *str) { char *s8; @@ -65,20 +53,23 @@ again: * allocation which may be in a new descriptor region. */ *map_size += sizeof(*m); - status = efi_call_phys3(sys_table_arg->boottime->allocate_pool, - EFI_LOADER_DATA, *map_size, (void **)&m); + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + *map_size, (void **)&m); if (status != EFI_SUCCESS) goto fail; - status = efi_call_phys5(sys_table_arg->boottime->get_memory_map, - map_size, m, &key, desc_size, &desc_version); + *desc_size = 0; + key = 0; + status = efi_early->call(efi_early->get_memory_map, map_size, m, + &key, desc_size, &desc_version); if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_phys1(sys_table_arg->boottime->free_pool, m); + efi_early->call(efi_early->free_pool, m); goto again; } if (status != EFI_SUCCESS) - efi_call_phys1(sys_table_arg->boottime->free_pool, m); + efi_early->call(efi_early->free_pool, m); + if (key_ptr && status == EFI_SUCCESS) *key_ptr = key; if (desc_ver && status == EFI_SUCCESS) @@ -158,9 +149,9 @@ again: if (!max_addr) status = EFI_NOT_FOUND; else { - status = efi_call_phys4(sys_table_arg->boottime->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &max_addr); + status = efi_early->call(efi_early->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &max_addr); if (status != EFI_SUCCESS) { max = max_addr; max_addr = 0; @@ -170,8 +161,7 @@ again: *addr = max_addr; } - efi_call_phys1(sys_table_arg->boottime->free_pool, map); - + efi_early->call(efi_early->free_pool, map); fail: return status; } @@ -231,9 +221,9 @@ static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, if ((start + size) > end) continue; - status = efi_call_phys4(sys_table_arg->boottime->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &start); + status = efi_early->call(efi_early->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &start); if (status == EFI_SUCCESS) { *addr = start; break; @@ -243,7 +233,7 @@ static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, if (i == map_size / desc_size) status = EFI_NOT_FOUND; - efi_call_phys1(sys_table_arg->boottime->free_pool, map); + efi_early->call(efi_early->free_pool, map); fail: return status; } @@ -257,7 +247,7 @@ static void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, return; nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - efi_call_phys2(sys_table_arg->boottime->free_pages, addr, nr_pages); + efi_early->call(efi_early->free_pages, addr, nr_pages); } @@ -276,9 +266,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, { struct file_info *files; unsigned long file_addr; - efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; u64 file_size_total; - efi_file_io_interface_t *io; efi_file_handle_t *fh; efi_status_t status; int nr_files; @@ -319,10 +307,8 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, if (!nr_files) return EFI_SUCCESS; - status = efi_call_phys3(sys_table_arg->boottime->allocate_pool, - EFI_LOADER_DATA, - nr_files * sizeof(*files), - (void **)&files); + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + nr_files * sizeof(*files), (void **)&files); if (status != EFI_SUCCESS) { efi_printk(sys_table_arg, "Failed to alloc mem for file handle list\n"); goto fail; @@ -331,13 +317,8 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, str = cmd_line; for (i = 0; i < nr_files; i++) { struct file_info *file; - efi_file_handle_t *h; - efi_file_info_t *info; efi_char16_t filename_16[256]; - unsigned long info_sz; - efi_guid_t info_guid = EFI_FILE_INFO_ID; efi_char16_t *p; - u64 file_sz; str = strstr(str, option_string); if (!str) @@ -368,71 +349,18 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, /* Only open the volume once. */ if (!i) { - efi_boot_services_t *boottime; - - boottime = sys_table_arg->boottime; - - status = efi_call_phys3(boottime->handle_protocol, - image->device_handle, &fs_proto, - (void **)&io); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to handle fs_proto\n"); - goto free_files; - } - - status = efi_call_phys2(io->open_volume, io, &fh); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to open volume\n"); + status = efi_open_volume(sys_table_arg, image, + (void **)&fh); + if (status != EFI_SUCCESS) goto free_files; - } } - status = efi_call_phys5(fh->open, fh, &h, filename_16, - EFI_FILE_MODE_READ, (u64)0); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to open file: "); - efi_char16_printk(sys_table_arg, filename_16); - efi_printk(sys_table_arg, "\n"); + status = efi_file_size(sys_table_arg, fh, filename_16, + (void **)&file->handle, &file->size); + if (status != EFI_SUCCESS) goto close_handles; - } - file->handle = h; - - info_sz = 0; - status = efi_call_phys4(h->get_info, h, &info_guid, - &info_sz, NULL); - if (status != EFI_BUFFER_TOO_SMALL) { - efi_printk(sys_table_arg, "Failed to get file info size\n"); - goto close_handles; - } - -grow: - status = efi_call_phys3(sys_table_arg->boottime->allocate_pool, - EFI_LOADER_DATA, info_sz, - (void **)&info); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to alloc mem for file info\n"); - goto close_handles; - } - - status = efi_call_phys4(h->get_info, h, &info_guid, - &info_sz, info); - if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_phys1(sys_table_arg->boottime->free_pool, - info); - goto grow; - } - - file_sz = info->file_size; - efi_call_phys1(sys_table_arg->boottime->free_pool, info); - - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to get file info\n"); - goto close_handles; - } - - file->size = file_sz; - file_size_total += file_sz; + file_size_total += file->size; } if (file_size_total) { @@ -468,10 +396,10 @@ grow: chunksize = EFI_READ_CHUNK_SIZE; else chunksize = size; - status = efi_call_phys3(fh->read, - files[j].handle, - &chunksize, - (void *)addr); + + status = efi_file_read(fh, files[j].handle, + &chunksize, + (void *)addr); if (status != EFI_SUCCESS) { efi_printk(sys_table_arg, "Failed to read file\n"); goto free_file_total; @@ -480,12 +408,12 @@ grow: size -= chunksize; } - efi_call_phys1(fh->close, files[j].handle); + efi_file_close(fh, files[j].handle); } } - efi_call_phys1(sys_table_arg->boottime->free_pool, files); + efi_early->call(efi_early->free_pool, files); *load_addr = file_addr; *load_size = file_size_total; @@ -497,9 +425,9 @@ free_file_total: close_handles: for (k = j; k < i; k++) - efi_call_phys1(fh->close, files[k].handle); + efi_file_close(fh, files[k].handle); free_files: - efi_call_phys1(sys_table_arg->boottime->free_pool, files); + efi_early->call(efi_early->free_pool, files); fail: *load_addr = 0; *load_size = 0; @@ -545,9 +473,9 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, * as possible while respecting the required alignment. */ nr_pages = round_up(alloc_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - status = efi_call_phys4(sys_table_arg->boottime->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &efi_addr); + status = efi_early->call(efi_early->allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &efi_addr); new_addr = efi_addr; /* * If preferred address allocation failed allocate as low as -- cgit v0.10.2 From 0154416a71c2a84c3746c8dd8ed25287e36934d3 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 15:37:17 +0000 Subject: x86/efi: Add early thunk code to go from 64-bit to 32-bit Implement the transition code to go from IA32e mode to protected mode in the EFI boot stub. This is required to use 32-bit EFI services from a 64-bit kernel. Since EFI boot stub is executed in an identity-mapped region, there's not much we need to do before invoking the 32-bit EFI boot services. However, we do reload the firmware's global descriptor table (efi32_boot_gdt) in case things like timer events are still running in the firmware. Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S index cedc60d..7ff3632 100644 --- a/arch/x86/boot/compressed/efi_stub_64.S +++ b/arch/x86/boot/compressed/efi_stub_64.S @@ -1 +1,30 @@ +#include +#include +#include + #include "../../platform/efi/efi_stub_64.S" + +#ifdef CONFIG_EFI_MIXED + .code64 + .text +ENTRY(efi64_thunk) + push %rbp + push %rbx + + subq $16, %rsp + leaq efi_exit32(%rip), %rax + movl %eax, 8(%rsp) + leaq efi_gdt64(%rip), %rax + movl %eax, 4(%rsp) + movl %eax, 2(%rax) /* Fixup the gdt base address */ + leaq efi32_boot_gdt(%rip), %rax + movl %eax, (%rsp) + + call __efi64_thunk + + addq $16, %rsp + pop %rbx + pop %rbp + ret +ENDPROC(efi64_thunk) +#endif /* CONFIG_EFI_MIXED */ diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 88073b1..a790d69 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -7,6 +7,10 @@ */ #include +#include +#include +#include +#include #define SAVE_XMM \ mov %rsp, %rax; \ @@ -164,6 +168,152 @@ ENTRY(efi_call6) ret ENDPROC(efi_call6) +#ifdef CONFIG_EFI_MIXED + +/* + * We run this function from the 1:1 mapping. + * + * This function must be invoked with a 1:1 mapped stack. + */ +ENTRY(__efi64_thunk) + subq $32, %rsp + movl %esi, 0x0(%rsp) + movl %edx, 0x4(%rsp) + movl %ecx, 0x8(%rsp) + movq %r8, %rsi + movl %esi, 0xc(%rsp) + movq %r9, %rsi + movl %esi, 0x10(%rsp) + + sgdt save_gdt(%rip) + + leaq 1f(%rip), %rbx + movq %rbx, func_rt_ptr(%rip) + + /* Switch to gdt with 32-bit segments */ + movl 40(%rsp), %eax + lgdt (%rax) + + leaq efi_enter32(%rip), %rax + pushq $__KERNEL_CS + pushq %rax + lretq + +1: addq $32, %rsp + + lgdt save_gdt(%rip) + + /* + * Convert 32-bit status code into 64-bit. + */ + test %rax, %rax + jz 1f + movl %eax, %ecx + andl $0x0fffffff, %ecx + andl $0xf0000000, %eax + shl $32, %rax + or %rcx, %rax +1: + ret +ENDPROC(__efi64_thunk) + +ENTRY(efi_exit32) + xorq %rax, %rax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + movq func_rt_ptr(%rip), %rax + push %rax + mov %rdi, %rax + ret +ENDPROC(efi_exit32) + + .code32 +/* + * EFI service pointer must be in %edi. + * + * The stack should represent the 32-bit calling convention. + */ +ENTRY(efi_enter32) + movl $__KERNEL_DS, %eax + movl %eax, %ds + movl %eax, %es + movl %eax, %ss + + /* Reload pgtables */ + movl %cr3, %eax + movl %eax, %cr3 + + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + + /* Disable long mode via EFER */ + movl $MSR_EFER, %ecx + rdmsr + btrl $_EFER_LME, %eax + wrmsr + + call *%edi + + /* We must preserve return value */ + movl %eax, %edi + + movl 44(%esp), %eax + movl %eax, 2(%eax) + lgdtl (%eax) + + movl %cr4, %eax + btsl $(X86_CR4_PAE_BIT), %eax + movl %eax, %cr4 + + movl %cr3, %eax + movl %eax, %cr3 + + movl $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + xorl %eax, %eax + lldt %ax + + movl 48(%esp), %eax + pushl $__KERNEL_CS + pushl %eax + + /* Enable paging */ + movl %cr0, %eax + btsl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + lret +ENDPROC(efi_enter32) + + .data + .balign 8 + .global efi32_boot_gdt +efi32_boot_gdt: .word 0 + .quad 0 + +save_gdt: .word 0 + .quad 0 +func_rt_ptr: .quad 0 + + .global efi_gdt64 +efi_gdt64: + .word efi_gdt64_end - efi_gdt64 + .long 0 /* Filled out by user */ + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00af9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf92000000ffff /* __KERNEL_DS */ + .quad 0x0080890000000000 /* TS descriptor */ + .quad 0x0000000000000000 /* TS continued */ +efi_gdt64_end: +#endif /* CONFIG_EFI_MIXED */ + .data ENTRY(efi_scratch) .fill 3,8,0 -- cgit v0.10.2 From c116e8d60adabfd545a269fccab85e77febc1643 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 16 Jan 2014 11:35:43 +0000 Subject: x86/efi: Split the boot stub into 32/64 code paths Make the decision which code path to take at runtime based on efi_early->is64. Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 4254816..ab1f3a2 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -49,10 +49,63 @@ static void efi_printk(efi_system_table_t *, char *); static void efi_char16_printk(efi_system_table_t *, efi_char16_t *); static efi_status_t -efi_file_size(efi_system_table_t *sys_table, void *__fh, - efi_char16_t *filename_16, void **handle, u64 *file_sz) +__file_size32(void *__fh, efi_char16_t *filename_16, + void **handle, u64 *file_sz) +{ + efi_file_handle_32_t *h, *fh = __fh; + efi_file_info_t *info; + efi_status_t status; + efi_guid_t info_guid = EFI_FILE_INFO_ID; + u32 info_sz; + + status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, + EFI_FILE_MODE_READ, (u64)0); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to open file: "); + efi_char16_printk(sys_table, filename_16); + efi_printk(sys_table, "\n"); + return status; + } + + *handle = h; + + info_sz = 0; + status = efi_early->call((unsigned long)h->get_info, h, &info_guid, + &info_sz, NULL); + if (status != EFI_BUFFER_TOO_SMALL) { + efi_printk(sys_table, "Failed to get file info size\n"); + return status; + } + +grow: + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + info_sz, (void **)&info); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to alloc mem for file info\n"); + return status; + } + + status = efi_early->call((unsigned long)h->get_info, h, &info_guid, + &info_sz, info); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_early->call(efi_early->free_pool, info); + goto grow; + } + + *file_sz = info->file_size; + efi_early->call(efi_early->free_pool, info); + + if (status != EFI_SUCCESS) + efi_printk(sys_table, "Failed to get initrd info\n"); + + return status; +} + +static efi_status_t +__file_size64(void *__fh, efi_char16_t *filename_16, + void **handle, u64 *file_sz) { - efi_file_handle_t *h, *fh = __fh; + efi_file_handle_64_t *h, *fh = __fh; efi_file_info_t *info; efi_status_t status; efi_guid_t info_guid = EFI_FILE_INFO_ID; @@ -100,31 +153,82 @@ grow: return status; } +static efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, + efi_char16_t *filename_16, void **handle, u64 *file_sz) +{ + if (efi_early->is64) + return __file_size64(__fh, filename_16, handle, file_sz); + + return __file_size32(__fh, filename_16, handle, file_sz); +} static inline efi_status_t efi_file_read(void *__fh, void *handle, unsigned long *size, void *addr) { - efi_file_handle_t *fh = __fh; - return efi_early->call((unsigned long)fh->read, handle, size, addr); + unsigned long func; + + if (efi_early->is64) { + efi_file_handle_64_t *fh = __fh; + + func = (unsigned long)fh->read; + return efi_early->call(func, handle, size, addr); + } else { + efi_file_handle_32_t *fh = __fh; + + func = (unsigned long)fh->read; + return efi_early->call(func, handle, size, addr); + } } static inline efi_status_t efi_file_close(void *__fh, void *handle) { - efi_file_handle_t *fh = __fh; + if (efi_early->is64) { + efi_file_handle_64_t *fh = __fh; - return efi_early->call((unsigned long)fh->close, handle); + return efi_early->call((unsigned long)fh->close, handle); + } else { + efi_file_handle_32_t *fh = __fh; + + return efi_early->call((unsigned long)fh->close, handle); + } } -static inline efi_status_t -efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) +static inline efi_status_t __open_volume32(void *__image, void **__fh) +{ + efi_file_io_interface_t *io; + efi_loaded_image_32_t *image = __image; + efi_file_handle_32_t *fh; + efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; + efi_status_t status; + void *handle = (void *)(unsigned long)image->device_handle; + unsigned long func; + + status = efi_early->call(efi_early->handle_protocol, handle, + &fs_proto, (void **)&io); + if (status != EFI_SUCCESS) { + efi_printk(sys_table, "Failed to handle fs_proto\n"); + return status; + } + + func = (unsigned long)io->open_volume; + status = efi_early->call(func, io, &fh); + if (status != EFI_SUCCESS) + efi_printk(sys_table, "Failed to open volume\n"); + + *__fh = fh; + return status; +} + +static inline efi_status_t __open_volume64(void *__image, void **__fh) { efi_file_io_interface_t *io; - efi_loaded_image_t *image = __image; - efi_file_handle_t *fh; + efi_loaded_image_64_t *image = __image; + efi_file_handle_64_t *fh; efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; efi_status_t status; void *handle = (void *)(unsigned long)image->device_handle; - u32 func; + unsigned long func; status = efi_early->call(efi_early->handle_protocol, handle, &fs_proto, (void **)&io); @@ -142,19 +246,39 @@ efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) return status; } -static inline void -efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) +static inline efi_status_t +efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) +{ + if (efi_early->is64) + return __open_volume64(__image, __fh); + + return __open_volume32(__image, __fh); +} + +static void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) { - struct efi_simple_text_output_protocol *out; unsigned long output_string; size_t offset; - unsigned long *func; - offset = offsetof(typeof(*out), output_string); - output_string = efi_early->text_output + offset; - func = (unsigned long *)output_string; + if (efi_early->is64) { + struct efi_simple_text_output_protocol_64 *out; + u64 *func; + + offset = offsetof(typeof(*out), output_string); + output_string = efi_early->text_output + offset; + func = (u64 *)output_string; + + efi_early->call(*func, efi_early->text_output, str); + } else { + struct efi_simple_text_output_protocol_32 *out; + u32 *func; + + offset = offsetof(typeof(*out), output_string); + output_string = efi_early->text_output + offset; + func = (u32 *)output_string; - efi_early->call(*func, efi_early->text_output, str); + efi_early->call(*func, efi_early->text_output, str); + } } #include "../../../../drivers/firmware/efi/efi-stub-helper.c" @@ -182,46 +306,86 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size) *size = len; } -static efi_status_t setup_efi_pci(struct boot_params *params) +static efi_status_t +__setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) { - efi_pci_io_protocol *pci; + struct pci_setup_rom *rom = NULL; efi_status_t status; - void **pci_handle = NULL; - efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; - unsigned long nr_pci, size = 0; - int i; - struct setup_data *data; + unsigned long size; + uint64_t attributes; - data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + status = efi_early->call(pci->attributes, pci, + EfiPciIoAttributeOperationGet, 0, 0, + &attributes); + if (status != EFI_SUCCESS) + return status; - while (data && data->next) - data = (struct setup_data *)(unsigned long)data->next; + if (!pci->romimage || !pci->romsize) + return EFI_INVALID_PARAMETER; - status = efi_early->call(efi_early->locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &pci_proto, NULL, &size, pci_handle); + size = pci->romsize + sizeof(*rom); - if (status == EFI_BUFFER_TOO_SMALL) { - status = efi_early->call(efi_early->allocate_pool, - EFI_LOADER_DATA, - size, (void **)&pci_handle); + status = efi_early->call(efi_early->allocate_pool, + EFI_LOADER_DATA, size, &rom); - if (status != EFI_SUCCESS) - return status; + if (status != EFI_SUCCESS) + return status; - status = efi_early->call(efi_early->locate_handle, - EFI_LOCATE_BY_PROTOCOL, &pci_proto, - NULL, &size, pci_handle); - } + memset(rom, 0, sizeof(*rom)); + + rom->data.type = SETUP_PCI; + rom->data.len = size - sizeof(struct setup_data); + rom->data.next = 0; + rom->pcilen = pci->romsize; + *__rom = rom; + + status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, + PCI_VENDOR_ID, 1, &(rom->vendor)); if (status != EFI_SUCCESS) - goto free_handle; + goto free_struct; + + status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, + PCI_DEVICE_ID, 1, &(rom->devid)); + + if (status != EFI_SUCCESS) + goto free_struct; + + status = efi_early->call(pci->get_location, pci, &(rom->segment), + &(rom->bus), &(rom->device), &(rom->function)); + + if (status != EFI_SUCCESS) + goto free_struct; + + memcpy(rom->romdata, pci->romimage, pci->romsize); + return status; + +free_struct: + efi_early->call(efi_early->free_pool, rom); + return status; +} - nr_pci = size / sizeof(void *); +static efi_status_t +setup_efi_pci32(struct boot_params *params, void **pci_handle, + unsigned long size) +{ + efi_pci_io_protocol_32 *pci = NULL; + efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; + u32 *handles = (u32 *)(unsigned long)pci_handle; + efi_status_t status; + unsigned long nr_pci; + struct setup_data *data; + int i; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + nr_pci = size / sizeof(u32); for (i = 0; i < nr_pci; i++) { - void *h = pci_handle[i]; - uint64_t attributes; - struct pci_setup_rom *rom; + struct pci_setup_rom *rom = NULL; + u32 h = handles[i]; status = efi_early->call(efi_early->handle_protocol, h, &pci_proto, (void **)&pci); @@ -232,56 +396,114 @@ static efi_status_t setup_efi_pci(struct boot_params *params) if (!pci) continue; -#ifdef CONFIG_X86_64 - status = efi_early->call((unsigned long)pci->attributes, pci, - EfiPciIoAttributeOperationGet, 0, - &attributes); -#else - status = efi_early->call((unsigned long)pci->attributes, pci, - EfiPciIoAttributeOperationGet, 0, 0, - &attributes); -#endif + status = __setup_efi_pci32(pci, &rom); if (status != EFI_SUCCESS) continue; - if (!pci->romimage || !pci->romsize) - continue; + if (data) + data->next = (unsigned long)rom; + else + params->hdr.setup_data = (unsigned long)rom; - size = pci->romsize + sizeof(*rom); + data = (struct setup_data *)rom; - status = efi_early->call(efi_early->allocate_pool, - EFI_LOADER_DATA, size, &rom); + } - if (status != EFI_SUCCESS) - continue; + return status; +} + +static efi_status_t +__setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) +{ + struct pci_setup_rom *rom; + efi_status_t status; + unsigned long size; + uint64_t attributes; + + status = efi_early->call(pci->attributes, pci, + EfiPciIoAttributeOperationGet, 0, + &attributes); + if (status != EFI_SUCCESS) + return status; - rom->data.type = SETUP_PCI; - rom->data.len = size - sizeof(struct setup_data); - rom->data.next = 0; - rom->pcilen = pci->romsize; + if (!pci->romimage || !pci->romsize) + return EFI_INVALID_PARAMETER; - status = efi_early->call((unsigned long)pci->pci.read, pci, - EfiPciIoWidthUint16, PCI_VENDOR_ID, - 1, &(rom->vendor)); + size = pci->romsize + sizeof(*rom); - if (status != EFI_SUCCESS) - goto free_struct; + status = efi_early->call(efi_early->allocate_pool, + EFI_LOADER_DATA, size, &rom); + + if (status != EFI_SUCCESS) + return status; - status = efi_early->call((unsigned long)pci->pci.read, pci, - EfiPciIoWidthUint16, PCI_DEVICE_ID, - 1, &(rom->devid)); + rom->data.type = SETUP_PCI; + rom->data.len = size - sizeof(struct setup_data); + rom->data.next = 0; + rom->pcilen = pci->romsize; + *__rom = rom; - if (status != EFI_SUCCESS) - goto free_struct; + status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, + PCI_VENDOR_ID, 1, &(rom->vendor)); + + if (status != EFI_SUCCESS) + goto free_struct; + + status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, + PCI_DEVICE_ID, 1, &(rom->devid)); + + if (status != EFI_SUCCESS) + goto free_struct; + + status = efi_early->call(pci->get_location, pci, &(rom->segment), + &(rom->bus), &(rom->device), &(rom->function)); + + if (status != EFI_SUCCESS) + goto free_struct; + + memcpy(rom->romdata, pci->romimage, pci->romsize); + return status; + +free_struct: + efi_early->call(efi_early->free_pool, rom); + return status; + +} + +static efi_status_t +setup_efi_pci64(struct boot_params *params, void **pci_handle, + unsigned long size) +{ + efi_pci_io_protocol_64 *pci = NULL; + efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; + u64 *handles = (u64 *)(unsigned long)pci_handle; + efi_status_t status; + unsigned long nr_pci; + struct setup_data *data; + int i; + + data = (struct setup_data *)(unsigned long)params->hdr.setup_data; + + while (data && data->next) + data = (struct setup_data *)(unsigned long)data->next; + + nr_pci = size / sizeof(u64); + for (i = 0; i < nr_pci; i++) { + struct pci_setup_rom *rom = NULL; + u64 h = handles[i]; - status = efi_early->call((unsigned long)pci->get_location, pci, - &(rom->segment), &(rom->bus), - &(rom->device), &(rom->function)); + status = efi_early->call(efi_early->handle_protocol, h, + &pci_proto, (void **)&pci); if (status != EFI_SUCCESS) - goto free_struct; + continue; + + if (!pci) + continue; - memcpy(rom->romdata, pci->romimage, pci->romsize); + status = __setup_efi_pci64(pci, &rom); + if (status != EFI_SUCCESS) + continue; if (data) data->next = (unsigned long)rom; @@ -290,104 +512,52 @@ static efi_status_t setup_efi_pci(struct boot_params *params) data = (struct setup_data *)rom; - continue; - free_struct: - efi_early->call(efi_early->free_pool, rom); } -free_handle: - efi_early->call(efi_early->free_pool, pci_handle); return status; } -/* - * See if we have Graphics Output Protocol - */ -static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, - unsigned long size) +static efi_status_t setup_efi_pci(struct boot_params *params) { - struct efi_graphics_output_protocol *gop, *first_gop; - struct efi_pixel_bitmask pixel_info; - unsigned long nr_gops; efi_status_t status; - void **gop_handle = NULL; - u16 width, height; - u32 fb_base, fb_size; - u32 pixels_per_scan_line; - int pixel_format; - int i; - - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - size, (void **)&gop_handle); - if (status != EFI_SUCCESS) - return status; + void **pci_handle = NULL; + efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; + unsigned long size = 0; status = efi_early->call(efi_early->locate_handle, EFI_LOCATE_BY_PROTOCOL, - proto, NULL, &size, gop_handle); - if (status != EFI_SUCCESS) - goto free_handle; - - first_gop = NULL; + &pci_proto, NULL, &size, pci_handle); - nr_gops = size / sizeof(void *); - for (i = 0; i < nr_gops; i++) { - struct efi_graphics_output_mode_info *info; - efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; - bool conout_found = false; - void *dummy; - void *h = gop_handle[i]; + if (status == EFI_BUFFER_TOO_SMALL) { + status = efi_early->call(efi_early->allocate_pool, + EFI_LOADER_DATA, + size, (void **)&pci_handle); - status = efi_early->call(efi_early->handle_protocol, h, - proto, (void **)&gop); if (status != EFI_SUCCESS) - continue; - - status = efi_early->call(efi_early->handle_protocol, h, - &conout_proto, &dummy); - if (status == EFI_SUCCESS) - conout_found = true; - - status = efi_early->call((unsigned long)gop->query_mode, gop, - gop->mode->mode, &size, &info); - if (status == EFI_SUCCESS && (!first_gop || conout_found)) { - /* - * Systems that use the UEFI Console Splitter may - * provide multiple GOP devices, not all of which are - * backed by real hardware. The workaround is to search - * for a GOP implementing the ConOut protocol, and if - * one isn't found, to just fall back to the first GOP. - */ - width = info->horizontal_resolution; - height = info->vertical_resolution; - fb_base = gop->mode->frame_buffer_base; - fb_size = gop->mode->frame_buffer_size; - pixel_format = info->pixel_format; - pixel_info = info->pixel_information; - pixels_per_scan_line = info->pixels_per_scan_line; + return status; - /* - * Once we've found a GOP supporting ConOut, - * don't bother looking any further. - */ - first_gop = gop; - if (conout_found) - break; - } + status = efi_early->call(efi_early->locate_handle, + EFI_LOCATE_BY_PROTOCOL, &pci_proto, + NULL, &size, pci_handle); } - /* Did we find any GOPs? */ - if (!first_gop) + if (status != EFI_SUCCESS) goto free_handle; - /* EFI framebuffer */ - si->orig_video_isVGA = VIDEO_TYPE_EFI; + if (efi_early->is64) + status = setup_efi_pci64(params, pci_handle, size); + else + status = setup_efi_pci32(params, pci_handle, size); - si->lfb_width = width; - si->lfb_height = height; - si->lfb_base = fb_base; - si->pages = 1; +free_handle: + efi_early->call(efi_early->free_pool, pci_handle); + return status; +} +static void +setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, + struct efi_pixel_bitmask pixel_info, int pixel_format) +{ if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { si->lfb_depth = 32; si->lfb_linelength = pixels_per_scan_line * 4; @@ -432,51 +602,310 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, si->rsvd_size = 0; si->rsvd_pos = 0; } +} + +static efi_status_t +__gop_query32(struct efi_graphics_output_protocol_32 *gop32, + struct efi_graphics_output_mode_info **info, + unsigned long *size, u32 *fb_base) +{ + struct efi_graphics_output_protocol_mode_32 *mode; + efi_status_t status; + unsigned long m; + + m = gop32->mode; + mode = (struct efi_graphics_output_protocol_mode_32 *)m; + + status = efi_early->call(gop32->query_mode, gop32, + mode->mode, size, info); + if (status != EFI_SUCCESS) + return status; + + *fb_base = mode->frame_buffer_base; + return status; +} + +static efi_status_t +setup_gop32(struct screen_info *si, efi_guid_t *proto, + unsigned long size, void **gop_handle) +{ + struct efi_graphics_output_protocol_32 *gop32, *first_gop; + unsigned long nr_gops; + u16 width, height; + u32 pixels_per_scan_line; + u32 fb_base; + struct efi_pixel_bitmask pixel_info; + int pixel_format; + efi_status_t status; + u32 *handles = (u32 *)(unsigned long)gop_handle; + int i; + + first_gop = NULL; + gop32 = NULL; + + nr_gops = size / sizeof(u32); + for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_mode_info *info = NULL; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy = NULL; + u32 h = handles[i]; + + status = efi_early->call(efi_early->handle_protocol, h, + proto, (void **)&gop32); + if (status != EFI_SUCCESS) + continue; + + status = efi_early->call(efi_early->handle_protocol, h, + &conout_proto, &dummy); + if (status == EFI_SUCCESS) + conout_found = true; + + status = __gop_query32(gop32, &info, &size, &fb_base); + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { + /* + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. + */ + width = info->horizontal_resolution; + height = info->vertical_resolution; + pixel_format = info->pixel_format; + pixel_info = info->pixel_information; + pixels_per_scan_line = info->pixels_per_scan_line; + + /* + * Once we've found a GOP supporting ConOut, + * don't bother looking any further. + */ + first_gop = gop32; + if (conout_found) + break; + } + } + + /* Did we find any GOPs? */ + if (!first_gop) + goto out; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_width = width; + si->lfb_height = height; + si->lfb_base = fb_base; + si->pages = 1; + + setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); si->lfb_size = si->lfb_linelength * si->lfb_height; si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; +out: + return status; +} -free_handle: - efi_early->call(efi_early->free_pool, gop_handle); +static efi_status_t +__gop_query64(struct efi_graphics_output_protocol_64 *gop64, + struct efi_graphics_output_mode_info **info, + unsigned long *size, u32 *fb_base) +{ + struct efi_graphics_output_protocol_mode_64 *mode; + efi_status_t status; + unsigned long m; + + m = gop64->mode; + mode = (struct efi_graphics_output_protocol_mode_64 *)m; + + status = efi_early->call(gop64->query_mode, gop64, + mode->mode, size, info); + if (status != EFI_SUCCESS) + return status; + + *fb_base = mode->frame_buffer_base; + return status; +} + +static efi_status_t +setup_gop64(struct screen_info *si, efi_guid_t *proto, + unsigned long size, void **gop_handle) +{ + struct efi_graphics_output_protocol_64 *gop64, *first_gop; + unsigned long nr_gops; + u16 width, height; + u32 pixels_per_scan_line; + u32 fb_base; + struct efi_pixel_bitmask pixel_info; + int pixel_format; + efi_status_t status; + u64 *handles = (u64 *)(unsigned long)gop_handle; + int i; + + first_gop = NULL; + gop64 = NULL; + + nr_gops = size / sizeof(u64); + for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_mode_info *info = NULL; + efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; + bool conout_found = false; + void *dummy = NULL; + u64 h = handles[i]; + + status = efi_early->call(efi_early->handle_protocol, h, + proto, (void **)&gop64); + if (status != EFI_SUCCESS) + continue; + + status = efi_early->call(efi_early->handle_protocol, h, + &conout_proto, &dummy); + if (status == EFI_SUCCESS) + conout_found = true; + + status = __gop_query64(gop64, &info, &size, &fb_base); + if (status == EFI_SUCCESS && (!first_gop || conout_found)) { + /* + * Systems that use the UEFI Console Splitter may + * provide multiple GOP devices, not all of which are + * backed by real hardware. The workaround is to search + * for a GOP implementing the ConOut protocol, and if + * one isn't found, to just fall back to the first GOP. + */ + width = info->horizontal_resolution; + height = info->vertical_resolution; + pixel_format = info->pixel_format; + pixel_info = info->pixel_information; + pixels_per_scan_line = info->pixels_per_scan_line; + + /* + * Once we've found a GOP supporting ConOut, + * don't bother looking any further. + */ + first_gop = gop64; + if (conout_found) + break; + } + } + + /* Did we find any GOPs? */ + if (!first_gop) + goto out; + + /* EFI framebuffer */ + si->orig_video_isVGA = VIDEO_TYPE_EFI; + + si->lfb_width = width; + si->lfb_height = height; + si->lfb_base = fb_base; + si->pages = 1; + + setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); + + si->lfb_size = si->lfb_linelength * si->lfb_height; + + si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; +out: return status; } /* - * See if we have Universal Graphics Adapter (UGA) protocol + * See if we have Graphics Output Protocol */ -static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, +static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, unsigned long size) { - struct efi_uga_draw_protocol *uga, *first_uga; - unsigned long nr_ugas; efi_status_t status; - u32 width, height; - void **uga_handle = NULL; - int i; + void **gop_handle = NULL; status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - size, (void **)&uga_handle); + size, (void **)&gop_handle); if (status != EFI_SUCCESS) return status; status = efi_early->call(efi_early->locate_handle, EFI_LOCATE_BY_PROTOCOL, - uga_proto, NULL, &size, uga_handle); + proto, NULL, &size, gop_handle); if (status != EFI_SUCCESS) goto free_handle; + if (efi_early->is64) + status = setup_gop64(si, proto, size, gop_handle); + else + status = setup_gop32(si, proto, size, gop_handle); + +free_handle: + efi_early->call(efi_early->free_pool, gop_handle); + return status; +} + +static efi_status_t +setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) +{ + struct efi_uga_draw_protocol *uga = NULL, *first_uga; + efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; + unsigned long nr_ugas; + u32 *handles = (u32 *)uga_handle;; + efi_status_t status; + int i; + first_uga = NULL; + nr_ugas = size / sizeof(u32); + for (i = 0; i < nr_ugas; i++) { + efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; + u32 w, h, depth, refresh; + void *pciio; + u32 handle = handles[i]; + + status = efi_early->call(efi_early->handle_protocol, handle, + &uga_proto, (void **)&uga); + if (status != EFI_SUCCESS) + continue; + + efi_early->call(efi_early->handle_protocol, handle, + &pciio_proto, &pciio); + + status = efi_early->call((unsigned long)uga->get_mode, uga, + &w, &h, &depth, &refresh); + if (status == EFI_SUCCESS && (!first_uga || pciio)) { + *width = w; + *height = h; + + /* + * Once we've found a UGA supporting PCIIO, + * don't bother looking any further. + */ + if (pciio) + break; + + first_uga = uga; + } + } + + return status; +} + +static efi_status_t +setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) +{ + struct efi_uga_draw_protocol *uga = NULL, *first_uga; + efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; + unsigned long nr_ugas; + u64 *handles = (u64 *)uga_handle;; + efi_status_t status; + int i; - nr_ugas = size / sizeof(void *); + first_uga = NULL; + nr_ugas = size / sizeof(u64); for (i = 0; i < nr_ugas; i++) { efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; - void *handle = uga_handle[i]; u32 w, h, depth, refresh; void *pciio; + u64 handle = handles[i]; status = efi_early->call(efi_early->handle_protocol, handle, - uga_proto, (void **)&uga); + &uga_proto, (void **)&uga); if (status != EFI_SUCCESS) continue; @@ -486,8 +915,8 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, status = efi_early->call((unsigned long)uga->get_mode, uga, &w, &h, &depth, &refresh); if (status == EFI_SUCCESS && (!first_uga || pciio)) { - width = w; - height = h; + *width = w; + *height = h; /* * Once we've found a UGA supporting PCIIO, @@ -500,7 +929,39 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, } } - if (!first_uga) + return status; +} + +/* + * See if we have Universal Graphics Adapter (UGA) protocol + */ +static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, + unsigned long size) +{ + efi_status_t status; + u32 width, height; + void **uga_handle = NULL; + + status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, + size, (void **)&uga_handle); + if (status != EFI_SUCCESS) + return status; + + status = efi_early->call(efi_early->locate_handle, + EFI_LOCATE_BY_PROTOCOL, + uga_proto, NULL, &size, uga_handle); + if (status != EFI_SUCCESS) + goto free_handle; + + height = 0; + width = 0; + + if (efi_early->is64) + status = setup_uga64(uga_handle, size, &width, &height); + else + status = setup_uga32(uga_handle, size, &width, &height); + + if (!width && !height) goto free_handle; /* EFI framebuffer */ @@ -519,7 +980,6 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, si->rsvd_size = 8; si->rsvd_pos = 24; - free_handle: efi_early->call(efi_early->free_pool, uga_handle); return status; -- cgit v0.10.2 From b8ff87a6158886771677e6dc8139bac6e3cba717 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 15:54:31 +0000 Subject: x86/efi: Firmware agnostic handover entry points The EFI handover code only works if the "bitness" of the firmware and the kernel match, i.e. 64-bit firmware and 64-bit kernel - it is not possible to mix the two. This goes against the tradition that a 32-bit kernel can be loaded on a 64-bit BIOS platform without having to do anything special in the boot loader. Linux distributions, for one thing, regularly run only 32-bit kernels on their live media. Despite having only one 'handover_offset' field in the kernel header, EFI boot loaders use two separate entry points to enter the kernel based on the architecture the boot loader was compiled for, (1) 32-bit loader: handover_offset (2) 64-bit loader: handover_offset + 512 Since we already have two entry points, we can leverage them to infer the bitness of the firmware we're running on, without requiring any boot loader modifications, by making (1) and (2) valid entry points for both CONFIG_X86_32 and CONFIG_X86_64 kernels. To be clear, a 32-bit boot loader will always use (1) and a 64-bit boot loader will always use (2). It's just that, if a single kernel image supports (1) and (2) that image can be used with both 32-bit and 64-bit boot loaders, and hence both 32-bit and 64-bit EFI. (1) and (2) must be 512 bytes apart at all times, but that is already part of the boot ABI and we could never change that delta without breaking existing boot loaders anyhow. Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 878df7e..abb9eba 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -80,7 +80,7 @@ targets += voffset.h $(obj)/voffset.h: vmlinux FORCE $(call if_changed,voffset) -sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi_pe_entry\|efi_stub_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' +sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' quiet_cmd_zoffset = ZOFFSET $@ cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ab1f3a2..5e1ba4f 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1256,12 +1256,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, } static efi_status_t exit_boot(struct boot_params *boot_params, - void *handle) + void *handle, bool is64) { struct efi_info *efi = &boot_params->efi_info; unsigned long map_sz, key, desc_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; + const char *signature; __u32 e820ext_size; __u32 nr_desc, prev_nr_desc; efi_status_t status; @@ -1295,7 +1296,9 @@ get_map: goto get_map; /* Allocated memory, get map again */ } - memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32)); + signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; + memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); + efi->efi_systab = (unsigned long)sys_table; efi->efi_memdesc_size = desc_size; efi->efi_memdesc_version = desc_version; @@ -1408,7 +1411,7 @@ struct boot_params *efi_main(struct efi_config *c, hdr->code32_start = bzimage_addr; } - status = exit_boot(boot_params, handle); + status = exit_boot(boot_params, handle, is64); if (status != EFI_SUCCESS) goto fail; diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index eed23c0..cccc05f 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -64,7 +64,7 @@ ENTRY(efi_pe_entry) pushl %ecx jmp 2f /* Skip efi_config initialization */ -ENTRY(efi_stub_entry) +ENTRY(efi32_stub_entry) add $0x4, %esp popl %ecx popl %edx diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 1bc206f..37c741b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -178,6 +178,13 @@ ENTRY(startup_32) */ pushl $__KERNEL_CS leal startup_64(%ebp), %eax +#ifdef CONFIG_EFI_MIXED + movl efi32_config(%ebp), %ebx + cmp $0, %ebx + jz 1f + leal handover_entry(%ebp), %eax +1: +#endif pushl %eax /* Enter paged protected Mode, activating Long Mode */ @@ -188,6 +195,30 @@ ENTRY(startup_32) lret ENDPROC(startup_32) +#ifdef CONFIG_EFI_MIXED + .org 0x190 +ENTRY(efi32_stub_entry) + add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi + + leal (BP_scratch+4)(%esi), %esp + call 1f +1: pop %ebp + subl $1b, %ebp + + movl %ecx, efi32_config(%ebp) + movl %edx, efi32_config+8(%ebp) + sgdtl efi32_boot_gdt(%ebp) + + leal efi32_config(%ebp), %eax + movl %eax, efi_config(%ebp) + + jmp startup_32 +ENDPROC(efi32_stub_entry) +#endif + .code64 .org 0x200 ENTRY(startup_64) @@ -231,13 +262,7 @@ ENTRY(efi_pe_entry) mov %rax, %rsi jmp 2f /* Skip the relocation */ -ENTRY(efi_stub_entry) - movq %rdi, efi64_config(%rip) /* Handle */ - movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ - - leaq efi64_config(%rip), %rax - movq %rax, efi_config(%rip) - +handover_entry: call 1f 1: popq %rbp subq $1b, %rbp @@ -247,7 +272,6 @@ ENTRY(efi_stub_entry) */ movq efi_config(%rip), %rax addq %rbp, 88(%rax) - movq %rdx, %rsi 2: movq efi_config(%rip), %rdi call efi_main @@ -336,6 +360,20 @@ preferred_addr: leaq relocated(%rbx), %rax jmp *%rax +#ifdef CONFIG_EFI_STUB + .org 0x390 +ENTRY(efi64_stub_entry) + movq %rdi, efi64_config(%rip) /* Handle */ + movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ + + leaq efi64_config(%rip), %rax + movq %rax, efi_config(%rip) + + movq %rdx, %rsi + jmp handover_entry +ENDPROC(efi64_stub_entry) +#endif + .text relocated: @@ -404,6 +442,14 @@ gdt_end: efi_config: .quad 0 +#ifdef CONFIG_EFI_MIXED + .global efi32_config +efi32_config: + .fill 11,8,0 + .quad efi64_thunk + .byte 0 +#endif + .global efi64_config efi64_config: .fill 11,8,0 diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index bf26207..4f07df5 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -53,7 +53,8 @@ int is_big_kernel; #define PECOFF_RELOC_RESERVE 0x20 -unsigned long efi_stub_entry; +unsigned long efi32_stub_entry; +unsigned long efi64_stub_entry; unsigned long efi_pe_entry; unsigned long startup_64; @@ -231,20 +232,26 @@ static void efi_stub_defaults(void) /* Defaults for old kernel */ #ifdef CONFIG_X86_32 efi_pe_entry = 0x10; - efi_stub_entry = 0x30; #else efi_pe_entry = 0x210; - efi_stub_entry = 0x230; startup_64 = 0x200; #endif } static void efi_stub_entry_update(void) { -#ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ - efi_stub_entry -= 0x200; + unsigned long addr = efi32_stub_entry; + +#ifdef CONFIG_X86_64 + /* Yes, this is really how we defined it :( */ + addr = efi64_stub_entry - 0x200; +#endif + +#ifdef CONFIG_EFI_MIXED + if (efi32_stub_entry != addr) + die("32-bit and 64-bit EFI entry points do not match\n"); #endif - put_unaligned_le32(efi_stub_entry, &buf[0x264]); + put_unaligned_le32(addr, &buf[0x264]); } #else @@ -289,7 +296,8 @@ static void parse_zoffset(char *fname) p = (char *)buf; while (p && *p) { - PARSE_ZOFS(p, efi_stub_entry); + PARSE_ZOFS(p, efi32_stub_entry); + PARSE_ZOFS(p, efi64_stub_entry); PARSE_ZOFS(p, efi_pe_entry); PARSE_ZOFS(p, startup_64); diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3d6b9f8..8585686 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -19,9 +19,11 @@ */ #define EFI_OLD_MEMMAP EFI_ARCH_1 +#define EFI32_LOADER_SIGNATURE "EL32" +#define EFI64_LOADER_SIGNATURE "EL64" + #ifdef CONFIG_X86_32 -#define EFI_LOADER_SIGNATURE "EL32" extern unsigned long asmlinkage efi_call_phys(void *, ...); @@ -57,8 +59,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #else /* !CONFIG_X86_32 */ -#define EFI_LOADER_SIGNATURE "EL64" - extern u64 efi_call0(void *fp); extern u64 efi_call1(void *fp, u64 arg1); extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); -- cgit v0.10.2 From 4f9dbcfc40299ddaa780fe8c1cd74998c1be3af5 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 18:48:30 +0000 Subject: x86/efi: Add mixed runtime services support Setup the runtime services based on whether we're booting in EFI native mode or not. For non-native mode we need to thunk from 64-bit into 32-bit mode before invoking the EFI runtime services. Using the runtime services after SetVirtualAddressMap() is slightly more complicated because we need to ensure that all the addresses we pass to the firmware are below the 4GB boundary so that they can be addressed with 32-bit pointers, see efi_setup_page_tables(). Signed-off-by: Matt Fleming diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8585686..85935e6 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -154,6 +154,27 @@ static inline bool efi_is_native(void) extern struct console early_efi_console; extern void parse_efi_setup(u64 phys_addr, u32 data_len); + +#ifdef CONFIG_EFI_MIXED +extern void efi_thunk_runtime_setup(void); +extern efi_status_t efi_thunk_set_virtual_address_map( + void *phys_set_virtual_address_map, + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map); +#else +static inline void efi_thunk_runtime_setup(void) {} +static inline efi_status_t efi_thunk_set_virtual_address_map( + void *phys_set_virtual_address_map, + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + return EFI_SUCCESS; +} +#endif /* CONFIG_EFI_MIXED */ #else /* * IF EFI is not configured, have the EFI calls return -ENOSYS. diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index b7b0b35..d51045a 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o +obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b96ae79..39f5b7b 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -588,37 +588,85 @@ static int __init efi_systab_init(void *phys) return 0; } -static int __init efi_runtime_init(void) +static int __init efi_runtime_init32(void) { - efi_runtime_services_t *runtime; + efi_runtime_services_32_t *runtime; + + runtime = early_ioremap((unsigned long)efi.systab->runtime, + sizeof(efi_runtime_services_32_t)); + if (!runtime) { + pr_err("Could not map the runtime service table!\n"); + return -ENOMEM; + } /* - * Check out the runtime services table. We need to map - * the runtime services table so that we can grab the physical - * address of several of the EFI runtime functions, needed to - * set the firmware into virtual mode. + * We will only need *early* access to the following two + * EFI runtime services before set_virtual_address_map + * is invoked. */ + efi_phys.get_time = (efi_get_time_t *) + (unsigned long)runtime->get_time; + efi_phys.set_virtual_address_map = + (efi_set_virtual_address_map_t *) + (unsigned long)runtime->set_virtual_address_map; + /* + * Make efi_get_time can be called before entering + * virtual mode. + */ + efi.get_time = phys_efi_get_time; + early_iounmap(runtime, sizeof(efi_runtime_services_32_t)); + + return 0; +} + +static int __init efi_runtime_init64(void) +{ + efi_runtime_services_64_t *runtime; + runtime = early_ioremap((unsigned long)efi.systab->runtime, - sizeof(efi_runtime_services_t)); + sizeof(efi_runtime_services_64_t)); if (!runtime) { pr_err("Could not map the runtime service table!\n"); return -ENOMEM; } + /* - * We will only need *early* access to the following - * two EFI runtime services before set_virtual_address_map + * We will only need *early* access to the following two + * EFI runtime services before set_virtual_address_map * is invoked. */ - efi_phys.get_time = (efi_get_time_t *)runtime->get_time; + efi_phys.get_time = (efi_get_time_t *) + (unsigned long)runtime->get_time; efi_phys.set_virtual_address_map = - (efi_set_virtual_address_map_t *) - runtime->set_virtual_address_map; + (efi_set_virtual_address_map_t *) + (unsigned long)runtime->set_virtual_address_map; /* * Make efi_get_time can be called before entering * virtual mode. */ efi.get_time = phys_efi_get_time; - early_iounmap(runtime, sizeof(efi_runtime_services_t)); + early_iounmap(runtime, sizeof(efi_runtime_services_64_t)); + + return 0; +} + +static int __init efi_runtime_init(void) +{ + int rv; + + /* + * Check out the runtime services table. We need to map + * the runtime services table so that we can grab the physical + * address of several of the EFI runtime functions, needed to + * set the firmware into virtual mode. + */ + if (efi_enabled(EFI_64BIT)) + rv = efi_runtime_init64(); + else + rv = efi_runtime_init32(); + + if (rv) + return rv; return 0; } @@ -841,6 +889,22 @@ void __init old_map_region(efi_memory_desc_t *md) (unsigned long long)md->phys_addr); } +static void native_runtime_setup(void) +{ + efi.get_time = virt_efi_get_time; + efi.set_time = virt_efi_set_time; + efi.get_wakeup_time = virt_efi_get_wakeup_time; + efi.set_wakeup_time = virt_efi_set_wakeup_time; + efi.get_variable = virt_efi_get_variable; + efi.get_next_variable = virt_efi_get_next_variable; + efi.set_variable = virt_efi_set_variable; + efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; + efi.reset_system = virt_efi_reset_system; + efi.query_variable_info = virt_efi_query_variable_info; + efi.update_capsule = virt_efi_update_capsule; + efi.query_capsule_caps = virt_efi_query_capsule_caps; +} + /* Merge contiguous regions of the same type and attribute */ static void __init efi_merge_regions(void) { @@ -1023,11 +1087,20 @@ void __init efi_enter_virtual_mode(void) efi_sync_low_kernel_mappings(); if (!efi_setup) { - status = phys_efi_set_virtual_address_map( - memmap.desc_size * count, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)__pa(new_memmap)); + if (efi_is_native()) { + status = phys_efi_set_virtual_address_map( + memmap.desc_size * count, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)__pa(new_memmap)); + } else { + status = efi_thunk_set_virtual_address_map( + efi_phys.set_virtual_address_map, + memmap.desc_size * count, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)__pa(new_memmap)); + } if (status != EFI_SUCCESS) { pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", @@ -1043,19 +1116,13 @@ void __init efi_enter_virtual_mode(void) * Call EFI services through wrapper functions. */ efi.runtime_version = efi_systab.hdr.revision; - efi.get_time = virt_efi_get_time; - efi.set_time = virt_efi_set_time; - efi.get_wakeup_time = virt_efi_get_wakeup_time; - efi.set_wakeup_time = virt_efi_set_wakeup_time; - efi.get_variable = virt_efi_get_variable; - efi.get_next_variable = virt_efi_get_next_variable; - efi.set_variable = virt_efi_set_variable; - efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; - efi.reset_system = virt_efi_reset_system; + + if (efi_is_native()) + native_runtime_setup(); + else + efi_thunk_runtime_setup(); + efi.set_virtual_address_map = NULL; - efi.query_variable_info = virt_efi_query_variable_info; - efi.update_capsule = virt_efi_update_capsule; - efi.query_capsule_caps = virt_efi_query_capsule_caps; efi_runtime_mkexec(); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 0c2a234..12112ab 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -39,6 +39,7 @@ #include #include #include +#include static pgd_t *save_pgd __initdata; static unsigned long efi_flags __initdata; @@ -58,7 +59,8 @@ struct efi_scratch { u64 prev_cr3; pgd_t *efi_pgt; bool use_pgd; -}; + u64 phys_stack; +} __packed; static void __init early_code_mapping_set_exec(int executable) { @@ -139,10 +141,40 @@ void efi_sync_low_kernel_mappings(void) void efi_setup_page_tables(void) { + unsigned long text; + unsigned npages; + struct page *page; + efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; - if (!efi_enabled(EFI_OLD_MEMMAP)) - efi_scratch.use_pgd = true; + if (efi_enabled(EFI_OLD_MEMMAP)) + return; + + efi_scratch.use_pgd = true; + + /* + * When making calls to the firmware everything needs to be 1:1 + * mapped and addressable with 32-bit pointers. Map the kernel + * text and allocate a new stack because we can't rely on the + * stack pointer being < 4GB. + */ + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return; + + page = alloc_page(GFP_KERNEL|__GFP_DMA32); + if (!page) + panic("Unable to allocate EFI runtime stack < 4GB\n"); + + efi_scratch.phys_stack = virt_to_phys(page_address(page)); + efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + + npages = (_end - _text) >> PAGE_SHIFT; + text = __pa(_text); + + if (kernel_map_pages_in_pgd(__va(efi_scratch.efi_pgt), + text >> PAGE_SHIFT, text, npages, 0)) { + pr_err("Failed to map kernel text 1:1\n"); + } } static void __init __map_region(efi_memory_desc_t *md, u64 va) @@ -173,6 +205,16 @@ void __init efi_map_region(efi_memory_desc_t *md) */ __map_region(md, md->phys_addr); + /* + * Enforce the 1:1 mapping as the default virtual address when + * booting in EFI mixed mode, because even though we may be + * running a 64-bit kernel, the firmware may only be 32-bit. + */ + if (!efi_is_native () && IS_ENABLED(CONFIG_EFI_MIXED)) { + md->virt_addr = md->phys_addr; + return; + } + efi_va -= size; /* Is PA 2M-aligned? */ @@ -242,3 +284,290 @@ void __init efi_runtime_mkexec(void) if (__supported_pte_mask & _PAGE_NX) runtime_code_page_mkexec(); } + +#ifdef CONFIG_EFI_MIXED +extern efi_status_t efi64_thunk(u32, ...); + +#define runtime_service32(func) \ +({ \ + u32 table = (u32)(unsigned long)efi.systab; \ + u32 *rt, *___f; \ + \ + rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \ + ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ + *___f; \ +}) + +/* + * Switch to the EFI page tables early so that we can access the 1:1 + * runtime services mappings which are not mapped in any other page + * tables. This function must be called before runtime_service32(). + * + * Also, disable interrupts because the IDT points to 64-bit handlers, + * which aren't going to function correctly when we switch to 32-bit. + */ +#define efi_thunk(f, ...) \ +({ \ + efi_status_t __s; \ + unsigned long flags; \ + u32 func; \ + \ + efi_sync_low_kernel_mappings(); \ + local_irq_save(flags); \ + \ + efi_scratch.prev_cr3 = read_cr3(); \ + write_cr3((unsigned long)efi_scratch.efi_pgt); \ + __flush_tlb_all(); \ + \ + func = runtime_service32(f); \ + __s = efi64_thunk(func, __VA_ARGS__); \ + \ + write_cr3(efi_scratch.prev_cr3); \ + __flush_tlb_all(); \ + local_irq_restore(flags); \ + \ + __s; \ +}) + +efi_status_t efi_thunk_set_virtual_address_map( + void *phys_set_virtual_address_map, + unsigned long memory_map_size, + unsigned long descriptor_size, + u32 descriptor_version, + efi_memory_desc_t *virtual_map) +{ + efi_status_t status; + unsigned long flags; + u32 func; + + efi_sync_low_kernel_mappings(); + local_irq_save(flags); + + efi_scratch.prev_cr3 = read_cr3(); + write_cr3((unsigned long)efi_scratch.efi_pgt); + __flush_tlb_all(); + + func = (u32)(unsigned long)phys_set_virtual_address_map; + status = efi64_thunk(func, memory_map_size, descriptor_size, + descriptor_version, virtual_map); + + write_cr3(efi_scratch.prev_cr3); + __flush_tlb_all(); + local_irq_restore(flags); + + return status; +} + +static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) +{ + efi_status_t status; + u32 phys_tm, phys_tc; + + spin_lock(&rtc_lock); + + phys_tm = virt_to_phys(tm); + phys_tc = virt_to_phys(tc); + + status = efi_thunk(get_time, phys_tm, phys_tc); + + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t efi_thunk_set_time(efi_time_t *tm) +{ + efi_status_t status; + u32 phys_tm; + + spin_lock(&rtc_lock); + + phys_tm = virt_to_phys(tm); + + status = efi_thunk(set_time, phys_tm); + + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t +efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, + efi_time_t *tm) +{ + efi_status_t status; + u32 phys_enabled, phys_pending, phys_tm; + + spin_lock(&rtc_lock); + + phys_enabled = virt_to_phys(enabled); + phys_pending = virt_to_phys(pending); + phys_tm = virt_to_phys(tm); + + status = efi_thunk(get_wakeup_time, phys_enabled, + phys_pending, phys_tm); + + spin_unlock(&rtc_lock); + + return status; +} + +static efi_status_t +efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) +{ + efi_status_t status; + u32 phys_tm; + + spin_lock(&rtc_lock); + + phys_tm = virt_to_phys(tm); + + status = efi_thunk(set_wakeup_time, enabled, phys_tm); + + spin_unlock(&rtc_lock); + + return status; +} + + +static efi_status_t +efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 *attr, unsigned long *data_size, void *data) +{ + efi_status_t status; + u32 phys_name, phys_vendor, phys_attr; + u32 phys_data_size, phys_data; + + phys_data_size = virt_to_phys(data_size); + phys_vendor = virt_to_phys(vendor); + phys_name = virt_to_phys(name); + phys_attr = virt_to_phys(attr); + phys_data = virt_to_phys(data); + + status = efi_thunk(get_variable, phys_name, phys_vendor, + phys_attr, phys_data_size, phys_data); + + return status; +} + +static efi_status_t +efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, + u32 attr, unsigned long data_size, void *data) +{ + u32 phys_name, phys_vendor, phys_data; + efi_status_t status; + + phys_name = virt_to_phys(name); + phys_vendor = virt_to_phys(vendor); + phys_data = virt_to_phys(data); + + /* If data_size is > sizeof(u32) we've got problems */ + status = efi_thunk(set_variable, phys_name, phys_vendor, + attr, data_size, phys_data); + + return status; +} + +static efi_status_t +efi_thunk_get_next_variable(unsigned long *name_size, + efi_char16_t *name, + efi_guid_t *vendor) +{ + efi_status_t status; + u32 phys_name_size, phys_name, phys_vendor; + + phys_name_size = virt_to_phys(name_size); + phys_vendor = virt_to_phys(vendor); + phys_name = virt_to_phys(name); + + status = efi_thunk(get_next_variable, phys_name_size, + phys_name, phys_vendor); + + return status; +} + +static efi_status_t +efi_thunk_get_next_high_mono_count(u32 *count) +{ + efi_status_t status; + u32 phys_count; + + phys_count = virt_to_phys(count); + status = efi_thunk(get_next_high_mono_count, phys_count); + + return status; +} + +static void +efi_thunk_reset_system(int reset_type, efi_status_t status, + unsigned long data_size, efi_char16_t *data) +{ + u32 phys_data; + + phys_data = virt_to_phys(data); + + efi_thunk(reset_system, reset_type, status, data_size, phys_data); +} + +static efi_status_t +efi_thunk_update_capsule(efi_capsule_header_t **capsules, + unsigned long count, unsigned long sg_list) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +static efi_status_t +efi_thunk_query_variable_info(u32 attr, u64 *storage_space, + u64 *remaining_space, + u64 *max_variable_size) +{ + efi_status_t status; + u32 phys_storage, phys_remaining, phys_max; + + if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) + return EFI_UNSUPPORTED; + + phys_storage = virt_to_phys(storage_space); + phys_remaining = virt_to_phys(remaining_space); + phys_max = virt_to_phys(max_variable_size); + + status = efi_thunk(query_variable_info, phys_storage, + phys_remaining, phys_max); + + return status; +} + +static efi_status_t +efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, + unsigned long count, u64 *max_size, + int *reset_type) +{ + /* + * To properly support this function we would need to repackage + * 'capsules' because the firmware doesn't understand 64-bit + * pointers. + */ + return EFI_UNSUPPORTED; +} + +void efi_thunk_runtime_setup(void) +{ + efi.get_time = efi_thunk_get_time; + efi.set_time = efi_thunk_set_time; + efi.get_wakeup_time = efi_thunk_get_wakeup_time; + efi.set_wakeup_time = efi_thunk_set_wakeup_time; + efi.get_variable = efi_thunk_get_variable; + efi.get_next_variable = efi_thunk_get_next_variable; + efi.set_variable = efi_thunk_set_variable; + efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count; + efi.reset_system = efi_thunk_reset_system; + efi.query_variable_info = efi_thunk_query_variable_info; + efi.update_capsule = efi_thunk_update_capsule; + efi.query_capsule_caps = efi_thunk_query_capsule_caps; +} +#endif /* CONFIG_EFI_MIXED */ diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index a790d69..e811514 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -318,3 +318,4 @@ efi_gdt64_end: ENTRY(efi_scratch) .fill 3,8,0 .byte 0 + .quad 0 diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S new file mode 100644 index 0000000..8806fa7 --- /dev/null +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2014 Intel Corporation; author Matt Fleming + */ + +#include +#include + + .text + .code64 +ENTRY(efi64_thunk) + push %rbp + push %rbx + + /* + * Switch to 1:1 mapped 32-bit stack pointer. + */ + movq %rsp, efi_saved_sp(%rip) + movq efi_scratch+25(%rip), %rsp + + /* + * Calculate the physical address of the kernel text. + */ + movq $__START_KERNEL_map, %rax + subq phys_base(%rip), %rax + + /* + * Push some physical addresses onto the stack. This is easier + * to do now in a code64 section while the assembler can address + * 64-bit values. Note that all the addresses on the stack are + * 32-bit. + */ + subq $16, %rsp + leaq efi_exit32(%rip), %rbx + subq %rax, %rbx + movl %ebx, 8(%rsp) + leaq efi_gdt64(%rip), %rbx + subq %rax, %rbx + movl %ebx, 2(%ebx) + movl %ebx, 4(%rsp) + leaq efi_gdt32(%rip), %rbx + subq %rax, %rbx + movl %ebx, 2(%ebx) + movl %ebx, (%rsp) + + leaq __efi64_thunk(%rip), %rbx + subq %rax, %rbx + call *%rbx + + movq efi_saved_sp(%rip), %rsp + pop %rbx + pop %rbp + retq +ENDPROC(efi64_thunk) + + .data +efi_gdt32: + .word efi_gdt32_end - efi_gdt32 + .long 0 /* Filled out above */ + .word 0 + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x00cf9a000000ffff /* __KERNEL_CS */ + .quad 0x00cf93000000ffff /* __KERNEL_DS */ +efi_gdt32_end: + +efi_saved_sp: .quad 0 -- cgit v0.10.2 From 7d453eee36ae4cf30fc2f2faae54f634c4f863b7 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 10 Jan 2014 18:52:06 +0000 Subject: x86/efi: Wire up CONFIG_EFI_MIXED Add the Kconfig option and bump the kernel header version so that boot loaders can check whether the handover code is available if they want. The xloadflags field in the bzImage header is also updated to reflect that the kernel supports both entry points by setting both of XLF_EFI_HANDOVER_32 and XLF_EFI_HANDOVER_64 when CONFIG_EFI_MIXED=y. XLF_CAN_BE_LOADED_ABOVE_4G is disabled so that the kernel text is guaranteed to be addressable with 32-bits. Note that no boot loaders should be using the bits set in xloadflags to decide which entry point to jump to. The entire scheme is based on the concept that 32-bit bootloaders always jump to ->handover_offset and 64-bit loaders always jump to ->handover_offset + 512. We set both bits merely to inform the boot loader that it's safe to use the native handover offset even if the machine type in the PE/COFF header claims otherwise. Signed-off-by: Matt Fleming diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0af5250..8453fe1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1585,6 +1585,20 @@ config EFI_STUB See Documentation/efi-stub.txt for more information. +config EFI_MIXED + bool "EFI mixed-mode support" + depends on EFI_STUB && X86_64 + ---help--- + Enabling this feature allows a 64-bit kernel to be booted + on a 32-bit firmware, provided that your CPU supports 64-bit + mode. + + Note that it is not possible to boot a mixed-mode enabled + kernel via the EFI boot stub - a bootloader that supports + the EFI handover protocol must be used. + + If unsure, say N. + config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index d69d966..2563882 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -283,7 +283,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x020c # header version number (>= 0x0105) + .word 0x020d # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -375,7 +375,8 @@ xloadflags: # define XLF0 0 #endif -#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) +#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) && \ + !defined(CONFIG_EFI_MIXED) /* kernel/boot_param/ramdisk could be loaded above 4g */ # define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G #else @@ -383,10 +384,14 @@ xloadflags: #endif #ifdef CONFIG_EFI_STUB -# ifdef CONFIG_X86_64 -# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ +# ifdef CONFIG_EFI_MIXED +# define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64) # else -# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */ +# ifdef CONFIG_X86_64 +# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ +# else +# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */ +# endif # endif #else # define XLF23 0 diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 85935e6..a383725 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -152,6 +152,17 @@ static inline bool efi_is_native(void) return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); } +static inline bool efi_runtime_supported(void) +{ + if (efi_is_native()) + return true; + + if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) + return true; + + return false; +} + extern struct console early_efi_console; extern void parse_efi_setup(u64 phys_addr, u32 data_len); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 06853e6..ff9b3a6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1243,7 +1243,7 @@ void __init setup_arch(char **cmdline_p) * mismatched firmware/kernel archtectures since there is no * support for runtime services. */ - if (efi_enabled(EFI_BOOT) && !efi_is_native()) { + if (efi_enabled(EFI_BOOT) && !efi_runtime_supported()) { pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); efi_unmap_memmap(); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 39f5b7b..395dece 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -802,7 +802,7 @@ void __init efi_init(void) * that doesn't match the kernel 32/64-bit mode. */ - if (!efi_is_native()) + if (!efi_runtime_supported()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else { if (disable_runtime || efi_runtime_init()) -- cgit v0.10.2 From 108d3f44b16be2ecf0e44c2f2863752918eb7bce Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 24 Feb 2014 13:37:29 +0000 Subject: x86/boot: Don't overwrite cr4 when enabling PAE Some EFI firmware makes use of the FPU during boottime services and clearing X86_CR4_OSFXSR by overwriting %cr4 causes the firmware to crash. Add the PAE bit explicitly instead of trashing the existing contents, leaving the rest of the bits as the firmware set them. Cc: H. Peter Anvin Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 37c741b..4f40cdd 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -113,7 +113,8 @@ ENTRY(startup_32) lgdt gdt(%ebp) /* Enable PAE mode */ - movl $(X86_CR4_PAE), %eax + movl %cr4, %eax + orl $X86_CR4_PAE, %eax movl %eax, %cr4 /* -- cgit v0.10.2 From 18c46461d9e42d398536055f31f58cdcd2c6347e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 24 Feb 2014 18:07:26 +0000 Subject: x86/efi: Re-disable interrupts after calling firmware services Some firmware appears to enable interrupts during boot service calls, even if we've explicitly disabled them prior to the call. This is actually allowed per the UEFI spec because boottime services expect to be called with interrupts enabled. So that's fine, we just need to ensure that we disable them again in efi_enter32() before switching to a 64-bit GDT, otherwise an interrupt may fire causing a 32-bit IRQ handler to run after we've left compatibility mode. Despite efi_enter32() being called both for boottime and runtime services, this really only affects boottime because the runtime services callchain is executed with interrupts disabled. See efi_thunk(). Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index e811514..65b787a 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -261,6 +261,12 @@ ENTRY(efi_enter32) /* We must preserve return value */ movl %eax, %edi + /* + * Some firmware will return with interrupts enabled. Be sure to + * disable them before we switch GDTs. + */ + cli + movl 44(%esp), %eax movl %eax, 2(%eax) lgdtl (%eax) -- cgit v0.10.2 From ae72758f1dd93bd367dc7719702f24a9bfb3bad9 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 11 Nov 2013 08:28:33 -0500 Subject: c6x: fix build failure caused by cache.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A patch to linux/irqflags.h uncovered a problem with c6x asm/cache.h which causes a build failure: /arch/c6x/include/asm/cache.h:63:20: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘c6x_cache_init’ extern void __init c6x_cache_init(void); The asm/cache.h was relying on linux/irqflags.h to pull in linux/init.h but the recent patch changed that. The c6x header should have included linux/init.h all along. Signed-off-by: Mark Salter diff --git a/arch/c6x/include/asm/cache.h b/arch/c6x/include/asm/cache.h index 09c5a0f..86648c0 100644 --- a/arch/c6x/include/asm/cache.h +++ b/arch/c6x/include/asm/cache.h @@ -12,6 +12,7 @@ #define _ASM_C6X_CACHE_H #include +#include /* * Cache line size -- cgit v0.10.2 From 3c433679ab666fb76a9399679819a303989e8ead Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2014 23:39:58 +0100 Subject: x86: hyperv: Make it build with CONFIG_HYPERV=m again Commit 1aec16967 (x86: Hyperv: Cleanup the irq mess) removed the ability to build the hyperv stuff as a module. Bring it back. Reported-by: fengguang.wu@intel.com Signed-off-by: Thomas Gleixner Cc: K. Y. Srinivasan Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: linuxdrivers Cc: x86 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 1bd316c..316e106 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -31,7 +31,7 @@ struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); -#ifdef CONFIG_HYPERV +#if IS_ENABLED(CONFIG_HYPERV) static irq_handler_t *vmbus_handler; void hyperv_vector_handler(struct pt_regs *regs) -- cgit v0.10.2 From 13b5be56d1c5ed302df53f6dfbe19b9f4e3fd3ce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2014 23:51:34 +0100 Subject: x86: hyperv: Fix brown paperbag typos reported by Fenguangs build robot Reported-by: fengguang.wu@intel.com Signed-off-by: Thomas Gleixner Cc: K. Y. Srinivasan Cc: Peter Zijlstra Cc: Greg Kroah-Hartman Cc: linuxdrivers Cc: x86 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 316e106..a6f5f35 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -32,7 +32,7 @@ struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); #if IS_ENABLED(CONFIG_HYPERV) -static irq_handler_t *vmbus_handler; +static irq_handler_t vmbus_handler; void hyperv_vector_handler(struct pt_regs *regs) { @@ -49,7 +49,7 @@ void hyperv_vector_handler(struct pt_regs *regs) set_irq_regs(old_regs); } -int hv_setup_vmbus_irq(int irq, irq_handler_t *handler, void *dev_id) +int hv_setup_vmbus_irq(int irq, irq_handler_t handler, void *dev_id) { vmbus_handler = handler; /* @@ -61,7 +61,7 @@ int hv_setup_vmbus_irq(int irq, irq_handler_t *handler, void *dev_id) hyperv_callback_vector); } -void hv_remove_vmbus_irq(unsigned int irq, void *dev_id) +void hv_remove_vmbus_irq(int irq, void *dev_id) { /* We have no way to deallocate the interrupt gate */ vmbus_handler = NULL; -- cgit v0.10.2 From 257ceab7456bd2a2657fd1c689384cabc95e3d30 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2014 23:57:29 +0100 Subject: s390: Do not rely on magic indirect includes commit: 8f945a33 (genirq: Move kstat_incr_irqs_this_cpu() to core) unearthed the following: arch/s390/kernel/irq.c: In function 'init_IRQ': >> arch/s390/kernel/irq.c:93:2: error: implicit declaration of function 'irq_reserve_irqs' [-Werror=implicit-function-declaration] .... cc1: some warnings being treated as errors -- drivers/s390/cio/cio.c: In function 'init_cio_interrupts': >> drivers/s390/cio/cio.c:594:2: error: implicit declaration of function 'irq_set_chip_and_handler' [-Werror=implicit-function-declaration] [-Werror=implicit-function-declaration] .... cc1: some warnings being treated as errors The reason is that those files require linux/irq.h and magically pulled that in via linux/kernel_stat.h The commit above got rid of the pointless include of linux/irq.h in linux/kernel_stat.h and therefor broke the build. Include linux/irq.h Reported-by: fengguang.wu@intel.com Signed-off-by: Thomas Gleixner Cc: Martin Schwidefsky Cc: s390 diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index bb27a26..a770be9 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index f711f0b..5829ddc 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 57310c3c99eb6fab2ecbd63aa3f7c323341ca77e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Mar 2014 00:06:11 +0100 Subject: powerpc: eeh: Fixup the brown paperbag fallout of the "cleanup" Commit b8a9a11b9 (powerpc: eeh: Kill another abuse of irq_desc) is missing some brackets ..... It's not a good idea to write patches in grumpy mode and then forget to at least compile test them or rely on the few eyeballs discussing that patch to spot it..... Reported-by: fengguang.wu@intel.com Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Gavin Shan Cc: Benjamin Herrenschmidt Cc: ppc diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 3e1d7de..bb61ca5 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -166,8 +166,9 @@ static void eeh_enable_irq(struct pci_dev *dev) * * tglx */ - if (irqd_irq_disabled(irq_get_irq_data(dev->irq)) + if (irqd_irq_disabled(irq_get_irq_data(dev->irq))) enable_irq(dev->irq); + } } /** -- cgit v0.10.2 From a5d90c923bcfb9632d998ed06e9569216ad695f3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 4 Mar 2014 17:02:17 +0100 Subject: x86/efi: Quirk out SGI UV Alex reported hitting the following BUG after the EFI 1:1 virtual mapping work was merged, kernel BUG at arch/x86/mm/init_64.c:351! invalid opcode: 0000 [#1] SMP Call Trace: [] init_extra_mapping_uc+0x13/0x15 [] uv_system_init+0x22b/0x124b [] ? clockevents_register_device+0x138/0x13d [] ? setup_APIC_timer+0xc5/0xc7 [] ? clockevent_delta2ns+0xb/0xd [] ? setup_boot_APIC_clock+0x4a8/0x4b7 [] ? printk+0x72/0x74 [] native_smp_prepare_cpus+0x389/0x3d6 [] kernel_init_freeable+0xb7/0x1fb [] ? rest_init+0x74/0x74 [] kernel_init+0x9/0xff [] ret_from_fork+0x7c/0xb0 [] ? rest_init+0x74/0x74 Getting this thing to work with the new mapping scheme would need more work, so automatically switch to the old memmap layout for SGI UV. Acked-by: Russ Anderson Cc: Alex Thorlton Signed-off-by: Matt Fleming diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3d6b9f8..acd86c8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -134,6 +134,7 @@ extern void efi_setup_page_tables(void); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_mkexec(void); +extern void __init efi_apply_memmap_quirks(void); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 06853e6..ce72964 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1239,14 +1239,8 @@ void __init setup_arch(char **cmdline_p) register_refined_jiffies(CLOCK_TICK_RATE); #ifdef CONFIG_EFI - /* Once setup is done above, unmap the EFI memory map on - * mismatched firmware/kernel archtectures since there is no - * support for runtime services. - */ - if (efi_enabled(EFI_BOOT) && !efi_is_native()) { - pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); - efi_unmap_memmap(); - } + if (efi_enabled(EFI_BOOT)) + efi_apply_memmap_quirks(); #endif } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1a201ac..b97acec 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -52,6 +52,7 @@ #include #include #include +#include #define EFI_DEBUG @@ -1210,3 +1211,22 @@ static int __init parse_efi_cmdline(char *str) return 0; } early_param("efi", parse_efi_cmdline); + +void __init efi_apply_memmap_quirks(void) +{ + /* + * Once setup is done earlier, unmap the EFI memory map on mismatched + * firmware/kernel architectures since there is no support for runtime + * services. + */ + if (!efi_is_native()) { + pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + efi_unmap_memmap(); + } + + /* + * UV doesn't support the new EFI pagetable mapping yet. + */ + if (is_uv_system()) + set_bit(EFI_OLD_MEMMAP, &x86_efi_facility); +} -- cgit v0.10.2 From 0ac09f9f8cd1fb028a48330edba6023d347d3cea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2014 17:05:26 +0100 Subject: x86, trace: Fix CR2 corruption when tracing page faults The trace_do_page_fault function trigger tracepoint and then handles the actual page fault. This could lead to error if the tracepoint caused page fault. The original cr2 value gets lost and the original page fault handler kills current process with SIGSEGV. This happens if you record page faults with callchain data, the user part of it will cause tracepoint handler to page fault: # perf record -g -e exceptions:page_fault_user ls Fixing this by saving the original cr2 value and using it after tracepoint handler is done. v2: Moving the cr2 read before exception_enter, because it could trigger tracepoint as well. Reported-by: Arnaldo Carvalho de Melo Reported-by: Vince Weaver Tested-by: Vince Weaver Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Seiji Aguchi Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1402211701380.6395@vincent-weaver-1.um.maine.edu Link: http://lkml.kernel.org/r/20140228160526.GD1133@krava.brq.redhat.com diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6dea040..e7fa28b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1022,11 +1022,11 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * routines. */ static void __kprobes -__do_page_fault(struct pt_regs *regs, unsigned long error_code) +__do_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { struct vm_area_struct *vma; struct task_struct *tsk; - unsigned long address; struct mm_struct *mm; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -1034,9 +1034,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) tsk = current; mm = tsk->mm; - /* Get the faulting address: */ - address = read_cr2(); - /* * Detect and handle instructions that would cause a page fault for * both a tracked kernel page and a userspace page. @@ -1252,9 +1249,11 @@ dotraplinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; + /* Get the faulting address: */ + unsigned long address = read_cr2(); prev_state = exception_enter(); - __do_page_fault(regs, error_code); + __do_page_fault(regs, error_code, address); exception_exit(prev_state); } @@ -1271,9 +1270,16 @@ dotraplinkage void __kprobes trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; + /* + * The exception_enter and tracepoint processing could + * trigger another page faults (user space callchain + * reading) and destroy the original cr2 value, so read + * the faulting address now. + */ + unsigned long address = read_cr2(); prev_state = exception_enter(); trace_page_fault_entries(regs, error_code); - __do_page_fault(regs, error_code); + __do_page_fault(regs, error_code, address); exception_exit(prev_state); } -- cgit v0.10.2 From 084b6e7765b9554699afa23a50e702a3d0ae4b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Salva=20Peir=C3=B3?= Date: Mon, 3 Mar 2014 08:44:04 +0100 Subject: staging/cxt1e1/linux.c: Correct arbitrary memory write in c4_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function c4_ioctl() writes data from user in ifr->ifr_data to the kernel struct data arg, without any iolen bounds checking. This can lead to a arbitrary write outside of the struct data arg. Corrected by adding bounds-checking of iolen before the copy_from_user(). Signed-off-by: Salva Peiró Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/cxt1e1/linux.c b/drivers/staging/cxt1e1/linux.c index 4a08e16..79206cb 100644 --- a/drivers/staging/cxt1e1/linux.c +++ b/drivers/staging/cxt1e1/linux.c @@ -866,6 +866,8 @@ c4_ioctl (struct net_device *ndev, struct ifreq *ifr, int cmd) _IOC_SIZE (iocmd)); #endif iolen = _IOC_SIZE (iocmd); + if (iolen > sizeof(arg)) + return -EFAULT; data = ifr->ifr_data + sizeof (iocmd); if (copy_from_user (&arg, data, iolen)) return -EFAULT; -- cgit v0.10.2 From a2a99cea5ec7c1e47825559f0e75a4efbcf8aee3 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 26 Feb 2014 03:09:41 -0800 Subject: iscsi-target: Fix iscsit_get_tpg_from_np tpg_state bug This patch fixes a bug in iscsit_get_tpg_from_np() where the tpg->tpg_state sanity check was looking for TPG_STATE_FREE, instead of != TPG_STATE_ACTIVE. The latter is expected during a normal TPG shutdown once the tpg_state goes into TPG_STATE_INACTIVE in order to reject any new incoming login attempts. Cc: #3.10+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 3976183..44a5471 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -137,7 +137,7 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np( list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { spin_lock(&tpg->tpg_state_lock); - if (tpg->tpg_state == TPG_STATE_FREE) { + if (tpg->tpg_state != TPG_STATE_ACTIVE) { spin_unlock(&tpg->tpg_state_lock); continue; } -- cgit v0.10.2 From 5159d763f60af693a3fcec45dce2021f66e528a4 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 3 Feb 2014 12:53:51 -0800 Subject: iscsi/iser-target: Use list_del_init for ->i_conn_node There are a handful of uses of list_empty() for cmd->i_conn_node within iser-target code that expect to return false once a cmd has been removed from the per connect list. This patch changes all uses of list_del -> list_del_init in order to ensure that list_empty() returns false as expected. Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.10+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index d18d08a..5eb2b88 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1464,7 +1464,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_SCSI_CMD: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); if (cmd->data_direction == DMA_TO_DEVICE) @@ -1476,7 +1476,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_SCSI_TMFUNC: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1486,7 +1486,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_TEXT: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); /* diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 7f1a7ce..ece82b0 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -785,7 +785,7 @@ static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn) spin_unlock_bh(&conn->cmd_lock); list_for_each_entry_safe(cmd, cmd_p, &ack_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); iscsit_free_cmd(cmd, false); } } @@ -3708,7 +3708,7 @@ iscsit_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state break; case ISTATE_REMOVE: spin_lock_bh(&conn->cmd_lock); - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, false); @@ -4151,7 +4151,7 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) spin_lock_bh(&conn->cmd_lock); list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_increment_maxcmdsn(cmd, sess); diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index 33be1fb..4ca8fd2 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -138,7 +138,7 @@ void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) list_for_each_entry_safe(cmd, cmd_tmp, &cr->conn_recovery_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); cmd->conn = NULL; spin_unlock(&cr->conn_recovery_cmd_lock); iscsit_free_cmd(cmd, true); @@ -160,7 +160,7 @@ void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) list_for_each_entry_safe(cmd, cmd_tmp, &cr->conn_recovery_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); cmd->conn = NULL; spin_unlock(&cr->conn_recovery_cmd_lock); iscsit_free_cmd(cmd, true); @@ -216,7 +216,7 @@ int iscsit_remove_cmd_from_connection_recovery( } cr = cmd->cr; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); return --cr->cmd_count; } @@ -297,7 +297,7 @@ int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn) if (!(cmd->cmd_flags & ICF_OOO_CMDSN)) continue; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); @@ -335,7 +335,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) /* * Only perform connection recovery on ISCSI_OP_SCSI_CMD or * ISCSI_OP_NOOP_OUT opcodes. For all other opcodes call - * list_del(&cmd->i_conn_node); to release the command to the + * list_del_init(&cmd->i_conn_node); to release the command to the * session pool and remove it from the connection's list. * * Also stop the DataOUT timer, which will be restarted after @@ -351,7 +351,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) " CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, cmd->cmd_sn, conn->cid); - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); @@ -371,7 +371,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) */ if (!(cmd->cmd_flags & ICF_OOO_CMDSN) && !cmd->immediate_cmd && iscsi_sna_gte(cmd->cmd_sn, conn->sess->exp_cmd_sn)) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); @@ -393,7 +393,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) cmd->sess = conn->sess; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_all_datain_reqs(cmd); -- cgit v0.10.2 From defd884845297fd5690594bfe89656b01f16d87e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 3 Feb 2014 12:54:39 -0800 Subject: iscsi/iser-target: Fix isert_conn->state hung shutdown issues This patch addresses a couple of different hug shutdown issues related to wait_event() + isert_conn->state. First, it changes isert_conn->conn_wait + isert_conn->conn_wait_comp_err from waitqueues to completions, and sets ISER_CONN_TERMINATING from within isert_disconnect_work(). Second, it splits isert_free_conn() into isert_wait_conn() that is called earlier in iscsit_close_connection() to ensure that all outstanding commands have completed before continuing. Finally, it breaks isert_cq_comp_err() into seperate TX / RX related code, and adds logic in isert_cq_rx_comp_err() to wait for outstanding commands to complete before setting ISER_CONN_DOWN and calling complete(&isert_conn->conn_wait_comp_err). Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.10+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 5eb2b88..862d7b4 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -492,8 +492,8 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->state = ISER_CONN_INIT; INIT_LIST_HEAD(&isert_conn->conn_accept_node); init_completion(&isert_conn->conn_login_comp); - init_waitqueue_head(&isert_conn->conn_wait); - init_waitqueue_head(&isert_conn->conn_wait_comp_err); + init_completion(&isert_conn->conn_wait); + init_completion(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); @@ -688,11 +688,11 @@ isert_disconnect_work(struct work_struct *work) pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); mutex_lock(&isert_conn->conn_mutex); - isert_conn->state = ISER_CONN_DOWN; + if (isert_conn->state == ISER_CONN_UP) + isert_conn->state = ISER_CONN_TERMINATING; if (isert_conn->post_recv_buf_count == 0 && atomic_read(&isert_conn->post_send_buf_count) == 0) { - pr_debug("Calling wake_up(&isert_conn->conn_wait);\n"); mutex_unlock(&isert_conn->conn_mutex); goto wake_up; } @@ -712,7 +712,7 @@ isert_disconnect_work(struct work_struct *work) mutex_unlock(&isert_conn->conn_mutex); wake_up: - wake_up(&isert_conn->conn_wait); + complete(&isert_conn->conn_wait); isert_put_conn(isert_conn); } @@ -1589,7 +1589,7 @@ isert_do_control_comp(struct work_struct *work) pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); /* * Call atomic_dec(&isert_conn->post_send_buf_count) - * from isert_free_conn() + * from isert_wait_conn() */ isert_conn->logout_posted = true; iscsit_logout_post_handler(cmd, cmd->conn); @@ -1691,31 +1691,39 @@ isert_send_completion(struct iser_tx_desc *tx_desc, } static void -isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) +isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + + if (!isert_cmd) + isert_unmap_tx_desc(tx_desc, ib_dev); + else + isert_completion_put(tx_desc, isert_cmd, ib_dev); +} + +static void +isert_cq_rx_comp_err(struct isert_conn *isert_conn) +{ + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct iscsi_conn *conn = isert_conn->conn; - if (tx_desc) { - struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + if (isert_conn->post_recv_buf_count) + return; - if (!isert_cmd) - isert_unmap_tx_desc(tx_desc, ib_dev); - else - isert_completion_put(tx_desc, isert_cmd, ib_dev); + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); } - if (isert_conn->post_recv_buf_count == 0 && - atomic_read(&isert_conn->post_send_buf_count) == 0) { - pr_debug("isert_cq_comp_err >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - pr_debug("Calling wake_up from isert_cq_comp_err\n"); + while (atomic_read(&isert_conn->post_send_buf_count)) + msleep(3000); - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->state != ISER_CONN_DOWN) - isert_conn->state = ISER_CONN_TERMINATING; - mutex_unlock(&isert_conn->conn_mutex); + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_DOWN; + mutex_unlock(&isert_conn->conn_mutex); - wake_up(&isert_conn->conn_wait_comp_err); - } + complete(&isert_conn->conn_wait_comp_err); } static void @@ -1740,8 +1748,9 @@ isert_cq_tx_work(struct work_struct *work) pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n"); pr_debug("TX wc.status: 0x%08x\n", wc.status); pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); + atomic_dec(&isert_conn->post_send_buf_count); - isert_cq_comp_err(tx_desc, isert_conn); + isert_cq_tx_comp_err(tx_desc, isert_conn); } } @@ -1784,7 +1793,7 @@ isert_cq_rx_work(struct work_struct *work) wc.vendor_err); } isert_conn->post_recv_buf_count--; - isert_cq_comp_err(NULL, isert_conn); + isert_cq_rx_comp_err(isert_conn); } } @@ -2702,22 +2711,11 @@ isert_free_np(struct iscsi_np *np) kfree(isert_np); } -static int isert_check_state(struct isert_conn *isert_conn, int state) -{ - int ret; - - mutex_lock(&isert_conn->conn_mutex); - ret = (isert_conn->state == state); - mutex_unlock(&isert_conn->conn_mutex); - - return ret; -} - -static void isert_free_conn(struct iscsi_conn *conn) +static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; - pr_debug("isert_free_conn: Starting \n"); + pr_debug("isert_wait_conn: Starting \n"); /* * Decrement post_send_buf_count for special case when called * from isert_do_control_comp() -> iscsit_logout_post_handler() @@ -2727,38 +2725,29 @@ static void isert_free_conn(struct iscsi_conn *conn) atomic_dec(&isert_conn->post_send_buf_count); if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { - pr_debug("Calling rdma_disconnect from isert_free_conn\n"); + pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); rdma_disconnect(isert_conn->conn_cm_id); } /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. */ - if (isert_conn->state == ISER_CONN_UP) { - pr_debug("isert_free_conn: Before wait_event comp_err %d\n", - isert_conn->state); - mutex_unlock(&isert_conn->conn_mutex); - - wait_event(isert_conn->conn_wait_comp_err, - (isert_check_state(isert_conn, ISER_CONN_TERMINATING))); - - wait_event(isert_conn->conn_wait, - (isert_check_state(isert_conn, ISER_CONN_DOWN))); - - isert_put_conn(isert_conn); - return; - } if (isert_conn->state == ISER_CONN_INIT) { mutex_unlock(&isert_conn->conn_mutex); - isert_put_conn(isert_conn); return; } - pr_debug("isert_free_conn: wait_event conn_wait %d\n", - isert_conn->state); + if (isert_conn->state == ISER_CONN_UP) + isert_conn->state = ISER_CONN_TERMINATING; mutex_unlock(&isert_conn->conn_mutex); - wait_event(isert_conn->conn_wait, - (isert_check_state(isert_conn, ISER_CONN_DOWN))); + wait_for_completion(&isert_conn->conn_wait_comp_err); + + wait_for_completion(&isert_conn->conn_wait); +} + +static void isert_free_conn(struct iscsi_conn *conn) +{ + struct isert_conn *isert_conn = conn->context; isert_put_conn(isert_conn); } @@ -2771,6 +2760,7 @@ static struct iscsit_transport iser_target_transport = { .iscsit_setup_np = isert_setup_np, .iscsit_accept_np = isert_accept_np, .iscsit_free_np = isert_free_np, + .iscsit_wait_conn = isert_wait_conn, .iscsit_free_conn = isert_free_conn, .iscsit_get_login_rx = isert_get_login_rx, .iscsit_put_login_tx = isert_put_login_tx, diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 708a069..41e799f 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -116,8 +116,8 @@ struct isert_conn { struct isert_device *conn_device; struct work_struct conn_logout_work; struct mutex conn_mutex; - wait_queue_head_t conn_wait; - wait_queue_head_t conn_wait_comp_err; + struct completion conn_wait; + struct completion conn_wait_comp_err; struct kref conn_kref; struct list_head conn_fr_pool; int conn_fr_pool_size; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index ece82b0..b83ec37 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4196,6 +4196,10 @@ int iscsit_close_connection( iscsit_stop_timers_for_cmds(conn); iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + iscsit_free_queue_reqs_for_conn(conn); /* diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index ae5a171..4483fad 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -12,6 +12,7 @@ struct iscsit_transport { int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); + void (*iscsit_wait_conn)(struct iscsi_conn *); void (*iscsit_free_conn)(struct iscsi_conn *); int (*iscsit_get_login_rx)(struct iscsi_conn *, struct iscsi_login *); int (*iscsit_put_login_tx)(struct iscsi_conn *, struct iscsi_login *, u32); -- cgit v0.10.2 From b6b87a1df604678ed1be40158080db012a99ccca Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 27 Feb 2014 09:05:03 -0800 Subject: iser-target: Fix post_send_buf_count for RDMA READ/WRITE This patch fixes the incorrect setting of ->post_send_buf_count related to RDMA WRITEs + READs where isert_rdma_rw->send_wr_num was not being taken into account. This includes incrementing ->post_send_buf_count within isert_put_datain() + isert_get_dataout(), decrementing within __isert_send_completion() + isert_response_completion(), and clearing wr->send_wr_num within isert_completion_rdma_read() This is necessary because even though IB_SEND_SIGNALED is not set for RDMA WRITEs + READs, during a QP failure event the work requests will be returned with exception status from the TX completion queue. Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.10+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 862d7b4..a70b0cf 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1549,6 +1549,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, iscsit_stop_dataout_timer(cmd); device->unreg_rdma_mem(isert_cmd, isert_conn); cmd->write_data_done = wr->cur_rdma_length; + wr->send_wr_num = 0; pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); @@ -1613,6 +1614,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1624,7 +1626,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, queue_work(isert_comp_wq, &isert_cmd->comp_work); return; } - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; isert_completion_put(tx_desc, isert_cmd, ib_dev); @@ -1662,7 +1664,7 @@ __isert_send_completion(struct iser_tx_desc *tx_desc, case ISER_IB_RDMA_READ: pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); isert_completion_rdma_read(tx_desc, isert_cmd); break; default: @@ -2386,12 +2388,12 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_init_send_wr(isert_conn, isert_cmd, &isert_cmd->tx_desc.send_wr, true); - atomic_inc(&isert_conn->post_send_buf_count); + atomic_add(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); if (rc) { pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); } pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n", isert_cmd); @@ -2419,12 +2421,12 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) return rc; } - atomic_inc(&isert_conn->post_send_buf_count); + atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count); rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); if (rc) { pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); } pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", isert_cmd); -- cgit v0.10.2 From 9bb4ca68fc48eea618b1af1c1cde2a251ae32d1b Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 27 Feb 2014 07:02:48 -0800 Subject: iser-target: Ignore completions for FRWRs in isert_cq_tx_work This patch changes IB_WR_FAST_REG_MR + IB_WR_LOCAL_INV related work requests to include a ISER_FRWR_LI_WRID value in order to signal isert_cq_tx_work() that these requests should be ignored. This is necessary because even though IB_SEND_SIGNALED is not set for either work request, during a QP failure event the work requests will be returned with exception status from the TX completion queue. v2 changes: - Rename ISER_FRWR_LI_WRID -> ISER_FASTREG_LI_WRID (Sagi) Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.12+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a70b0cf..614a6ef 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1751,8 +1751,10 @@ isert_cq_tx_work(struct work_struct *work) pr_debug("TX wc.status: 0x%08x\n", wc.status); pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); - atomic_dec(&isert_conn->post_send_buf_count); - isert_cq_tx_comp_err(tx_desc, isert_conn); + if (wc.wr_id != ISER_FASTREG_LI_WRID) { + atomic_dec(&isert_conn->post_send_buf_count); + isert_cq_tx_comp_err(tx_desc, isert_conn); + } } } @@ -2213,6 +2215,7 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, if (!fr_desc->valid) { memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = ISER_FASTREG_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey; wr = &inv_wr; @@ -2223,6 +2226,7 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, /* Prepare FASTREG WR */ memset(&fr_wr, 0, sizeof(fr_wr)); + fr_wr.wr_id = ISER_FASTREG_LI_WRID; fr_wr.opcode = IB_WR_FAST_REG_MR; fr_wr.wr.fast_reg.iova_start = fr_desc->data_frpl->page_list[0] + page_off; diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 41e799f..10be55a 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -6,6 +6,7 @@ #define ISERT_RDMA_LISTEN_BACKLOG 10 #define ISCSI_ISER_SG_TABLESIZE 256 +#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL enum isert_desc_type { ISCSI_TX_CONTROL, -- cgit v0.10.2 From ebbe442183b7b8192c963266f1c89048fefc63a5 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 2 Mar 2014 14:51:12 -0800 Subject: iser-target: Fix command leak for tx_desc->comp_llnode_batch This patch addresses a number of active I/O shutdown issues related to isert_cmd descriptors being leaked that are part of a completion interrupt coalescing batch. This includes adding logic in isert_cq_tx_comp_err() to drain any associated tx_desc->comp_llnode_batch, as well as isert_cq_drain_comp_llist() to drain any associated isert_conn->conn_comp_llist. Also, set tx_desc->llnode_active in isert_init_send_wr() in order to determine when work requests need to be skipped in isert_cq_tx_work() exception path code. Finally, update isert_init_send_wr() to only allow interrupt coalescing when ISER_CONN_UP. Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.13+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 614a6ef..8ee228e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -497,7 +497,6 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); - mutex_init(&isert_conn->conn_comp_mutex); spin_lock_init(&isert_conn->conn_lock); cma_id->context = isert_conn; @@ -888,16 +887,17 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls. */ - mutex_lock(&isert_conn->conn_comp_mutex); - if (coalesce && + mutex_lock(&isert_conn->conn_mutex); + if (coalesce && isert_conn->state == ISER_CONN_UP && ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) { + tx_desc->llnode_active = true; llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist); - mutex_unlock(&isert_conn->conn_comp_mutex); + mutex_unlock(&isert_conn->conn_mutex); return; } isert_conn->conn_comp_batch = 0; tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist); - mutex_unlock(&isert_conn->conn_comp_mutex); + mutex_unlock(&isert_conn->conn_mutex); send_wr->send_flags = IB_SEND_SIGNALED; } @@ -1693,10 +1693,45 @@ isert_send_completion(struct iser_tx_desc *tx_desc, } static void +isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev) +{ + struct llist_node *llnode; + struct isert_rdma_wr *wr; + struct iser_tx_desc *t; + + mutex_lock(&isert_conn->conn_mutex); + llnode = llist_del_all(&isert_conn->conn_comp_llist); + isert_conn->conn_comp_batch = 0; + mutex_unlock(&isert_conn->conn_mutex); + + while (llnode) { + t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); + llnode = llist_next(llnode); + wr = &t->isert_cmd->rdma_wr; + + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); + isert_completion_put(t, t->isert_cmd, ib_dev); + } +} + +static void isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + struct llist_node *llnode = tx_desc->comp_llnode_batch; + struct isert_rdma_wr *wr; + struct iser_tx_desc *t; + + while (llnode) { + t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); + llnode = llist_next(llnode); + wr = &t->isert_cmd->rdma_wr; + + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); + isert_completion_put(t, t->isert_cmd, ib_dev); + } + tx_desc->comp_llnode_batch = NULL; if (!isert_cmd) isert_unmap_tx_desc(tx_desc, ib_dev); @@ -1713,6 +1748,8 @@ isert_cq_rx_comp_err(struct isert_conn *isert_conn) if (isert_conn->post_recv_buf_count) return; + isert_cq_drain_comp_llist(isert_conn, ib_dev); + if (conn->sess) { target_sess_cmd_list_set_waiting(conn->sess->se_sess); target_wait_for_sess_cmds(conn->sess->se_sess); @@ -1752,6 +1789,9 @@ isert_cq_tx_work(struct work_struct *work) pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); if (wc.wr_id != ISER_FASTREG_LI_WRID) { + if (tx_desc->llnode_active) + continue; + atomic_dec(&isert_conn->post_send_buf_count); isert_cq_tx_comp_err(tx_desc, isert_conn); } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 10be55a..f6ae7f5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -46,6 +46,7 @@ struct iser_tx_desc { struct isert_cmd *isert_cmd; struct llist_node *comp_llnode_batch; struct llist_node comp_llnode; + bool llnode_active; struct ib_send_wr send_wr; } __packed; @@ -127,7 +128,6 @@ struct isert_conn { #define ISERT_COMP_BATCH_COUNT 8 int conn_comp_batch; struct llist_head conn_comp_llist; - struct mutex conn_comp_mutex; }; #define ISERT_MAX_CQ 64 -- cgit v0.10.2 From 905a5117e79367b7e58ae046d12ca9961f048c89 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 11 Feb 2014 00:22:37 +0800 Subject: pinctrl: sunxi: use chained_irq_{enter, exit} for GIC compatibility On tha Allwinner A20 SoC, the external interrupts on the pin controller device are connected to the GIC. Without chained_irq_{enter, exit}, external GPIO interrupts, such as used by mmc core card detect, cause the system to hang. This issue was first encountered during my attempt to get out-of-band interrupts for WiFi on the Cubietruck working. With David's new series of sunci-mci using mmc slot-gpio for (GPIO interrupt based) card detection, removing the SD card also causes my Cubietruck to hang. This problem should extend to all Allwinner A20 based boards. With this fix, the system no longer hangs when I remove or insert the SD card. /proc/interrupts show that the interrupt has correctly fired. However the system still does not detect card removal/insertion. I believe this is another unrelated issue. Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-sunxi.c b/drivers/pinctrl/pinctrl-sunxi.c index 9ccf681..787f352 100644 --- a/drivers/pinctrl/pinctrl-sunxi.c +++ b/drivers/pinctrl/pinctrl-sunxi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -665,6 +666,7 @@ static struct irq_chip sunxi_pinctrl_irq_chip = { static void sunxi_pinctrl_irq_handler(unsigned irq, struct irq_desc *desc) { + struct irq_chip *chip = irq_get_chip(irq); struct sunxi_pinctrl *pctl = irq_get_handler_data(irq); const unsigned long reg = readl(pctl->membase + IRQ_STATUS_REG); @@ -674,10 +676,12 @@ static void sunxi_pinctrl_irq_handler(unsigned irq, struct irq_desc *desc) if (reg) { int irqoffset; + chained_irq_enter(chip, desc); for_each_set_bit(irqoffset, ®, SUNXI_IRQ_NUMBER) { int pin_irq = irq_find_mapping(pctl->domain, irqoffset); generic_handle_irq(pin_irq); } + chained_irq_exit(chip, desc); } } -- cgit v0.10.2 From d82f94013a9897e02a93400e4c16efe82b530eaa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 17 Feb 2014 22:19:43 +0100 Subject: pinctrl: sunxi: Fix masking when setting irq type Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-sunxi.c b/drivers/pinctrl/pinctrl-sunxi.c index 787f352..f9fabe9 100644 --- a/drivers/pinctrl/pinctrl-sunxi.c +++ b/drivers/pinctrl/pinctrl-sunxi.c @@ -585,7 +585,7 @@ static int sunxi_pinctrl_irq_set_type(struct irq_data *d, spin_lock_irqsave(&pctl->lock, flags); regval = readl(pctl->membase + reg); - regval &= ~IRQ_CFG_IRQ_MASK; + regval &= ~(IRQ_CFG_IRQ_MASK << index); writel(regval | (mode << index), pctl->membase + reg); spin_unlock_irqrestore(&pctl->lock, flags); -- cgit v0.10.2 From ef5aff05f1e6ff172bab86097fbbfe26a44cc228 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 17 Feb 2014 22:19:44 +0100 Subject: pinctrl: sunxi: Fix interrupt register offset calculation This fixing setting the interrupt type for eints >= 8. Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-sunxi.h b/drivers/pinctrl/pinctrl-sunxi.h index 01c494f..552b0e9 100644 --- a/drivers/pinctrl/pinctrl-sunxi.h +++ b/drivers/pinctrl/pinctrl-sunxi.h @@ -511,7 +511,7 @@ static inline u32 sunxi_pull_offset(u16 pin) static inline u32 sunxi_irq_cfg_reg(u16 irq) { - u8 reg = irq / IRQ_CFG_IRQ_PER_REG; + u8 reg = irq / IRQ_CFG_IRQ_PER_REG * 0x04; return reg + IRQ_CFG_REG; } @@ -523,7 +523,7 @@ static inline u32 sunxi_irq_cfg_offset(u16 irq) static inline u32 sunxi_irq_ctrl_reg(u16 irq) { - u8 reg = irq / IRQ_CTRL_IRQ_PER_REG; + u8 reg = irq / IRQ_CTRL_IRQ_PER_REG * 0x04; return reg + IRQ_CTRL_REG; } @@ -535,7 +535,7 @@ static inline u32 sunxi_irq_ctrl_offset(u16 irq) static inline u32 sunxi_irq_status_reg(u16 irq) { - u8 reg = irq / IRQ_STATUS_IRQ_PER_REG; + u8 reg = irq / IRQ_STATUS_IRQ_PER_REG * 0x04; return reg + IRQ_STATUS_REG; } -- cgit v0.10.2 From 5ba341604a054294aeb812603349bba024d716ee Mon Sep 17 00:00:00 2001 From: Josh Cartwright Date: Mon, 24 Feb 2014 12:41:20 -0600 Subject: pinctrl: msm: make PINCTRL_MSM bool instead of tristate Modular builds of pinctrl-msm break due to handle_bad_irq being unexported for module use. For now, make PINCTRL_MSM 'bool'. Signed-off-by: Josh Cartwright Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index be361b7..1e4e693 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -217,7 +217,7 @@ config PINCTRL_IMX28 select PINCTRL_MXS config PINCTRL_MSM - tristate + bool select PINMUX select PINCONF select GENERIC_PINCONF -- cgit v0.10.2 From b5973fcd76ac5b3f2fabb0ab181d45ac16b8ce02 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 26 Feb 2014 19:10:26 +0900 Subject: pinctrl: sh-pfc: r8a7791: SD1_CLK fix Fix the SD1_CLK handling for r8a7791. Without this patch it is impossible to request all pins needed for SDHI1 on the Koelsch board. Signed-off-by: Magnus Damm Acked-by: Laurent Pinchart Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c index 77d103f..567d691 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c @@ -89,7 +89,8 @@ enum { /* GPSR6 */ FN_IP13_10, FN_IP13_11, FN_IP13_12, FN_IP13_13, FN_IP13_14, - FN_IP13_15, FN_IP13_18_16, FN_IP13_21_19, FN_IP13_22, FN_IP13_24_23, + FN_IP13_15, FN_IP13_18_16, FN_IP13_21_19, + FN_IP13_22, FN_IP13_24_23, FN_SD1_CLK, FN_IP13_25, FN_IP13_26, FN_IP13_27, FN_IP13_30_28, FN_IP14_1_0, FN_IP14_2, FN_IP14_3, FN_IP14_4, FN_IP14_5, FN_IP14_6, FN_IP14_7, FN_IP14_10_8, FN_IP14_13_11, FN_IP14_16_14, FN_IP14_19_17, @@ -788,6 +789,7 @@ static const u16 pinmux_data[] = { PINMUX_DATA(USB1_PWEN_MARK, FN_USB1_PWEN), PINMUX_DATA(USB1_OVC_MARK, FN_USB1_OVC), PINMUX_DATA(DU0_DOTCLKIN_MARK, FN_DU0_DOTCLKIN), + PINMUX_DATA(SD1_CLK_MARK, FN_SD1_CLK), /* IPSR0 */ PINMUX_IPSR_DATA(IP0_0, D0), @@ -3825,7 +3827,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { GP_6_11_FN, FN_IP13_25, GP_6_10_FN, FN_IP13_24_23, GP_6_9_FN, FN_IP13_22, - 0, 0, + GP_6_8_FN, FN_SD1_CLK, GP_6_7_FN, FN_IP13_21_19, GP_6_6_FN, FN_IP13_18_16, GP_6_5_FN, FN_IP13_15, -- cgit v0.10.2 From e4ad1accb28d0ed8cea6f12395d58686ad344ca7 Mon Sep 17 00:00:00 2001 From: Patrick Lai Date: Sun, 2 Mar 2014 11:52:57 -0800 Subject: ASoC: pcm: free path list before exiting from error conditions dpcm_path_get() allocates dynamic memory to hold path list. Corresponding dpcm_path_put() must be called to free the memory. dpcm_path_put() is not called under several error conditions. This leads to memory leak. Signed-off-by: Patrick Lai Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 47e1ce7..28522bd 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1989,6 +1989,7 @@ int soc_dpcm_runtime_update(struct snd_soc_card *card) paths = dpcm_path_get(fe, SNDRV_PCM_STREAM_PLAYBACK, &list); if (paths < 0) { + dpcm_path_put(&list); dev_warn(fe->dev, "ASoC: %s no valid %s path\n", fe->dai_link->name, "playback"); mutex_unlock(&card->mutex); @@ -2018,6 +2019,7 @@ capture: paths = dpcm_path_get(fe, SNDRV_PCM_STREAM_CAPTURE, &list); if (paths < 0) { + dpcm_path_put(&list); dev_warn(fe->dev, "ASoC: %s no valid %s path\n", fe->dai_link->name, "capture"); mutex_unlock(&card->mutex); @@ -2082,6 +2084,7 @@ static int dpcm_fe_dai_open(struct snd_pcm_substream *fe_substream) fe->dpcm[stream].runtime = fe_substream->runtime; if (dpcm_path_get(fe, stream, &list) <= 0) { + dpcm_path_put(&list); dev_dbg(fe->dev, "ASoC: %s no valid %s route\n", fe->dai_link->name, stream ? "capture" : "playback"); } -- cgit v0.10.2 From 017145fef567430789e40f6a22a90ce2a766370b Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 14 Feb 2014 12:49:12 +0800 Subject: spi: fsl-dspi: Fix getting correct address for master Current code set platform drvdata to dspi. However, the code in dspi_suspend() and dspi_resume() assumes the drvdata is the address of master. Fix it by setting platform drvdata to master. Signed-off-by: Axel Lin Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index ec79f72..a253920 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -420,7 +420,6 @@ static int dspi_suspend(struct device *dev) static int dspi_resume(struct device *dev) { - struct spi_master *master = dev_get_drvdata(dev); struct fsl_dspi *dspi = spi_master_get_devdata(master); @@ -504,7 +503,7 @@ static int dspi_probe(struct platform_device *pdev) clk_prepare_enable(dspi->clk); init_waitqueue_head(&dspi->waitq); - platform_set_drvdata(pdev, dspi); + platform_set_drvdata(pdev, master); ret = spi_bitbang_start(&dspi->bitbang); if (ret != 0) { @@ -525,7 +524,8 @@ out_master_put: static int dspi_remove(struct platform_device *pdev) { - struct fsl_dspi *dspi = platform_get_drvdata(pdev); + struct spi_master *master = platform_get_drvdata(pdev); + struct fsl_dspi *dspi = spi_master_get_devdata(master); /* Disconnect from the SPI framework */ spi_bitbang_stop(&dspi->bitbang); -- cgit v0.10.2 From ee73b4c6e3fc0755a91752ab8eebc8e070038b53 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 14 Feb 2014 09:53:00 +0800 Subject: spi: coldfire-qspi: Fix getting correct address for *mcfqspi dev_get_drvdata() returns the address of master rather than mcfqspi. Fixes: af361079 (spi/coldfire-qspi: Drop extra calls to spi_master_get in suspend/resume functions) Signed-off-by: Axel Lin Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c index cabed8f..28ae470 100644 --- a/drivers/spi/spi-coldfire-qspi.c +++ b/drivers/spi/spi-coldfire-qspi.c @@ -514,7 +514,8 @@ static int mcfqspi_resume(struct device *dev) #ifdef CONFIG_PM_RUNTIME static int mcfqspi_runtime_suspend(struct device *dev) { - struct mcfqspi *mcfqspi = dev_get_drvdata(dev); + struct spi_master *master = dev_get_drvdata(dev); + struct mcfqspi *mcfqspi = spi_master_get_devdata(master); clk_disable(mcfqspi->clk); @@ -523,7 +524,8 @@ static int mcfqspi_runtime_suspend(struct device *dev) static int mcfqspi_runtime_resume(struct device *dev) { - struct mcfqspi *mcfqspi = dev_get_drvdata(dev); + struct spi_master *master = dev_get_drvdata(dev); + struct mcfqspi *mcfqspi = spi_master_get_devdata(master); clk_enable(mcfqspi->clk); -- cgit v0.10.2 From ba938f3a295686aa9ab5077b10d1049f8091cbd7 Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Wed, 5 Mar 2014 11:29:01 +0800 Subject: spi: atmel: add missing spi_master_{resume,suspend} calls to PM callbacks The PM callbacks implemented by the spi-atmel driver don't call spi_master_{resume,suspend}, fix that. Signed-off-by: Wenyou Yang Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index b0842f7..5d7b07f 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1455,6 +1455,14 @@ static int atmel_spi_suspend(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct atmel_spi *as = spi_master_get_devdata(master); + int ret; + + /* Stop the queue running */ + ret = spi_master_suspend(master); + if (ret) { + dev_warn(dev, "cannot suspend master\n"); + return ret; + } clk_disable_unprepare(as->clk); return 0; @@ -1464,9 +1472,16 @@ static int atmel_spi_resume(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct atmel_spi *as = spi_master_get_devdata(master); + int ret; clk_prepare_enable(as->clk); - return 0; + + /* Start the queue running */ + ret = spi_master_resume(master); + if (ret) + dev_err(dev, "problem starting queue (%d)\n", ret); + + return ret; } static SIMPLE_DEV_PM_OPS(atmel_spi_pm_ops, atmel_spi_suspend, atmel_spi_resume); -- cgit v0.10.2 From e291fd20ef0870ed527470ca9c2402ba6f44d31c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 5 Mar 2014 14:55:02 +0800 Subject: pinctrl: sirf: fix kernel panic in gpio_lock_as_irq commit 655dada6277991 causes kernel panic, this patch fixes it. [ 1.197816] [ffffffee] *pgd=0d7fd821, *pte=00000000, *ppte=00000000 [ 1.204070] Internal error: Oops: 17 [#1] PREEMPT SMP ARM [ 1.209447] Modules linked in: [ 1.212490] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.14.0-rc1 #3 [ 1.218737] task: cd03c000 ti: cd040000 task.ti: cd040000 [ 1.224127] PC is at gpiod_lock_as_irq+0xc/0x64 [ 1.228634] LR is at sirfsoc_gpio_irq_startup+0x18/0x44 [ 1.233842] pc : [] lr : [] psr: a0000193 [ 1.233842] sp : cd041d30 ip : 00000000 fp : 00000000 [ 1.245296] r10: 00000000 r9 : cd023db4 r8 : 60000113 [ 1.250505] r7 : 0000003e r6 : cd023dd4 r5 : c06bfa54 r4 : cd023d80 [ 1.257014] r3 : 00000020 r2 : 00000000 r1 : ffffffea r0 : ffffffea [ 1.263526] Flags: NzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel [ 1.270903] Control: 10c53c7d Table: 00004059 DAC: 00000015 [ 1.276631] Process swapper/0 (pid: 1, stack limit = 0xcd040240) [ 1.282620] Stack: (0xcd041d30 to 0xcd042000) [ 1.286963] 1d20: cd023d80 c01d1c38 c01d1c20 cd023d80 [ 1.295124] 1d40: 00000001 c0068438 cd023d80 ccb6d880 cd023dd4 c0067044 0000718e c006719c [ 1.286963] 1d20: cd023d80 c01d1c38 c01d1c20 cd023d80 [ 1.295124] 1d40: 00000001 c0068438 cd023d80 ccb6d880 cd023dd4 c0067044 0000718e c006719c [ 1.295124] 1d40: 00000001 c0068438 cd023d80 ccb6d880 cd023dd4 c0067044 0000718e c006719c [ 1.303283] 1d60: 00000800 00000083 ccb6d880 cd023d80 c02b41d8 00000083 0000003e ccb7c410 [ 1.311442] 1d80: 00000000 c00671dc 00000083 0000003e c02b41d8 cd3dd5c0 0000003e ccb7c634 [ 1.319601] 1da0: cd040030 c00672a8 cd3dd5c0 ccb7c410 ccb6d340 ccb7c410 ccb6d340 cd3dd400 [ 1.327760] 1dc0: cd3dd410 c02b4434 ccb7c410 c01265a8 00000001 cd3dd410 c0687108 00000000 [ 1.335919] 1de0: c0687108 00000000 00000000 c0240170 c0240158 cd3dd410 c06c30d0 c023e8bc [ 1.344079] 1e00: c023e9d4 00000000 cd3dd410 c023e9d4 c0682150 c023cf88 cd003e98 cd2d50c4 [ 1.352238] 1e20: cd3dd410 cd3dd444 c06822f0 c023e768 cd3dd418 cd3dd410 c06822f0 c023de14 [ 1.360397] 1e40: cd3dd418 00000000 cd3dd410 c023c398 cd041e78 cd041ea8 cd3dd400 cd3dd410 [ 1.368556] 1e60: 00000083 00000000 cd3dd400 cd3dd410 00000083 000000c8 c04e00c8 c023fee8 [ 1.376715] 1e80: 00000000 cd041ea8 cd3dd400 00000001 00000083 c024048c c0435ef8 c0434dec [ 1.384874] 1ea0: c068da58 c04c6d04 c0682150 c0435ef8 ffffffff 00000000 00000000 c068da58 [ 1.393033] 1ec0: 00000020 00000000 00000000 00000000 c05dabb8 00000007 c068d640 c068d640 [ 1.401193] 1ee0: c04c247c c04c249c 00000000 c00088e8 cd004c00 c043bbb8 cd029180 c03812a0 [ 1.409352] 1f00: 00000000 00000000 60000113 c0673728 60000113 c0673728 00000000 00000000 [ 1.417511] 1f20: cd7fce01 c0390a54 00000065 c003a81c c049e8bc 00000007 cd7fce0e 00000007 [ 1.425670] 1f40: 00000000 c05dabb8 00000007 c068d640 c068d640 c04c050c c04e00c8 00000065 [ 1.433829] 1f60: c04e00c0 c04c0c54 00000007 00000007 c04c050c c037d8fc cd03c000 c004322c [ 1.441988] 1f80: c0662b40 0000d640 c03737c0 00000000 00000000 00000000 00000000 00000000 [ 1.450147] 1fa0: 00000000 c03737cc 00000000 c000e478 00000000 00000000 00000000 00000000 [ 1.458307] 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1.466467] 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 0002d481 05014092 [ 1.474640] [] (gpiod_lock_as_irq) from [] (sirfsoc_gpio_irq_startup+0x18/0x44) [ 1.483661] [] (sirfsoc_gpio_irq_startup) from [] (irq_startup+0x34/0x6c) [ 1.492163] [] (irq_startup) from [] (__setup_irq+0x450/0x4b8) [ 1.499714] [] (__setup_irq) from [] (request_threaded_irq+0xa8/0x128) [ 1.507960] [] (request_threaded_irq) from [] (request_any_context_irq+0x4c/0x7c) [ 1.517164] [] (request_any_context_irq) from [] (gpio_extcon_probe+0x144/0x1d4) [ 1.526279] [] (gpio_extcon_probe) from [] (platform_drv_probe+0x18/0x48) [ 1.534783] [] (platform_drv_probe) from [] (driver_probe_device+0x120/0x238) [ 1.543641] [] (driver_probe_device) from [] (bus_for_each_drv+0x58/0x8c) [ 1.552143] [] (bus_for_each_drv) from [] (device_attach+0x74/0x88) [ 1.560126] [] (device_attach) from [] (bus_probe_device+0x84/0xa8) [ 1.568113] [] (bus_probe_device) from [] (device_add+0x440/0x520) [ 1.576012] [] (device_add) from [] (platform_device_add+0xb4/0x214) [ 1.584084] [] (platform_device_add) from [] (platform_device_register_full+0xb8/0xdc) [ 1.593719] [] (platform_device_register_full) from [] (sirfsoc_init_late+0xec/0xf4) [ 1.603185] [] (sirfsoc_init_late) from [] (init_machine_late+0x20/0x28) [ 1.611603] [] (init_machine_late) from [] (do_one_initcall+0xf8/0x144) [ 1.619934] [] (do_one_initcall) from [] (kernel_init_freeable+0x13c/0x1dc) [ 1.628620] [] (kernel_init_freeable) from [] (kernel_init+0xc/0x118) Signed-off-by: Barry Song Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index a0d6152..617a491 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -598,7 +598,7 @@ static unsigned int sirfsoc_gpio_irq_startup(struct irq_data *d) { struct sirfsoc_gpio_bank *bank = irq_data_get_irq_chip_data(d); - if (gpio_lock_as_irq(&bank->chip.gc, d->hwirq)) + if (gpio_lock_as_irq(&bank->chip.gc, d->hwirq % SIRFSOC_GPIO_BANK_SIZE)) dev_err(bank->chip.gc.dev, "unable to lock HW IRQ %lu for IRQ\n", d->hwirq); @@ -611,7 +611,7 @@ static void sirfsoc_gpio_irq_shutdown(struct irq_data *d) struct sirfsoc_gpio_bank *bank = irq_data_get_irq_chip_data(d); sirfsoc_gpio_irq_mask(d); - gpio_unlock_as_irq(&bank->chip.gc, d->hwirq); + gpio_unlock_as_irq(&bank->chip.gc, d->hwirq % SIRFSOC_GPIO_BANK_SIZE); } static struct irq_chip sirfsoc_irq_chip = { -- cgit v0.10.2 From b663a685831905066e3efcb4c173cd8b3f46228b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 5 Mar 2014 10:03:59 +0000 Subject: x86, tools: Fix up compiler warnings The kbuild test robot reported the following errors that were introduced with commit 993c30a04e20 ("x86, tools: Consolidate #ifdef code"), arch/x86/boot/tools/build.c: In function 'update_pecoff_setup_and_reloc': >> arch/x86/boot/tools/build.c:252:1: error: parameter name omitted static inline void update_pecoff_setup_and_reloc(unsigned int) {} ^ arch/x86/boot/tools/build.c: In function 'update_pecoff_text': >> arch/x86/boot/tools/build.c:253:1: error: parameter name omitted static inline void update_pecoff_text(unsigned int, unsigned int) {} ^ >> arch/x86/boot/tools/build.c:253:1: error: parameter name omitted arch/x86/boot/tools/build.c: In function 'main': >> arch/x86/boot/tools/build.c:372:2: warning: implicit declaration of function 'efi_stub_entry_update' [-Wimplicit-function-declaration] efi_stub_entry_update(); ^ Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 4f07df5..1a2f212 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -256,10 +256,11 @@ static void efi_stub_entry_update(void) #else -static inline void update_pecoff_setup_and_reloc(unsigned int) {} -static inline void update_pecoff_text(unsigned int, unsigned int) {} +static inline void update_pecoff_setup_and_reloc(unsigned int size) {} +static inline void update_pecoff_text(unsigned int text_start, + unsigned int file_sz) {} static inline void efi_stub_defaults(void) {} -static inline void efi_stup_entry_update(void) {} +static inline void efi_stub_entry_update(void) {} static inline int reserve_pecoff_reloc_section(int c) { -- cgit v0.10.2 From 3db4cafdfd05717dc939780134e53023a3c1f15f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 5 Mar 2014 10:15:55 +0000 Subject: x86/boot: Fix non-EFI build The kbuild test robot reported the following errors, introduced with commit 54b52d872680 ("x86/efi: Build our own EFI services pointer table"), arch/x86/boot/compressed/head_32.o: In function `efi32_config': >> (.data+0x58): undefined reference to `efi_call_phys' arch/x86/boot/compressed/head_64.o: In function `efi64_config': >> (.data+0x90): undefined reference to `efi_call6' Wrap the efi*_config structures in #ifdef CONFIG_EFI_STUB so that we don't make references to EFI functions if they're not compiled in. Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index cccc05f..de9d420 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -229,12 +229,14 @@ relocated: xorl %ebx, %ebx jmp *%eax +#ifdef CONFIG_EFI_STUB .data efi32_config: .fill 11,8,0 .long efi_call_phys .long 0 .byte 0 +#endif /* * Stack and heap for uncompression diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4f40cdd..57e58a5 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -440,6 +440,7 @@ gdt: .quad 0x0000000000000000 /* TS continued */ gdt_end: +#ifdef CONFIG_EFI_STUB efi_config: .quad 0 @@ -456,6 +457,8 @@ efi64_config: .fill 11,8,0 .quad efi_call6 .byte 1 +#endif /* CONFIG_EFI_STUB */ + /* * Stack and heap for uncompression */ -- cgit v0.10.2 From c5eda4c1bf6214332c46fb2f4e7c42a85e5e5643 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Mar 2014 11:52:24 +0100 Subject: ALSA: hda - Add missing loopback merge path for AD1884/1984 codecs The mixer widget (NID 0x20) of AD1884 and AD1984 codecs isn't connected directly to the actual I/O paths but only via another mixer widget (NID 0x21). We need a similar fix as we did for AD1882. Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index df3652a..4989710 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -1092,6 +1092,7 @@ static int patch_ad1884(struct hda_codec *codec) spec = codec->spec; spec->gen.mixer_nid = 0x20; + spec->gen.mixer_merge_nid = 0x21; spec->gen.beep_nid = 0x10; set_beep_amp(spec, 0x10, 0, HDA_OUTPUT); -- cgit v0.10.2 From f3e9b59cb948e2328bc06635ad39572d5b7b4791 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Mar 2014 12:00:29 +0100 Subject: ALSA: hda - Use analog beep for Thinkpads with AD1984 codecs For making the driver behavior compatible with the earlier kernels, use the analog beep in the loopback path instead of the digital beep. Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 4989710..8ed0bcc 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -1026,6 +1026,9 @@ static void ad1884_fixup_thinkpad(struct hda_codec *codec, spec->gen.keep_eapd_on = 1; spec->gen.vmaster_mute.hook = ad_vmaster_eapd_hook; spec->eapd_nid = 0x12; + /* Analog PC Beeper - allow firmware/ACPI beeps */ + spec->beep_amp = HDA_COMPOSE_AMP_VAL(0x20, 3, 3, HDA_INPUT); + spec->gen.beep_nid = 0; /* no digital beep */ } } -- cgit v0.10.2 From e805ca8b0a9b6c91099c0eaa4b160a1196a4ae25 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Mar 2014 12:34:39 +0100 Subject: ALSA: usb-audio: Add quirk for Logitech Webcam C500 Logitech C500 (046d:0807) needs the same workaround like other Logitech Webcams. Cc: Signed-off-by: Takashi Iwai diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 44b0ba4..1bed780 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -883,6 +883,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; + case USB_ID(0x046d, 0x0807): /* Logitech Webcam C500 */ case USB_ID(0x046d, 0x0808): case USB_ID(0x046d, 0x0809): case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */ -- cgit v0.10.2 From abcfc543bec803a53c5bd2925d3293df4ede84b0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Mar 2014 13:28:32 +0100 Subject: m68k: Do not rely on magic indirect includes commit: 8f945a33 (genirq: Move kstat_incr_irqs_this_cpu() to core) unearthed the following: arch/m68k/kernel/ints.c:34:15: error: variable 'auto_irq_chip' has initializer but incomplete type arch/m68k/kernel/ints.c:35:2: error: unknown field 'name' specified in initializer arch/m68k/kernel/ints.c:35:2: warning: excess elements in struct initializer [enabled by default] The reason is that this file requires linux/irq.h and magically pulled that in via linux/kernel_stat.h The commit above got rid of the pointless include of linux/irq.h in linux/kernel_stat.h and therefor broke the build. Include linux/irq.h Reported-by: fengguang.wu@intel.com Signed-off-by: Thomas Gleixner diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c index 077d3a7..5b8d66f 100644 --- a/arch/m68k/kernel/ints.c +++ b/arch/m68k/kernel/ints.c @@ -10,9 +10,9 @@ #include #include #include -#include #include #include +#include #include #include -- cgit v0.10.2 From 76d388cd72ab08c2c56b1e2bd430e7422fc40168 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 Mar 2014 13:42:14 +0100 Subject: x86: hyperv: Fixup the (brain) damage caused by the irq cleanup Compiling last minute changes without setting the proper config options is not really clever. Reported-by: Fengguang Wu Signed-off-by: Thomas Gleixner diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index e98f66f..c163215 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -17,7 +17,7 @@ void hyperv_callback_vector(void); #define trace_hyperv_callback_vector hyperv_callback_vector #endif void hyperv_vector_handler(struct pt_regs *regs); -int hv_setup_vmbus_irq(int irq, irq_handler_t handler, void *dev_id); -void hv_remove_vmbus_irq(int irq, void *dev_id); +void hv_setup_vmbus_irq(void (*handler)(void)); +void hv_remove_vmbus_irq(void); #endif diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index a6f5f35..b4dcca1 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -32,7 +32,7 @@ struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); #if IS_ENABLED(CONFIG_HYPERV) -static irq_handler_t vmbus_handler; +static void (*vmbus_handler)(void); void hyperv_vector_handler(struct pt_regs *regs) { @@ -49,7 +49,7 @@ void hyperv_vector_handler(struct pt_regs *regs) set_irq_regs(old_regs); } -int hv_setup_vmbus_irq(int irq, irq_handler_t handler, void *dev_id) +void hv_setup_vmbus_irq(void (*handler)(void)) { vmbus_handler = handler; /* @@ -61,7 +61,7 @@ int hv_setup_vmbus_irq(int irq, irq_handler_t handler, void *dev_id) hyperv_callback_vector); } -void hv_remove_vmbus_irq(int irq, void *dev_id) +void hv_remove_vmbus_irq(void) { /* We have no way to deallocate the interrupt gate */ vmbus_handler = NULL; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 5a6909f..3f0a952 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -615,7 +615,7 @@ static void vmbus_on_msg_dpc(unsigned long data) } } -static irqreturn_t vmbus_isr(int irq, void *dev_id) +static void vmbus_isr(void) { int cpu = smp_processor_id(); void *page_addr; @@ -625,7 +625,7 @@ static irqreturn_t vmbus_isr(int irq, void *dev_id) page_addr = hv_context.synic_event_page[cpu]; if (page_addr == NULL) - return IRQ_NONE; + return; event = (union hv_synic_event_flags *)page_addr + VMBUS_MESSAGE_SINT; @@ -661,15 +661,8 @@ static irqreturn_t vmbus_isr(int irq, void *dev_id) msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; /* Check if there are actual msgs to be processed */ - if (msg->header.message_type != HVMSG_NONE) { - handled = true; + if (msg->header.message_type != HVMSG_NONE) tasklet_schedule(&msg_dpc); - } - - if (handled) - return IRQ_HANDLED; - else - return IRQ_NONE; } /* @@ -698,12 +691,7 @@ static int vmbus_bus_init(int irq) if (ret) goto err_cleanup; - ret = hv_setup_vmbus_irq(irq, vmbus_isr, hv_acpi_dev); - - if (ret != 0) { - pr_err("Unable to request IRQ %d\n", irq); - goto err_unregister; - } + hv_setup_vmbus_irq(vmbus_isr); ret = hv_synic_alloc(); if (ret) @@ -723,9 +711,8 @@ static int vmbus_bus_init(int irq) err_alloc: hv_synic_free(); - hv_remove_vmbus_irq(irq, hv_acpi_dev); + hv_remove_vmbus_irq(); -err_unregister: bus_unregister(&hv_bus); err_cleanup: @@ -917,7 +904,6 @@ static int __init hv_acpi_init(void) /* * Get irq resources first. */ - ret = acpi_bus_register_driver(&vmbus_acpi_driver); if (ret) @@ -948,7 +934,7 @@ cleanup: static void __exit vmbus_exit(void) { - hv_remove_vmbus_irq(irq, hv_acpi_dev); + hv_remove_vmbus_irq(); vmbus_free_channels(); bus_unregister(&hv_bus); hv_cleanup(); -- cgit v0.10.2 From 864a6040f395464003af8dd0d8ca86fed19866d4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Mar 2014 13:46:53 +0100 Subject: mac80211: clear sequence/fragment number in QoS-null frames Avoid leaking data by sending uninitialized memory and setting an invalid (non-zero) fragment number (the sequence number is ignored anyway) by setting the seq_ctrl field to zero. Cc: stable@vger.kernel.org Fixes: 3f52b7e328c5 ("mac80211: mesh power save basics") Fixes: ce662b44ce22 ("mac80211: send (QoS) Null if no buffered frames") Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 2802f9d..ad8b377 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -36,6 +36,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta) sdata->vif.addr); nullfunc->frame_control = fc; nullfunc->duration_id = 0; + nullfunc->seq_ctrl = 0; /* no address resolution for this frame -> set addr 1 immediately */ memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a023b43..137a192 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1206,6 +1206,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); -- cgit v0.10.2 From e1253be0ece1a95a02c7f5843194877471af8179 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Mar 2014 08:44:23 -0500 Subject: NFSv4: nfs4_stateid_is_current should return 'true' for an invalid stateid When nfs4_set_rw_stateid() can fails by returning EIO to indicate that the stateid is completely invalid, then it makes no sense to have it trigger a retry of the READ or WRITE operation. Instead, we should just have it fall through and attempt a recovery. This fixes an infinite loop in which the client keeps replaying the same bad stateid back to the server. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 44e088d..4ae8141 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4011,8 +4011,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid, { nfs4_stateid current_stateid; - if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode)) - return false; + /* If the current stateid represents a lost lock, then exit */ + if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode) == -EIO) + return true; return nfs4_stateid_match(stateid, ¤t_stateid); } -- cgit v0.10.2 From 927864cd92aaad1d6285e3bb16e503caf3d6e27e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Mar 2014 13:12:03 -0500 Subject: NFSv4: Fix the return value of nfs4_select_rw_stateid In commit 5521abfdcf4d6 (NFSv4: Resend the READ/WRITE RPC call if a stateid change causes an error), we overloaded the return value of nfs4_select_rw_stateid() to cause it to return -EWOULDBLOCK if an RPC call is outstanding that would cause the NFSv4 lock or open stateid to change. That is all redundant when we actually copy the stateid used in the read/write RPC call that failed, and check that against the current stateid. It is doubly so, when we consider that in the NFSv4.1 case, we also set the stateid's seqid to the special value '0', which means 'match the current valid stateid'. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e1a4721..0deb321 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -974,9 +974,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); ret = 0; - smp_rmb(); - if (!list_empty(&lsp->ls_seqid.list)) - ret = -EWOULDBLOCK; } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); @@ -984,10 +981,9 @@ out: return ret; } -static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { const nfs4_stateid *src; - int ret; int seq; do { @@ -996,12 +992,7 @@ static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) if (test_bit(NFS_OPEN_STATE, &state->flags)) src = &state->open_stateid; nfs4_stateid_copy(dst, src); - ret = 0; - smp_rmb(); - if (!list_empty(&state->owner->so_seqid.list)) - ret = -EWOULDBLOCK; } while (read_seqretry(&state->seqlock, seq)); - return ret; } /* @@ -1026,7 +1017,8 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, * choose to use. */ goto out; - ret = nfs4_copy_open_stateid(dst, state); + nfs4_copy_open_stateid(dst, state); + ret = 0; out: if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) dst->seqid = 0; -- cgit v0.10.2 From 869a9d375dca601dde1dee1344f3d1d665505f19 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 4 Mar 2014 12:31:09 -0500 Subject: NFSv4.1 Fail data server I/O if stateid represents a lost lock Signed-off-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 12c8132..b9a35c0 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -324,8 +324,9 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) &rdata->res.seq_res, task)) return; - nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ); + if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -435,8 +436,9 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) &wdata->res.seq_res, task)) return; - nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE); + if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) -- cgit v0.10.2 From 0418dae1056d6091e9527b7158a3763f7aa92353 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Mar 2014 13:48:16 -0500 Subject: NFSv4: Fail the truncate() if the lock/open stateid is invalid If the open stateid could not be recovered, or the file locks were lost, then we should fail the truncate() operation altogether. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4ae8141..450bfed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2398,13 +2398,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { /* Use that stateid */ - } else if (truncate && state != NULL && nfs4_valid_open_stateid(state)) { + } else if (truncate && state != NULL) { struct nfs_lockowner lockowner = { .l_owner = current->files, .l_pid = current->tgid, }; - nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - &lockowner); + if (!nfs4_valid_open_stateid(state)) + return -EBADF; + if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, + &lockowner) == -EIO) + return -EBADF; } else nfs4_stateid_copy(&arg.stateid, &zero_stateid); -- cgit v0.10.2 From 07f2b6e0382ec4c59887d5954683f1a0b265574e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 14 Feb 2014 11:58:41 -0500 Subject: dm thin: ensure user takes action to validate data and metadata consistency If a thin metadata operation fails the current transaction will abort, whereby causing potential for IO layers up the stack (e.g. filesystems) to have data loss. As such, set THIN_METADATA_NEEDS_CHECK_FLAG in the thin metadata's superblock which: 1) requires the user verify the thin metadata is consistent (e.g. use thin_check, etc) 2) suggests the user verify the thin data is consistent (e.g. use fsck) The only way to clear the superblock's THIN_METADATA_NEEDS_CHECK_FLAG is to run thin_repair. On metadata operation failure: abort current metadata transaction, set pool in read-only mode, and now set the needs_check flag. As part of this change, constraints are introduced or relaxed: * don't allow a pool to transition to write mode if needs_check is set * don't allow data or metadata space to be resized if needs_check is set * if a thin pool's metadata space is exhausted: the kernel will now force the user to take the pool offline for repair before the kernel will allow the metadata space to be extended. Also, update Documentation to include information about when the thin provisioning target commits metadata, how it handles metadata failures and running out of space. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt index e6b72d3..68c0f51 100644 --- a/Documentation/device-mapper/cache.txt +++ b/Documentation/device-mapper/cache.txt @@ -124,12 +124,11 @@ the default being 204800 sectors (or 100MB). Updating on-disk metadata ------------------------- -On-disk metadata is committed every time a REQ_SYNC or REQ_FUA bio is -written. If no such requests are made then commits will occur every -second. This means the cache behaves like a physical disk that has a -write cache (the same is true of the thin-provisioning target). If -power is lost you may lose some recent writes. The metadata should -always be consistent in spite of any crash. +On-disk metadata is committed every time a FLUSH or FUA bio is written. +If no such requests are made then commits will occur every second. This +means the cache behaves like a physical disk that has a volatile write +cache. If power is lost you may lose some recent writes. The metadata +should always be consistent in spite of any crash. The 'dirty' state for a cache block changes far too frequently for us to keep updating it on the fly. So we treat it as a hint. In normal diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index 8a7a3d4..3b34b4f 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -116,6 +116,35 @@ Resuming a device with a new table itself triggers an event so the userspace daemon can use this to detect a situation where a new table already exceeds the threshold. +A low water mark for the metadata device is maintained in the kernel and +will trigger a dm event if free space on the metadata device drops below +it. + +Updating on-disk metadata +------------------------- + +On-disk metadata is committed every time a FLUSH or FUA bio is written. +If no such requests are made then commits will occur every second. This +means the thin-provisioning target behaves like a physical disk that has +a volatile write cache. If power is lost you may lose some recent +writes. The metadata should always be consistent in spite of any crash. + +If data space is exhausted the pool will either error or queue IO +according to the configuration (see: error_if_no_space). If metadata +space is exhausted or a metadata operation fails: the pool will error IO +until the pool is taken offline and repair is performed to 1) fix any +potential inconsistencies and 2) clear the flag that imposes repair. +Once the pool's metadata device is repaired it may be resized, which +will allow the pool to return to normal operation. Note that if a pool +is flagged as needing repair, the pool's data and metadata devices +cannot be resized until repair is performed. It should also be noted +that when the pool's metadata space is exhausted the current metadata +transaction is aborted. Given that the pool will cache IO whose +completion may have already been acknowledged to upper IO layers +(e.g. filesystem) it is strongly suggested that consistency checks +(e.g. fsck) be performed on those layers when repair of the pool is +required. + Thin provisioning ----------------- diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index baa87ff..fb9efc8 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -76,7 +76,7 @@ #define THIN_SUPERBLOCK_MAGIC 27022010 #define THIN_SUPERBLOCK_LOCATION 0 -#define THIN_VERSION 1 +#define THIN_VERSION 2 #define THIN_METADATA_CACHE_SIZE 64 #define SECTOR_TO_BLOCK_SHIFT 3 @@ -1755,3 +1755,38 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, return r; } + +int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + + down_write(&pmd->root_lock); + pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG; + + r = superblock_lock(pmd, &sblock); + if (r) { + DMERR("couldn't read superblock"); + goto out; + } + + disk_super = dm_block_data(sblock); + disk_super->flags = cpu_to_le32(pmd->flags); + + dm_bm_unlock(sblock); +out: + up_write(&pmd->root_lock); + return r; +} + +bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd) +{ + bool needs_check; + + down_read(&pmd->root_lock); + needs_check = pmd->flags & THIN_METADATA_NEEDS_CHECK_FLAG; + up_read(&pmd->root_lock); + + return needs_check; +} diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 82ea384..e3c857d 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -25,6 +25,11 @@ /*----------------------------------------------------------------*/ +/* + * Thin metadata superblock flags. + */ +#define THIN_METADATA_NEEDS_CHECK_FLAG (1 << 0) + struct dm_pool_metadata; struct dm_thin_device; @@ -202,6 +207,12 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, dm_sm_threshold_fn fn, void *context); +/* + * Updates the superblock immediately. + */ +int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd); +bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd); + /*----------------------------------------------------------------*/ #endif diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index dfa48ec..a04eba9 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1403,7 +1403,28 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) { int r; struct pool_c *pt = pool->ti->private; - enum pool_mode old_mode = pool->pf.mode; + bool needs_check = dm_pool_metadata_needs_check(pool->pmd); + enum pool_mode old_mode = get_pool_mode(pool); + + /* + * Never allow the pool to transition to PM_WRITE mode if user + * intervention is required to verify metadata and data consistency. + */ + if (new_mode == PM_WRITE && needs_check) { + DMERR("%s: unable to switch pool to write mode until repaired.", + dm_device_name(pool->pool_md)); + if (old_mode != new_mode) + new_mode = old_mode; + else + new_mode = PM_READ_ONLY; + } + /* + * If we were in PM_FAIL mode, rollback of metadata failed. We're + * not going to recover without a thin_repair. So we never let the + * pool move out of the old mode. + */ + if (old_mode == PM_FAIL) + new_mode = old_mode; switch (new_mode) { case PM_FAIL: @@ -1467,19 +1488,28 @@ static void out_of_data_space(struct pool *pool) set_pool_mode(pool, PM_READ_ONLY); } -static void metadata_operation_failed(struct pool *pool, const char *op, int r) +static void abort_transaction(struct pool *pool) { - dm_block_t free_blocks; + const char *dev_name = dm_device_name(pool->pool_md); + + DMERR_LIMIT("%s: aborting current metadata transaction", dev_name); + if (dm_pool_abort_metadata(pool->pmd)) { + DMERR("%s: failed to abort metadata transaction", dev_name); + set_pool_mode(pool, PM_FAIL); + } + + if (dm_pool_metadata_set_needs_check(pool->pmd)) { + DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); + set_pool_mode(pool, PM_FAIL); + } +} +static void metadata_operation_failed(struct pool *pool, const char *op, int r) +{ DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d", dm_device_name(pool->pool_md), op, r); - if (r == -ENOSPC && - !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) && - !free_blocks) - DMERR_LIMIT("%s: no free metadata space available.", - dm_device_name(pool->pool_md)); - + abort_transaction(pool); set_pool_mode(pool, PM_READ_ONLY); } @@ -1693,7 +1723,7 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) /* * We want to make sure that a pool in PM_FAIL mode is never upgraded. */ - enum pool_mode old_mode = pool->pf.mode; + enum pool_mode old_mode = get_pool_mode(pool); enum pool_mode new_mode = pt->adjusted_pf.mode; /* @@ -1707,16 +1737,6 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) pool->pf = pt->adjusted_pf; pool->low_water_blocks = pt->low_water_blocks; - /* - * If we were in PM_FAIL mode, rollback of metadata failed. We're - * not going to recover without a thin_repair. So we never let the - * pool move out of the old mode. On the other hand a PM_READ_ONLY - * may have been due to a lack of metadata or data space, and may - * now work (ie. if the underlying devices have been resized). - */ - if (old_mode == PM_FAIL) - new_mode = old_mode; - set_pool_mode(pool, new_mode); return 0; @@ -2259,6 +2279,12 @@ static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit) return -EINVAL; } else if (data_size > sb_data_size) { + if (dm_pool_metadata_needs_check(pool->pmd)) { + DMERR("%s: unable to grow the data device until repaired.", + dm_device_name(pool->pool_md)); + return 0; + } + if (sb_data_size) DMINFO("%s: growing the data device from %llu to %llu blocks", dm_device_name(pool->pool_md), @@ -2300,6 +2326,12 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) return -EINVAL; } else if (metadata_dev_size > sb_metadata_dev_size) { + if (dm_pool_metadata_needs_check(pool->pmd)) { + DMERR("%s: unable to grow the metadata device until repaired.", + dm_device_name(pool->pool_md)); + return 0; + } + warn_if_metadata_device_too_big(pool->md_dev); DMINFO("%s: growing the metadata device from %llu to %llu blocks", dm_device_name(pool->pool_md), @@ -2801,7 +2833,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3091,7 +3123,7 @@ static int thin_iterate_devices(struct dm_target *ti, static struct target_type thin_target = { .name = "thin", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, -- cgit v0.10.2 From 3e1a0699095803e53072699a4a1485af7744601d Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 3 Mar 2014 16:03:26 +0000 Subject: dm thin: fix out of data space handling Ideally a thin pool would never run out of data space; the low water mark would trigger userland to extend the pool before we completely run out of space. However, many small random IOs to unprovisioned space can consume data space at an alarming rate. Adjust your low water mark if you're frequently seeing "out-of-data-space" mode. Before this fix, if data space ran out the pool would be put in PM_READ_ONLY mode which also aborted the pool's current metadata transaction (data loss for any changes in the transaction). This had a side-effect of needlessly compromising data consistency. And retry of queued unserviceable bios, once the data pool was resized, could initiate changes to potentially inconsistent pool metadata. Now when the pool's data space is exhausted transition to a new pool mode (PM_OUT_OF_DATA_SPACE) that allows metadata to be changed but data may not be allocated. This allows users to remove thin volumes or discard data to recover data space. The pool is no longer put in PM_READ_ONLY mode in response to the pool running out of data space. And PM_READ_ONLY mode no longer aborts the pool's current metadata transaction. Also, set_pool_mode() will now notify userspace when the pool mode is changed. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a04eba9..38a063f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -130,10 +130,11 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, struct dm_thin_new_mapping; /* - * The pool runs in 3 modes. Ordered in degraded order for comparisons. + * The pool runs in 4 modes. Ordered in degraded order for comparisons. */ enum pool_mode { PM_WRITE, /* metadata may be changed */ + PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */ PM_READ_ONLY, /* metadata may not be changed */ PM_FAIL, /* all I/O fails */ }; @@ -198,7 +199,6 @@ struct pool { }; static enum pool_mode get_pool_mode(struct pool *pool); -static void out_of_data_space(struct pool *pool); static void metadata_operation_failed(struct pool *pool, const char *op, int r); /* @@ -399,6 +399,23 @@ static void requeue_io(struct thin_c *tc) spin_unlock_irqrestore(&pool->lock, flags); } +static void error_retry_list(struct pool *pool) +{ + struct bio *bio; + unsigned long flags; + struct bio_list bios; + + bio_list_init(&bios); + + spin_lock_irqsave(&pool->lock, flags); + bio_list_merge(&bios, &pool->retry_on_resume_list); + bio_list_init(&pool->retry_on_resume_list); + spin_unlock_irqrestore(&pool->lock, flags); + + while ((bio = bio_list_pop(&bios))) + bio_io_error(bio); +} + /* * This section of code contains the logic for processing a thin device's IO. * Much of the code depends on pool object resources (lists, workqueues, etc) @@ -925,13 +942,15 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks) } } +static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); + static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; dm_block_t free_blocks; struct pool *pool = tc->pool; - if (get_pool_mode(pool) != PM_WRITE) + if (WARN_ON(get_pool_mode(pool) != PM_WRITE)) return -EINVAL; r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); @@ -958,7 +977,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) } if (!free_blocks) { - out_of_data_space(pool); + set_pool_mode(pool, PM_OUT_OF_DATA_SPACE); return -ENOSPC; } } @@ -988,15 +1007,32 @@ static void retry_on_resume(struct bio *bio) spin_unlock_irqrestore(&pool->lock, flags); } -static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) +static bool should_error_unserviceable_bio(struct pool *pool) { - /* - * When pool is read-only, no cell locking is needed because - * nothing is changing. - */ - WARN_ON_ONCE(get_pool_mode(pool) != PM_READ_ONLY); + enum pool_mode m = get_pool_mode(pool); + + switch (m) { + case PM_WRITE: + /* Shouldn't get here */ + DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode"); + return true; + + case PM_OUT_OF_DATA_SPACE: + return pool->pf.error_if_no_space; + + case PM_READ_ONLY: + case PM_FAIL: + return true; + default: + /* Shouldn't get here */ + DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode"); + return true; + } +} - if (pool->pf.error_if_no_space) +static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) +{ + if (should_error_unserviceable_bio(pool)) bio_io_error(bio); else retry_on_resume(bio); @@ -1007,11 +1043,20 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c struct bio *bio; struct bio_list bios; + if (should_error_unserviceable_bio(pool)) { + cell_error(pool, cell); + return; + } + bio_list_init(&bios); cell_release(pool, cell, &bios); - while ((bio = bio_list_pop(&bios))) - handle_unserviceable_bio(pool, bio); + if (should_error_unserviceable_bio(pool)) + while ((bio = bio_list_pop(&bios))) + bio_io_error(bio); + else + while ((bio = bio_list_pop(&bios))) + retry_on_resume(bio); } static void process_discard(struct thin_c *tc, struct bio *bio) @@ -1296,6 +1341,11 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) } } +static void process_bio_success(struct thin_c *tc, struct bio *bio) +{ + bio_endio(bio, 0); +} + static void process_bio_fail(struct thin_c *tc, struct bio *bio) { bio_io_error(bio); @@ -1399,9 +1449,15 @@ static enum pool_mode get_pool_mode(struct pool *pool) return pool->pf.mode; } +static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode) +{ + dm_table_event(pool->ti->table); + DMINFO("%s: switching pool to %s mode", + dm_device_name(pool->pool_md), new_mode); +} + static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) { - int r; struct pool_c *pt = pool->ti->private; bool needs_check = dm_pool_metadata_needs_check(pool->pmd); enum pool_mode old_mode = get_pool_mode(pool); @@ -1429,38 +1485,48 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) switch (new_mode) { case PM_FAIL: if (old_mode != new_mode) - DMERR("%s: switching pool to failure mode", - dm_device_name(pool->pool_md)); + notify_of_pool_mode_change(pool, "failure"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_fail; pool->process_discard = process_bio_fail; pool->process_prepared_mapping = process_prepared_mapping_fail; pool->process_prepared_discard = process_prepared_discard_fail; + + error_retry_list(pool); break; case PM_READ_ONLY: if (old_mode != new_mode) - DMERR("%s: switching pool to read-only mode", - dm_device_name(pool->pool_md)); - r = dm_pool_abort_metadata(pool->pmd); - if (r) { - DMERR("%s: aborting transaction failed", - dm_device_name(pool->pool_md)); - new_mode = PM_FAIL; - set_pool_mode(pool, new_mode); - } else { - dm_pool_metadata_read_only(pool->pmd); - pool->process_bio = process_bio_read_only; - pool->process_discard = process_discard; - pool->process_prepared_mapping = process_prepared_mapping_fail; - pool->process_prepared_discard = process_prepared_discard_passdown; - } + notify_of_pool_mode_change(pool, "read-only"); + dm_pool_metadata_read_only(pool->pmd); + pool->process_bio = process_bio_read_only; + pool->process_discard = process_bio_success; + pool->process_prepared_mapping = process_prepared_mapping_fail; + pool->process_prepared_discard = process_prepared_discard_passdown; + + error_retry_list(pool); + break; + + case PM_OUT_OF_DATA_SPACE: + /* + * Ideally we'd never hit this state; the low water mark + * would trigger userland to extend the pool before we + * completely run out of data space. However, many small + * IOs to unprovisioned space can consume data space at an + * alarming rate. Adjust your low water mark if you're + * frequently seeing this mode. + */ + if (old_mode != new_mode) + notify_of_pool_mode_change(pool, "out-of-data-space"); + pool->process_bio = process_bio_read_only; + pool->process_discard = process_discard; + pool->process_prepared_mapping = process_prepared_mapping; + pool->process_prepared_discard = process_prepared_discard_passdown; break; case PM_WRITE: if (old_mode != new_mode) - DMINFO("%s: switching pool to write mode", - dm_device_name(pool->pool_md)); + notify_of_pool_mode_change(pool, "write"); dm_pool_metadata_read_write(pool->pmd); pool->process_bio = process_bio; pool->process_discard = process_discard; @@ -1477,17 +1543,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pt->adjusted_pf.mode = new_mode; } -/* - * Rather than calling set_pool_mode directly, use these which describe the - * reason for mode degradation. - */ -static void out_of_data_space(struct pool *pool) -{ - DMERR_LIMIT("%s: no free data space available.", - dm_device_name(pool->pool_md)); - set_pool_mode(pool, PM_READ_ONLY); -} - static void abort_transaction(struct pool *pool) { const char *dev_name = dm_device_name(pool->pool_md); @@ -2719,7 +2774,9 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("- "); - if (pool->pf.mode == PM_READ_ONLY) + if (pool->pf.mode == PM_OUT_OF_DATA_SPACE) + DMEMIT("out_of_data_space "); + else if (pool->pf.mode == PM_READ_ONLY) DMEMIT("ro "); else DMEMIT("rw "); -- cgit v0.10.2 From 18adc57779866ba451237dccca2ebf019be2fa20 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 3 Mar 2014 15:46:42 +0000 Subject: dm thin: fix deadlock in __requeue_bio_list The spin lock in requeue_io() was held for too long, allowing deadlock. Don't worry, due to other issues addressed in the following "dm thin: fix noflush suspend IO queueing" commit, this code was never called. Fix this by taking the spin lock for a much shorter period of time. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 38a063f..f39876d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -369,14 +369,18 @@ struct dm_thin_endio_hook { struct dm_thin_new_mapping *overwrite_mapping; }; -static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) +static void requeue_bio_list(struct thin_c *tc, struct bio_list *master) { struct bio *bio; struct bio_list bios; + unsigned long flags; bio_list_init(&bios); + + spin_lock_irqsave(&tc->pool->lock, flags); bio_list_merge(&bios, master); bio_list_init(master); + spin_unlock_irqrestore(&tc->pool->lock, flags); while ((bio = bio_list_pop(&bios))) { struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); @@ -391,12 +395,9 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) static void requeue_io(struct thin_c *tc) { struct pool *pool = tc->pool; - unsigned long flags; - spin_lock_irqsave(&pool->lock, flags); - __requeue_bio_list(tc, &pool->deferred_bios); - __requeue_bio_list(tc, &pool->retry_on_resume_list); - spin_unlock_irqrestore(&pool->lock, flags); + requeue_bio_list(tc, &pool->deferred_bios); + requeue_bio_list(tc, &pool->retry_on_resume_list); } static void error_retry_list(struct pool *pool) -- cgit v0.10.2 From 738211f70a1d0c7ff7dc965395f7b8a436365ebd Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 3 Mar 2014 15:52:28 +0000 Subject: dm thin: fix noflush suspend IO queueing i) by the time DM core calls the postsuspend hook the dm_noflush flag has been cleared. So the old thin_postsuspend did nothing. We need to use the presuspend hook instead. ii) There was a race between bios leaving DM core and arriving in the deferred queue. thin_presuspend now sets a 'requeue' flag causing all bios destined for that thin to be requeued back to DM core. Then it requeues all held IO, and all IO on the deferred queue (destined for that thin). Finally postsuspend clears the 'requeue' flag. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index f39876d..be70d38 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -226,6 +226,7 @@ struct thin_c { struct pool *pool; struct dm_thin_device *td; + bool requeue_mode:1; }; /*----------------------------------------------------------------*/ @@ -1379,6 +1380,11 @@ static void process_deferred_bios(struct pool *pool) struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; + if (tc->requeue_mode) { + bio_endio(bio, DM_ENDIO_REQUEUE); + continue; + } + /* * If we've got no free new_mapping structs, and processing * this bio might require one, we pause until there are some @@ -1445,6 +1451,51 @@ static void do_waker(struct work_struct *ws) /*----------------------------------------------------------------*/ +struct noflush_work { + struct work_struct worker; + struct thin_c *tc; + + atomic_t complete; + wait_queue_head_t wait; +}; + +static void complete_noflush_work(struct noflush_work *w) +{ + atomic_set(&w->complete, 1); + wake_up(&w->wait); +} + +static void do_noflush_start(struct work_struct *ws) +{ + struct noflush_work *w = container_of(ws, struct noflush_work, worker); + w->tc->requeue_mode = true; + requeue_io(w->tc); + complete_noflush_work(w); +} + +static void do_noflush_stop(struct work_struct *ws) +{ + struct noflush_work *w = container_of(ws, struct noflush_work, worker); + w->tc->requeue_mode = false; + complete_noflush_work(w); +} + +static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) +{ + struct noflush_work w; + + INIT_WORK(&w.worker, fn); + w.tc = tc; + atomic_set(&w.complete, 0); + init_waitqueue_head(&w.wait); + + queue_work(tc->pool->wq, &w.worker); + + wait_event(w.wait, atomic_read(&w.complete)); +} + +/*----------------------------------------------------------------*/ + static enum pool_mode get_pool_mode(struct pool *pool) { return pool->pf.mode; @@ -1616,6 +1667,11 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) thin_hook_bio(tc, bio); + if (tc->requeue_mode) { + bio_endio(bio, DM_ENDIO_REQUEUE); + return DM_MAPIO_SUBMITTED; + } + if (get_pool_mode(tc->pool) == PM_FAIL) { bio_io_error(bio); return DM_MAPIO_SUBMITTED; @@ -3093,10 +3149,23 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err) return 0; } -static void thin_postsuspend(struct dm_target *ti) +static void thin_presuspend(struct dm_target *ti) { + struct thin_c *tc = ti->private; + if (dm_noflush_suspending(ti)) - requeue_io((struct thin_c *)ti->private); + noflush_work(tc, do_noflush_start); +} + +static void thin_postsuspend(struct dm_target *ti) +{ + struct thin_c *tc = ti->private; + + /* + * The dm_noflush_suspending flag has been cleared by now, so + * unfortunately we must always run this. + */ + noflush_work(tc, do_noflush_stop); } /* @@ -3187,6 +3256,7 @@ static struct target_type thin_target = { .dtr = thin_dtr, .map = thin_map, .end_io = thin_endio, + .presuspend = thin_presuspend, .postsuspend = thin_postsuspend, .status = thin_status, .iterate_devices = thin_iterate_devices, -- cgit v0.10.2 From a4f1987e4c5489a3877eaa7451a68d28c5a3f664 Mon Sep 17 00:00:00 2001 From: "Li, Aubrey" Date: Sun, 2 Mar 2014 18:39:02 +0800 Subject: x86, reboot: Add EFI and CF9 reboot methods into the default list Reboot is the last service linux OS provides to the end user. We are supposed to make this function more robust than today. This patch adds all of the known reboot methods into the default attempt list. The machines requiring reboot=efi or reboot=p or reboot=bios get a chance to reboot automatically now. If there is a new reboot method emerged, we are supposed to add it to the default list as well, instead of adding the endless dmidecode entry. If one method required is in the default list in this patch but the machine reboot still hangs, that means some methods ahead of the required method cause the system hangs, then reboot the machine by passing reboot= arguments and submit the reboot dmidecode table quirk. We are supposed to remove the reboot dmidecode table from the kernel, but to be safe, we keep it. This patch prevents us from adding more. If you happened to have a machine listed in the reboot dmidecode table and this patch makes reboot work on your machine, please submit a patch to remove the quirk. The default reboot order with this patch is now: ACPI > KBD > ACPI > KBD > EFI > CF9_COND > BIOS Because BIOS and TRIPLE are mutually exclusive (either will either work or hang the machine) that method is not included. [ hpa: as with any changes to the reboot order, this patch will have to be monitored carefully for regressions. ] Signed-off-by: Aubrey Li Acked-by: Matthew Garrett Link: http://lkml.kernel.org/r/53130A46.1010801@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c752cb4..f601295 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -464,9 +464,12 @@ void __attribute__((weak)) mach_reboot_fixups(void) * 2) If still alive, write to the keyboard controller * 3) If still alive, write to the ACPI reboot register again * 4) If still alive, write to the keyboard controller again + * 5) If still alive, call the EFI runtime service to reboot + * 6) If still alive, write to the PCI IO port 0xCF9 to reboot + * 7) If still alive, inform BIOS to do a proper reboot * * If the machine is still alive at this stage, it gives up. We default to - * following the same pattern, except that if we're still alive after (4) we'll + * following the same pattern, except that if we're still alive after (7) we'll * try to force a triple fault and then cycle between hitting the keyboard * controller and doing that */ @@ -502,7 +505,7 @@ static void native_machine_emergency_restart(void) attempt = 1; reboot_type = BOOT_ACPI; } else { - reboot_type = BOOT_TRIPLE; + reboot_type = BOOT_EFI; } break; @@ -510,13 +513,15 @@ static void native_machine_emergency_restart(void) load_idt(&no_idt); __asm__ __volatile__("int3"); + /* We're probably dead after this, but... */ reboot_type = BOOT_KBD; break; case BOOT_BIOS: machine_real_restart(MRR_BIOS); - reboot_type = BOOT_KBD; + /* We're probably dead after this, but... */ + reboot_type = BOOT_TRIPLE; break; case BOOT_ACPI: @@ -530,7 +535,7 @@ static void native_machine_emergency_restart(void) EFI_RESET_WARM : EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); - reboot_type = BOOT_KBD; + reboot_type = BOOT_CF9; break; case BOOT_CF9: @@ -548,7 +553,7 @@ static void native_machine_emergency_restart(void) outb(cf9|reboot_code, 0xcf9); udelay(50); } - reboot_type = BOOT_KBD; + reboot_type = BOOT_BIOS; break; } } -- cgit v0.10.2 From fb3bd7b19b2b6ef779d18573c10c00c53cd8add6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 5 Mar 2014 15:41:15 -0800 Subject: x86, reboot: Only use CF9_COND automatically, not CF9 Only CF9_COND is appropriate for inclusion in the default chain, not CF9; the latter will poke that register unconditionally, whereas CF9_COND will at least look for PCI configuration method #1 or #2 first (a weak check, but better than nothing.) CF9 should be used for explicit system configuration (command line or DMI) only. Cc: Aubrey Li Cc: Matthew Garrett Link: http://lkml.kernel.org/r/53130A46.1010801@linux.intel.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f601295..654b465 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -535,7 +535,7 @@ static void native_machine_emergency_restart(void) EFI_RESET_WARM : EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); - reboot_type = BOOT_CF9; + reboot_type = BOOT_CF9_COND; break; case BOOT_CF9: -- cgit v0.10.2 From 24b9bf43e93e0edd89072da51cf1fab95fc69dec Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 3 Mar 2014 23:19:18 +0100 Subject: net: fix for a race condition in the inet frag code I stumbled upon this very serious bug while hunting for another one, it's a very subtle race condition between inet_frag_evictor, inet_frag_intern and the IPv4/6 frag_queue and expire functions (basically the users of inet_frag_kill/inet_frag_put). What happens is that after a fragment has been added to the hash chain but before it's been added to the lru_list (inet_frag_lru_add) in inet_frag_intern, it may get deleted (either by an expired timer if the system load is high or the timer sufficiently low, or by the fraq_queue function for different reasons) before it's added to the lru_list, then after it gets added it's a matter of time for the evictor to get to a piece of memory which has been freed leading to a number of different bugs depending on what's left there. I've been able to trigger this on both IPv4 and IPv6 (which is normal as the frag code is the same), but it's been much more difficult to trigger on IPv4 due to the protocol differences about how fragments are treated. The setup I used to reproduce this is: 2 machines with 4 x 10G bonded in a RR bond, so the same flow can be seen on multiple cards at the same time. Then I used multiple instances of ping/ping6 to generate fragmented packets and flood the machines with them while running other processes to load the attacked machine. *It is very important to have the _same flow_ coming in on multiple CPUs concurrently. Usually the attacked machine would die in less than 30 minutes, if configured properly to have many evictor calls and timeouts it could happen in 10 minutes or so. An important point to make is that any caller (frag_queue or timer) of inet_frag_kill will remove both the timer refcount and the original/guarding refcount thus removing everything that's keeping the frag from being freed at the next inet_frag_put. All of this could happen before the frag was ever added to the LRU list, then it gets added and the evictor uses a freed fragment. An example for IPv6 would be if a fragment is being added and is at the stage of being inserted in the hash after the hash lock is released, but before inet_frag_lru_add executes (or is able to obtain the lru lock) another overlapping fragment for the same flow arrives at a different CPU which finds it in the hash, but since it's overlapping it drops it invoking inet_frag_kill and thus removing all guarding refcounts, and afterwards freeing it by invoking inet_frag_put which removes the last refcount added previously by inet_frag_find, then inet_frag_lru_add gets executed by inet_frag_intern and we have a freed fragment in the lru_list. The fix is simple, just move the lru_add under the hash chain locked region so when a removing function is called it'll have to wait for the fragment to be added to the lru_list, and then it'll remove it (it works because the hash chain removal is done before the lru_list one and there's no window between the two list adds when the frag can get dropped). With this fix applied I couldn't kill the same machine in 24 hours with the same setup. Fixes: 3ef0eb0db4bf ("net: frag, move LRU list maintenance outside of rwlock") CC: Florian Westphal CC: Jesper Dangaard Brouer CC: David S. Miller Signed-off-by: Nikolay Aleksandrov Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index bb075fc..322dceb 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -278,9 +278,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); + inet_frag_lru_add(nf, qp); spin_unlock(&hb->chain_lock); read_unlock(&f->lock); - inet_frag_lru_add(nf, qp); + return qp; } -- cgit v0.10.2 From 6565b9eeef194afbb3beec80d6dd2447f4091f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 4 Mar 2014 03:57:35 +0100 Subject: bridge: multicast: add sanity check for query source addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MLD queries are supposed to have an IPv6 link-local source address according to RFC2710, section 4 and RFC3810, section 5.1.14. This patch adds a sanity check to ignore such broken MLD queries. Without this check, such malformed MLD queries can result in a denial of service: The queries are ignored by any MLD listener therefore they will not respond with an MLD report. However, without this patch these malformed MLD queries would enable the snooping part in the bridge code, potentially shutting down the according ports towards these hosts for multicast traffic as the bridge did not learn about these listeners. Reported-by: Jan Stancek Signed-off-by: Linus Lüssing Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index ef66365..fb0e36f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1235,6 +1235,12 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { + err = -EINVAL; + goto out; + } + if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; -- cgit v0.10.2 From 367d56f7b4d5ce61e883c64f81786c7a3ae88eea Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Mar 2014 15:35:20 +0800 Subject: net/mlx4: Support shutdown() interface In kexec scenario, we failed to load the mlx4 driver in the second kernel because the ownership bit was hold by the first kernel without release correctly. The patch adds shutdown() interface so that the ownership can be released correctly in the first kernel. It also helps avoiding EEH error happened during boot stage of the second kernel because of undesired traffic, which can't be handled by hardware during that stage on Power platform. Signed-off-by: Gavin Shan Tested-by: Wei Yang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d711158..5a6105f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2684,6 +2684,7 @@ static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, + .shutdown = mlx4_remove_one, .remove = mlx4_remove_one, .err_handler = &mlx4_err_handler, }; -- cgit v0.10.2 From 10c3271712f58215f4d336a1e30aa25be09cd5d1 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 4 Mar 2014 20:47:48 +0800 Subject: r8152: disable the ECM mode There are known issues for switching the drivers between ECM mode and vendor mode. The interrup transfer may become abnormal. The hardware may have the opportunity to die if you change the configuration without unloading the current driver first, because all the control transfers of the current driver would fail after the command of switching the configuration. Although to use the ecm driver and vendor driver independently is fine, it may have problems to change the driver from one to the other by switching the configuration. Additionally, now the vendor mode driver is more powerful than the ECM driver. Thus, disable the ECM mode driver, and let r8152 to set the configuration to vendor mode and reset the device automatically. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index 433f0a0..e2797f1 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_USB_HSO) += hso.o obj-$(CONFIG_USB_NET_AX8817X) += asix.o asix-y := asix_devices.o asix_common.o ax88172a.o obj-$(CONFIG_USB_NET_AX88179_178A) += ax88179_178a.o -obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o r815x.o +obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o obj-$(CONFIG_USB_NET_CDC_EEM) += cdc_eem.o obj-$(CONFIG_USB_NET_DM9601) += dm9601.o obj-$(CONFIG_USB_NET_SR9700) += sr9700.o diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 42e1769..bd363b2 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -652,6 +652,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Samsung USB Ethernet Adapters */ +{ + USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, 0xa101, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d89dbe3..adb12f3 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -449,9 +449,6 @@ enum rtl8152_flags { #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 -#define REALTEK_USB_DEVICE(vend, prod) \ - USB_DEVICE_INTERFACE_CLASS(vend, prod, USB_CLASS_VENDOR_SPEC) - struct rx_desc { __le32 opts1; #define RX_LEN_MASK 0x7fff @@ -2739,6 +2736,12 @@ static int rtl8152_probe(struct usb_interface *intf, struct net_device *netdev; int ret; + if (udev->actconfig->desc.bConfigurationValue != 1) { + usb_driver_set_configuration(udev, 1); + return -ENODEV; + } + + usb_reset_device(udev); netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { dev_err(&intf->dev, "Out of memory\n"); @@ -2819,9 +2822,9 @@ static void rtl8152_disconnect(struct usb_interface *intf) /* table of devices that work with this driver */ static struct usb_device_id rtl8152_table[] = { - {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8152)}, - {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8153)}, - {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, PRODUCT_ID_SAMSUNG)}, + {USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8152)}, + {USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8153)}, + {USB_DEVICE(VENDOR_ID_SAMSUNG, PRODUCT_ID_SAMSUNG)}, {} }; diff --git a/drivers/net/usb/r815x.c b/drivers/net/usb/r815x.c deleted file mode 100644 index f0a8791..0000000 --- a/drivers/net/usb/r815x.c +++ /dev/null @@ -1,248 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define RTL815x_REQT_READ 0xc0 -#define RTL815x_REQT_WRITE 0x40 -#define RTL815x_REQ_GET_REGS 0x05 -#define RTL815x_REQ_SET_REGS 0x05 - -#define MCU_TYPE_PLA 0x0100 -#define OCP_BASE 0xe86c -#define BASE_MII 0xa400 - -#define BYTE_EN_DWORD 0xff -#define BYTE_EN_WORD 0x33 -#define BYTE_EN_BYTE 0x11 - -#define R815x_PHY_ID 32 -#define REALTEK_VENDOR_ID 0x0bda - - -static int pla_read_word(struct usb_device *udev, u16 index) -{ - int ret; - u8 shift = index & 2; - __le32 *tmp; - - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - index &= ~3; - - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - RTL815x_REQ_GET_REGS, RTL815x_REQT_READ, - index, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500); - if (ret < 0) - goto out2; - - ret = __le32_to_cpu(*tmp); - ret >>= (shift * 8); - ret &= 0xffff; - -out2: - kfree(tmp); - return ret; -} - -static int pla_write_word(struct usb_device *udev, u16 index, u32 data) -{ - __le32 *tmp; - u32 mask = 0xffff; - u16 byen = BYTE_EN_WORD; - u8 shift = index & 2; - int ret; - - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - data &= mask; - - if (shift) { - byen <<= shift; - mask <<= (shift * 8); - data <<= (shift * 8); - index &= ~3; - } - - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - RTL815x_REQ_GET_REGS, RTL815x_REQT_READ, - index, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500); - if (ret < 0) - goto out3; - - data |= __le32_to_cpu(*tmp) & ~mask; - *tmp = __cpu_to_le32(data); - - ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), - RTL815x_REQ_SET_REGS, RTL815x_REQT_WRITE, - index, MCU_TYPE_PLA | byen, tmp, sizeof(*tmp), - 500); - -out3: - kfree(tmp); - return ret; -} - -static int ocp_reg_read(struct usbnet *dev, u16 addr) -{ - u16 ocp_base, ocp_index; - int ret; - - ocp_base = addr & 0xf000; - ret = pla_write_word(dev->udev, OCP_BASE, ocp_base); - if (ret < 0) - goto out; - - ocp_index = (addr & 0x0fff) | 0xb000; - ret = pla_read_word(dev->udev, ocp_index); - -out: - return ret; -} - -static int ocp_reg_write(struct usbnet *dev, u16 addr, u16 data) -{ - u16 ocp_base, ocp_index; - int ret; - - ocp_base = addr & 0xf000; - ret = pla_write_word(dev->udev, OCP_BASE, ocp_base); - if (ret < 0) - goto out1; - - ocp_index = (addr & 0x0fff) | 0xb000; - ret = pla_write_word(dev->udev, ocp_index, data); - -out1: - return ret; -} - -static int r815x_mdio_read(struct net_device *netdev, int phy_id, int reg) -{ - struct usbnet *dev = netdev_priv(netdev); - int ret; - - if (phy_id != R815x_PHY_ID) - return -EINVAL; - - if (usb_autopm_get_interface(dev->intf) < 0) - return -ENODEV; - - ret = ocp_reg_read(dev, BASE_MII + reg * 2); - - usb_autopm_put_interface(dev->intf); - return ret; -} - -static -void r815x_mdio_write(struct net_device *netdev, int phy_id, int reg, int val) -{ - struct usbnet *dev = netdev_priv(netdev); - - if (phy_id != R815x_PHY_ID) - return; - - if (usb_autopm_get_interface(dev->intf) < 0) - return; - - ocp_reg_write(dev, BASE_MII + reg * 2, val); - - usb_autopm_put_interface(dev->intf); -} - -static int r8153_bind(struct usbnet *dev, struct usb_interface *intf) -{ - int status; - - status = usbnet_cdc_bind(dev, intf); - if (status < 0) - return status; - - dev->mii.dev = dev->net; - dev->mii.mdio_read = r815x_mdio_read; - dev->mii.mdio_write = r815x_mdio_write; - dev->mii.phy_id_mask = 0x3f; - dev->mii.reg_num_mask = 0x1f; - dev->mii.phy_id = R815x_PHY_ID; - dev->mii.supports_gmii = 1; - - return status; -} - -static int r8152_bind(struct usbnet *dev, struct usb_interface *intf) -{ - int status; - - status = usbnet_cdc_bind(dev, intf); - if (status < 0) - return status; - - dev->mii.dev = dev->net; - dev->mii.mdio_read = r815x_mdio_read; - dev->mii.mdio_write = r815x_mdio_write; - dev->mii.phy_id_mask = 0x3f; - dev->mii.reg_num_mask = 0x1f; - dev->mii.phy_id = R815x_PHY_ID; - dev->mii.supports_gmii = 0; - - return status; -} - -static const struct driver_info r8152_info = { - .description = "RTL8152 ECM Device", - .flags = FLAG_ETHER | FLAG_POINTTOPOINT, - .bind = r8152_bind, - .unbind = usbnet_cdc_unbind, - .status = usbnet_cdc_status, - .manage_power = usbnet_manage_power, -}; - -static const struct driver_info r8153_info = { - .description = "RTL8153 ECM Device", - .flags = FLAG_ETHER | FLAG_POINTTOPOINT, - .bind = r8153_bind, - .unbind = usbnet_cdc_unbind, - .status = usbnet_cdc_status, - .manage_power = usbnet_manage_power, -}; - -static const struct usb_device_id products[] = { -{ - USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8152, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &r8152_info, -}, - -{ - USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8153, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &r8153_info, -}, - - { }, /* END */ -}; -MODULE_DEVICE_TABLE(usb, products); - -static struct usb_driver r815x_driver = { - .name = "r815x", - .id_table = products, - .probe = usbnet_probe, - .disconnect = usbnet_disconnect, - .suspend = usbnet_suspend, - .resume = usbnet_resume, - .reset_resume = usbnet_resume, - .supports_autosuspend = 1, - .disable_hub_initiated_lpm = 1, -}; - -module_usb_driver(r815x_driver); - -MODULE_AUTHOR("Hayes Wang"); -MODULE_DESCRIPTION("Realtek USB ECM device"); -MODULE_LICENSE("GPL"); -- cgit v0.10.2 From c485658bae87faccd7aed540fd2ca3ab37992310 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 4 Mar 2014 16:35:51 +0100 Subject: net: sctp: fix skb leakage in COOKIE ECHO path of chunk->auth_chunk While working on ec0223ec48a9 ("net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable"), we noticed that there's a skb memory leakage in the error path. Running the same reproducer as in ec0223ec48a9 and by unconditionally jumping to the error label (to simulate an error condition) in sctp_sf_do_5_1D_ce() receive path lets kmemleak detector bark about the unfreed chunk->auth_chunk skb clone: Unreferenced object 0xffff8800b8f3a000 (size 256): comm "softirq", pid 0, jiffies 4294769856 (age 110.757s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 89 ab 75 5e d4 01 58 13 00 00 00 00 00 00 00 00 ..u^..X......... backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0xc8/0x210 [] skb_clone+0x49/0xb0 [] sctp_endpoint_bh_rcv+0x1d9/0x230 [sctp] [] sctp_inq_push+0x4c/0x70 [sctp] [] sctp_rcv+0x82e/0x9a0 [sctp] [] ip_local_deliver_finish+0xa8/0x210 [] nf_reinject+0xbf/0x180 [] nfqnl_recv_verdict+0x1d2/0x2b0 [nfnetlink_queue] [] nfnetlink_rcv_msg+0x14b/0x250 [nfnetlink] [] netlink_rcv_skb+0xa9/0xc0 [] nfnetlink_rcv+0x23f/0x408 [nfnetlink] [] netlink_unicast+0x168/0x250 [] netlink_sendmsg+0x2e1/0x3f0 [] sock_sendmsg+0x8b/0xc0 [] ___sys_sendmsg+0x369/0x380 What happens is that commit bbd0d59809f9 clones the skb containing the AUTH chunk in sctp_endpoint_bh_rcv() when having the edge case that an endpoint requires COOKIE-ECHO chunks to be authenticated: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- ------------------ AUTH; COOKIE-ECHO ----------------> <-------------------- COOKIE-ACK --------------------- When we enter sctp_sf_do_5_1D_ce() and before we actually get to the point where we process (and subsequently free) a non-NULL chunk->auth_chunk, we could hit the "goto nomem_init" path from an error condition and thus leave the cloned skb around w/o freeing it. The fix is to centrally free such clones in sctp_chunk_destroy() handler that is invoked from sctp_chunk_free() after all refs have dropped; and also move both kfree_skb(chunk->auth_chunk) there, so that chunk->auth_chunk is either NULL (since sctp_chunkify() allocs new chunks through kmem_cache_zalloc()) or non-NULL with a valid skb pointer. chunk->skb and chunk->auth_chunk are the only skbs in the sctp_chunk structure that need to be handeled. While at it, we should use consume_skb() for both. It is the same as dev_kfree_skb() but more appropriately named as we are not a device but a protocol. Also, this effectively replaces the kfree_skb() from both invocations into consume_skb(). Functions are the same only that kfree_skb() assumes that the frame was being dropped after a failure (e.g. for tools like drop monitor), usage of consume_skb() seems more appropriate in function sctp_chunk_destroy() though. Fixes: bbd0d59809f9 ("[SCTP]: Implement the receive and verification of AUTH chunk") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Cc: Neil Horman Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 632090b..3a1767e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1421,8 +1421,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk) BUG_ON(!list_empty(&chunk->list)); list_del_init(&chunk->transmitted_list); - /* Free the chunk skb data and the SCTP_chunk stub itself. */ - dev_kfree_skb(chunk->skb); + consume_skb(chunk->skb); + consume_skb(chunk->auth_chunk); SCTP_DBG_OBJCNT_DEC(chunk); kmem_cache_free(sctp_chunk_cachep, chunk); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ae65b6b..01e0024 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -760,7 +760,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, /* Make sure that we and the peer are AUTH capable */ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { - kfree_skb(chunk->auth_chunk); sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } @@ -775,10 +774,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, auth.transport = chunk->transport; ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); - - /* We can now safely free the auth_chunk clone */ - kfree_skb(chunk->auth_chunk); - if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); -- cgit v0.10.2 From 920309086652651c7d59791e5b83e2f10112b293 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Tue, 4 Mar 2014 08:46:39 -0800 Subject: net: macb: Check DMA mappings for error With CONFIG_DMA_API_DEBUG enabled the following warning is printed: WARNING: CPU: 0 PID: 619 at lib/dma-debug.c:1101 check_unmap+0x758/0x894() macb e000b000.ethernet: DMA-API: device driver failed to check map error[device address=0x000000002d171c02] [size=322 bytes] [mapped as single] Modules linked in: CPU: 0 PID: 619 Comm: udhcpc Not tainted 3.14.0-rc3-xilinx-00219-gd158fc7f36a2 #63 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0xc8) [] (dump_stack) from [] (warn_slowpath_common+0x60/0x84) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x2c/0x3c) [] (warn_slowpath_fmt) from [] (check_unmap+0x758/0x894) [] (check_unmap) from [] (debug_dma_unmap_page+0x64/0x70) [] (debug_dma_unmap_page) from [] (macb_interrupt+0x1f8/0x2dc) [] (macb_interrupt) from [] (handle_irq_event_percpu+0x2c/0x178) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x3c/0x5c) [] (handle_irq_event) from [] (handle_fasteoi_irq+0xb8/0x100) [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x20/0x30) [] (generic_handle_irq) from [] (handle_IRQ+0x64/0x8c) [] (handle_IRQ) from [] (gic_handle_irq+0x3c/0x60) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x78) Exception stack(0xed197f60 to 0xed197fa8) 7f60: 00000134 60000013 bd94362e bd94362e be96b37c 00000014 fffffd72 00000122 7f80: c000ebe4 ed196000 00000000 00000011 c032c0d8 ed197fa8 c0064008 c000ea20 7fa0: 60000013 ffffffff [] (__irq_svc) from [] (ret_fast_syscall+0x0/0x48) ---[ end trace 478f921d0d542d1e ]--- Mapped at: [] debug_dma_map_page+0x48/0x11c [] macb_start_xmit+0x184/0x2a8 [] dev_hard_start_xmit+0x334/0x470 [] sch_direct_xmit+0x78/0x2f8 [] __dev_queue_xmit+0x318/0x708 due to missing checks of the dma mapping. Add the appropriate checks to fix this. Signed-off-by: Soren Brinkmann Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 3190d38..1f03e20 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -632,11 +632,16 @@ static void gem_rx_refill(struct macb *bp) "Unable to allocate sk_buff\n"); break; } - bp->rx_skbuff[entry] = skb; /* now fill corresponding descriptor entry */ paddr = dma_map_single(&bp->pdev->dev, skb->data, bp->rx_buffer_size, DMA_FROM_DEVICE); + if (dma_mapping_error(&bp->pdev->dev, paddr)) { + dev_kfree_skb(skb); + break; + } + + bp->rx_skbuff[entry] = skb; if (entry == RX_RING_SIZE - 1) paddr |= MACB_BIT(RX_WRAP); @@ -1036,11 +1041,15 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) } entry = macb_tx_ring_wrap(bp->tx_head); - bp->tx_head++; netdev_vdbg(bp->dev, "Allocated ring entry %u\n", entry); mapping = dma_map_single(&bp->pdev->dev, skb->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&bp->pdev->dev, mapping)) { + kfree_skb(skb); + goto unlock; + } + bp->tx_head++; tx_skb = &bp->tx_skb[entry]; tx_skb->skb = skb; tx_skb->mapping = mapping; @@ -1066,6 +1075,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < 1) netif_stop_queue(dev); +unlock: spin_unlock_irqrestore(&bp->lock, flags); return NETDEV_TX_OK; -- cgit v0.10.2 From 48330e08fa168395b9fd9f369f06cca1df204361 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Tue, 4 Mar 2014 08:46:40 -0800 Subject: net: macb: DMA-unmap full rx-buffer When allocating RX buffers a fixed size is used, while freeing is based on actually received bytes, resulting in the following kernel warning when CONFIG_DMA_API_DEBUG is enabled: WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1051 check_unmap+0x258/0x894() macb e000b000.ethernet: DMA-API: device driver frees DMA memory with different size [device address=0x000000002d170040] [map size=1536 bytes] [unmap size=60 bytes] Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.14.0-rc3-xilinx-00220-g49f84081ce4f #65 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0xc8) [] (dump_stack) from [] (warn_slowpath_common+0x60/0x84) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x2c/0x3c) [] (warn_slowpath_fmt) from [] (check_unmap+0x258/0x894) [] (check_unmap) from [] (debug_dma_unmap_page+0x64/0x70) [] (debug_dma_unmap_page) from [] (gem_rx+0x118/0x170) [] (gem_rx) from [] (macb_poll+0x24/0x94) [] (macb_poll) from [] (net_rx_action+0x6c/0x188) [] (net_rx_action) from [] (__do_softirq+0x108/0x280) [] (__do_softirq) from [] (irq_exit+0x84/0xf8) [] (irq_exit) from [] (handle_IRQ+0x68/0x8c) [] (handle_IRQ) from [] (gic_handle_irq+0x3c/0x60) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x78) Exception stack(0xc056df20 to 0xc056df68) df20: 00000001 c0577430 00000000 c0577430 04ce8e0d 00000002 edfce238 00000000 df40: 04e20f78 00000002 c05981f4 00000000 00000008 c056df68 c0064008 c02d7658 df60: 20000013 ffffffff [] (__irq_svc) from [] (cpuidle_enter_state+0x54/0xf8) [] (cpuidle_enter_state) from [] (cpuidle_idle_call+0xe0/0x138) [] (cpuidle_idle_call) from [] (arch_cpu_idle+0x8/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0xbc/0x124) [] (cpu_startup_entry) from [] (start_kernel+0x350/0x3b0) ---[ end trace d5fdc38641bd3a11 ]--- Mapped at: [] debug_dma_map_page+0x48/0x11c [] gem_rx_refill+0x154/0x1f8 [] macb_open+0x270/0x3e0 [] __dev_open+0x7c/0xfc [] __dev_change_flags+0x8c/0x140 Fixing this by passing the same size which is passed during mapping the memory to the unmap function as well. Signed-off-by: Soren Brinkmann Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 1f03e20..d0c38e0 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -730,7 +730,7 @@ static int gem_rx(struct macb *bp, int budget) skb_put(skb, len); addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr)); dma_unmap_single(&bp->pdev->dev, addr, - len, DMA_FROM_DEVICE); + bp->rx_buffer_size, DMA_FROM_DEVICE); skb->protocol = eth_type_trans(skb, bp->dev); skb_checksum_none_assert(skb); -- cgit v0.10.2 From 1b07da516ee25250f458c76c012ebe4cd677a84f Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 4 Mar 2014 14:11:06 -0800 Subject: hyperv: Move state setting for link query It moves the state setting for query into rndis_filter_receive_response(). All callbacks including query-complete and status-callback are synchronized by channel->inbound_lock. This prevents pentential race between them. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 7141a19..d6fce97 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -442,6 +442,8 @@ static int netvsc_probe(struct hv_device *dev, if (!net) return -ENOMEM; + netif_carrier_off(net); + net_device_ctx = netdev_priv(net); net_device_ctx->device_ctx = dev; hv_set_drvdata(dev, net); @@ -473,6 +475,8 @@ static int netvsc_probe(struct hv_device *dev, pr_err("Unable to register netdev.\n"); rndis_filter_device_remove(dev); free_netdev(net); + } else { + schedule_delayed_work(&net_device_ctx->dwork, 0); } return ret; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 1084e5d..b54fd25 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -243,6 +243,22 @@ static int rndis_filter_send_request(struct rndis_device *dev, return ret; } +static void rndis_set_link_state(struct rndis_device *rdev, + struct rndis_request *request) +{ + u32 link_status; + struct rndis_query_complete *query_complete; + + query_complete = &request->response_msg.msg.query_complete; + + if (query_complete->status == RNDIS_STATUS_SUCCESS && + query_complete->info_buflen == sizeof(u32)) { + memcpy(&link_status, (void *)((unsigned long)query_complete + + query_complete->info_buf_offset), sizeof(u32)); + rdev->link_state = link_status != 0; + } +} + static void rndis_filter_receive_response(struct rndis_device *dev, struct rndis_message *resp) { @@ -272,6 +288,10 @@ static void rndis_filter_receive_response(struct rndis_device *dev, sizeof(struct rndis_message) + RNDIS_EXT_LEN) { memcpy(&request->response_msg, resp, resp->msg_len); + if (request->request_msg.ndis_msg_type == + RNDIS_MSG_QUERY && request->request_msg.msg. + query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS) + rndis_set_link_state(dev, request); } else { netdev_err(ndev, "rndis response buffer overflow " @@ -620,7 +640,6 @@ static int rndis_filter_query_device_link_status(struct rndis_device *dev) ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &link_status, &size); - dev->link_state = (link_status != 0) ? true : false; return ret; } -- cgit v0.10.2 From d9120198ddef2c0b61ca6659ace41b7c1e7c8f08 Mon Sep 17 00:00:00 2001 From: Benoit Cousson Date: Fri, 28 Feb 2014 14:12:05 +0100 Subject: clk: shmobile: rcar-gen2: Use kick bit to allow Z clock frequency change The Z clock frequency change is effective only after setting the kick bit located in the FRQCRB register. Without that, the CA15 CPUs clock rate will never change. Fix that by checking if the kick bit is cleared and enable it to make the clock rate change effective. The bit is cleared automatically upon completion. Signed-off-by: Benoit Cousson Acked-by: Laurent Pinchart Signed-off-by: Mike Turquette diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/shmobile/clk-rcar-gen2.c index a59ec21..de680e6 100644 --- a/drivers/clk/shmobile/clk-rcar-gen2.c +++ b/drivers/clk/shmobile/clk-rcar-gen2.c @@ -26,6 +26,8 @@ struct rcar_gen2_cpg { void __iomem *reg; }; +#define CPG_FRQCRB 0x00000004 +#define CPG_FRQCRB_KICK BIT(31) #define CPG_SDCKCR 0x00000074 #define CPG_PLL0CR 0x000000d8 #define CPG_FRQCRC 0x000000e0 @@ -45,6 +47,7 @@ struct rcar_gen2_cpg { struct cpg_z_clk { struct clk_hw hw; void __iomem *reg; + void __iomem *kick_reg; }; #define to_z_clk(_hw) container_of(_hw, struct cpg_z_clk, hw) @@ -83,17 +86,45 @@ static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate, { struct cpg_z_clk *zclk = to_z_clk(hw); unsigned int mult; - u32 val; + u32 val, kick; + unsigned int i; mult = div_u64((u64)rate * 32, parent_rate); mult = clamp(mult, 1U, 32U); + if (clk_readl(zclk->kick_reg) & CPG_FRQCRB_KICK) + return -EBUSY; + val = clk_readl(zclk->reg); val &= ~CPG_FRQCRC_ZFC_MASK; val |= (32 - mult) << CPG_FRQCRC_ZFC_SHIFT; clk_writel(val, zclk->reg); - return 0; + /* + * Set KICK bit in FRQCRB to update hardware setting and wait for + * clock change completion. + */ + kick = clk_readl(zclk->kick_reg); + kick |= CPG_FRQCRB_KICK; + clk_writel(kick, zclk->kick_reg); + + /* + * Note: There is no HW information about the worst case latency. + * + * Using experimental measurements, it seems that no more than + * ~10 iterations are needed, independently of the CPU rate. + * Since this value might be dependant of external xtal rate, pll1 + * rate or even the other emulation clocks rate, use 1000 as a + * "super" safe value. + */ + for (i = 1000; i; i--) { + if (!(clk_readl(zclk->kick_reg) & CPG_FRQCRB_KICK)) + return 0; + + cpu_relax(); + } + + return -ETIMEDOUT; } static const struct clk_ops cpg_z_clk_ops = { @@ -120,6 +151,7 @@ static struct clk * __init cpg_z_clk_register(struct rcar_gen2_cpg *cpg) init.num_parents = 1; zclk->reg = cpg->reg + CPG_FRQCRC; + zclk->kick_reg = cpg->reg + CPG_FRQCRB; zclk->hw.init = &init; clk = clk_register(NULL, &zclk->hw); -- cgit v0.10.2 From 6cbde8253a8143ada18ec0d1711230747a7c1934 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 6 Mar 2014 03:30:46 +0000 Subject: ARM: KVM: fix non-VGIC compilation Add a stub for kvm_vgic_addr when compiling without CONFIG_KVM_ARM_VGIC. The usefulness of this configurarion is extremely doubtful, but let's fix it anyway (until we decide that we'll always support a VGIC). Reported-by: Michele Paolino Cc: Paolo Bonzini Cc: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index be85127..f27000f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -171,6 +171,11 @@ static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 add return 0; } +static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) +{ + return -ENXIO; +} + static inline int kvm_vgic_init(struct kvm *kvm) { return 0; -- cgit v0.10.2 From 224aa3ed45c8735ae02bb2ecca002409fa6aa772 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 5 Mar 2014 23:11:08 +0100 Subject: clocksource: vf_pit_timer: use complement for sched_clock reading Vybrids PIT register is monitonic decreasing. However, sched_clock reading needs to be monitonic increasing. Use bitwise not to get the complement of the clock register. This fixes the clock going backward. Also, the clock now starts at 0 since we load the register with the maximum value at start. Signed-off-by: Stefan Agner Acked-by: Shawn Guo Cc: daniel.lezcano@linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux@arm.linux.org.uk Link: http://lkml.kernel.org/r/d25af915993aec1b486be653eb86f748ddef54fe.1394057313.git.stefan@agner.ch Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner diff --git a/drivers/clocksource/vf_pit_timer.c b/drivers/clocksource/vf_pit_timer.c index 02821b0..a918bc4 100644 --- a/drivers/clocksource/vf_pit_timer.c +++ b/drivers/clocksource/vf_pit_timer.c @@ -54,7 +54,7 @@ static inline void pit_irq_acknowledge(void) static u64 pit_read_sched_clock(void) { - return __raw_readl(clksrc_base + PITCVAL); + return ~__raw_readl(clksrc_base + PITCVAL); } static int __init pit_clocksource_init(unsigned long rate) -- cgit v0.10.2 From e571c58f313d35c56e0018470e3375ddd1fd320e Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 6 Mar 2014 10:29:27 +1100 Subject: m68k: Skip futex_atomic_cmpxchg_inatomic() test Skip the futex_atomic_cmpxchg_inatomic() test in futex_init(). It causes a fatal exception on 68030 (and presumably 68020 also). Signed-off-by: Finn Thain Acked-by: Geert Uytterhoeven Link: http://lkml.kernel.org/r/alpine.LNX.2.00.1403061006440.5525@nippy.intranet Signed-off-by: Thomas Gleixner diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index dbdd223..b2e3229 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -17,6 +17,7 @@ config M68K select FPU if MMU select ARCH_WANT_IPC_PARSE_VERSION select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE + select HAVE_FUTEX_CMPXCHG if MMU && FUTEX select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_REL select MODULES_USE_ELF_RELA -- cgit v0.10.2 From 559ba4b153acfbfe49007def9d0efc475d5f937c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 4 Mar 2014 17:01:21 -0800 Subject: irqchip: Silence sparse warning drivers/irqchip/irqchip.c:27:13: warning: symbol 'irqchip_init' was not declared. Should it be static? Signed-off-by: Stephen Boyd Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1393981281-25553-1-git-send-email-sboyd@codeaurora.org Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c index f496afc..3469141 100644 --- a/drivers/irqchip/irqchip.c +++ b/drivers/irqchip/irqchip.c @@ -10,6 +10,7 @@ #include #include +#include #include "irqchip.h" -- cgit v0.10.2 From 6859358e4b0bf2e599027dc4c6317e0bc25ff339 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 4 Mar 2014 17:02:01 -0800 Subject: irqchip: gic: Silence sparse warnings drivers/irqchip/irq-gic.c:53:23: warning: duplicate [noderef] drivers/irqchip/irq-gic.c:651:6: warning: symbol 'gic_raise_softirq' was not declared. Should it be static? drivers/irqchip/irq-gic.c:872:29: warning: symbol 'gic_irq_domain_ops' was not declared. Should it be static? drivers/irqchip/irq-gic.c:977:12: warning: symbol 'gic_of_init' was not declared. Should it be static? Signed-off-by: Stephen Boyd Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1393981321-25721-1-git-send-email-sboyd@codeaurora.org Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 341c601..abeb5a9 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -50,7 +50,7 @@ union gic_base { void __iomem *common_base; - void __percpu __iomem **percpu_base; + void __percpu * __iomem *percpu_base; }; struct gic_chip_data { @@ -648,7 +648,7 @@ static void __init gic_pm_init(struct gic_chip_data *gic) #endif #ifdef CONFIG_SMP -void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) +static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) { int cpu; unsigned long flags, map = 0; @@ -869,7 +869,7 @@ static struct notifier_block gic_cpu_notifier = { }; #endif -const struct irq_domain_ops gic_irq_domain_ops = { +static const struct irq_domain_ops gic_irq_domain_ops = { .map = gic_irq_domain_map, .xlate = gic_irq_domain_xlate, }; @@ -974,7 +974,8 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start, #ifdef CONFIG_OF static int gic_cnt __initdata; -int __init gic_of_init(struct device_node *node, struct device_node *parent) +static int __init +gic_of_init(struct device_node *node, struct device_node *parent) { void __iomem *cpu_base; void __iomem *dist_base; -- cgit v0.10.2 From 7ff42473ebdee32ed3ac34f6bf4b4080c7455840 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 6 Mar 2014 12:08:37 +0100 Subject: x86: hardirq: Make irq_hv_callback_count available for CONFIG_HYPERV=m as well Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index afb6536..230853d 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -33,7 +33,7 @@ typedef struct { #ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; #endif -#if defined(CONFIG_HYPERV) || defined(CONFIG_XEN) +#if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) unsigned int irq_hv_callback_count; #endif } ____cacheline_aligned irq_cpustat_t; -- cgit v0.10.2 From 24bd9bf54d45d28089251cdf62bf14323d1aa827 Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Tue, 4 Mar 2014 22:38:10 -0800 Subject: drm/i915: Fix PSR programming | has a higher precedence than ?. Therefore, the calculation doesn't do at all what you would expect. Thanks to Ken for convincing me that this was indeed the issue. Send me back to C programmer school, please. I'm sort of surprised PSR was continuing to work for people. It should be broken IMO (and it was broken for me, but I had assumed it never worked). Regression from: commit ed8546ac1f99b850879f07b1e9b06b42fb0a36d9 Author: Ben Widawsky Date: Mon Nov 4 22:45:05 2013 -0800 drm/i915/bdw: Support eDP PSR Cc: Rodrigo Vivi Cc: Kenneth Graunke Cc: Art Runyan Reported-by: "Kumar, Kiran S" Cc: stable@vger.kernel.org [v3.13+] Signed-off-by: Ben Widawsky Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 57552eb..41bdac4 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1639,7 +1639,7 @@ static void intel_edp_psr_enable_source(struct intel_dp *intel_dp) val |= EDP_PSR_LINK_DISABLE; I915_WRITE(EDP_PSR_CTL(dev), val | - IS_BROADWELL(dev) ? 0 : link_entry_time | + (IS_BROADWELL(dev) ? 0 : link_entry_time) | max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT | idle_frames << EDP_PSR_IDLE_FRAME_SHIFT | EDP_PSR_ENABLE); -- cgit v0.10.2 From 999976e0f6233322a878b0b7148c810544d6c8a8 Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Tue, 4 Mar 2014 12:42:15 -0800 Subject: cpufreq: use cpufreq_cpu_get() to avoid cpufreq_get() race conditions If a module calls cpufreq_get while cpufreq is initializing, it's possible for it to be called after cpufreq_driver is set but before cpufreq_cpu_data is written during subsys_interface_register. This happens because cpufreq_get doesn't take the cpufreq_driver_lock around its use of cpufreq_cpu_data. Fix this by using cpufreq_cpu_get(cpu) to look up the policy rather than reading it out of cpufreq_cpu_data directly. cpufreq_cpu_get() takes the appropriate locks to prevent this race from happening. Since it's possible for policy to be NULL if the caller passes in an invalid CPU number or calls the function before cpufreq is initialized, delete the BUG_ON(!policy) and simply return 0. Don't try to return -ENOENT because that's negative and the function returns an unsigned integer. References: https://bbs.archlinux.org/viewtopic.php?id=177934 Signed-off-by: Aaron Plattner Cc: 3.13+ # 3.13+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cb003a6..c4cacab 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1546,23 +1546,16 @@ static unsigned int __cpufreq_get(unsigned int cpu) */ unsigned int cpufreq_get(unsigned int cpu) { - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); unsigned int ret_freq = 0; - if (cpufreq_disabled() || !cpufreq_driver) - return -ENOENT; - - BUG_ON(!policy); - - if (!down_read_trylock(&cpufreq_rwsem)) - return 0; - - down_read(&policy->rwsem); - - ret_freq = __cpufreq_get(cpu); + if (policy) { + down_read(&policy->rwsem); + ret_freq = __cpufreq_get(cpu); + up_read(&policy->rwsem); - up_read(&policy->rwsem); - up_read(&cpufreq_rwsem); + cpufreq_cpu_put(policy); + } return ret_freq; } -- cgit v0.10.2 From 5a7e56a5d29071bcccd947dee6e3b9f8e4eb3309 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 4 Mar 2014 11:44:00 +0800 Subject: cpufreq: Initialize policy before making it available for others to use Policy must be fully initialized before it is being made available for use by others. Otherwise cpufreq_cpu_get() would be able to grab a half initialized policy structure that might not have affected_cpus (for example) populated. Then, anybody accessing those fields will get a wrong value and that will lead to unpredictable results. In order to fix this, do all the necessary initialization before we make the policy structure available via cpufreq_cpu_get(). That will guarantee that any code accessing fields of the policy will get correct data from them. Reported-by: Saravana Kannan Signed-off-by: Viresh Kumar [rjw: Changelog] Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c4cacab..d6622689 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1109,6 +1109,20 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, goto err_set_policy_cpu; } + /* related cpus should atleast have policy->cpus */ + cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + + /* + * affected cpus must always be the one, which are online. We aren't + * managing offline cpus here. + */ + cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); + + if (!frozen) { + policy->user_policy.min = policy->min; + policy->user_policy.max = policy->max; + } + write_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) per_cpu(cpufreq_cpu_data, j) = policy; @@ -1162,20 +1176,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, } } - /* related cpus should atleast have policy->cpus */ - cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); - - /* - * affected cpus must always be the one, which are online. We aren't - * managing offline cpus here. - */ - cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); - - if (!frozen) { - policy->user_policy.min = policy->min; - policy->user_policy.max = policy->max; - } - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); -- cgit v0.10.2 From 4e97b631f24c927b2302368f4f83efbba82076ee Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 4 Mar 2014 11:44:01 +0800 Subject: cpufreq: Initialize governor for a new policy under policy->rwsem policy->rwsem is used to lock access to all parts of code modifying struct cpufreq_policy, but it's not used on a new policy created by __cpufreq_add_dev(). Because of that, if cpufreq_update_policy() is called in a tight loop on one CPU in parallel with offline/online of another CPU, then the following crash can be triggered: Unable to handle kernel NULL pointer dereference at virtual address 00000020 pgd = c0003000 [00000020] *pgd=80000000004003, *pmd=00000000 Internal error: Oops: 206 [#1] PREEMPT SMP ARM PC is at __cpufreq_governor+0x10/0x1ac LR is at cpufreq_update_policy+0x114/0x150 ---[ end trace f23a8defea6cd706 ]--- Kernel panic - not syncing: Fatal exception CPU0: stopping CPU: 0 PID: 7136 Comm: mpdecision Tainted: G D W 3.10.0-gd727407-00074-g979ede8 #396 [] (notifier_call_chain+0x40/0x68) from [] (__blocking_notifier_call_chain+0x40/0x58) [] (__blocking_notifier_call_chain+0x40/0x58) from [] (blocking_notifier_call_chain+0x14/0x1c) [] (blocking_notifier_call_chain+0x14/0x1c) from [] (cpufreq_set_policy+0xd4/0x2b8) [] (cpufreq_set_policy+0xd4/0x2b8) from [] (cpufreq_init_policy+0x30/0x98) [] (cpufreq_init_policy+0x30/0x98) from [] (__cpufreq_add_dev.isra.17+0x4dc/0x7a4) [] (__cpufreq_add_dev.isra.17+0x4dc/0x7a4) from [] (cpufreq_cpu_callback+0x58/0x84) [] (cpufreq_cpu_callback+0x58/0x84) from [] (notifier_call_chain+0x40/0x68) [] (notifier_call_chain+0x40/0x68) from [] (__cpu_notify+0x28/0x44) [] (__cpu_notify+0x28/0x44) from [] (_cpu_up+0xf4/0x1dc) [] (_cpu_up+0xf4/0x1dc) from [] (cpu_up+0x5c/0x78) [] (cpu_up+0x5c/0x78) from [] (store_online+0x44/0x74) [] (store_online+0x44/0x74) from [] (sysfs_write_file+0x108/0x14c) [] (sysfs_write_file+0x108/0x14c) from [] (vfs_write+0xd0/0x180) [] (vfs_write+0xd0/0x180) from [] (SyS_write+0x38/0x68) [] (SyS_write+0x38/0x68) from [] (ret_fast_syscall+0x0/0x30) Fix that by taking locks at appropriate places in __cpufreq_add_dev() as well. Reported-by: Saravana Kannan Suggested-by: Srivatsa S. Bhat Signed-off-by: Viresh Kumar [rjw: Changelog] Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d6622689..cf485d9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1123,6 +1123,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, policy->user_policy.max = policy->max; } + down_write(&policy->rwsem); write_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) per_cpu(cpufreq_cpu_data, j) = policy; @@ -1206,6 +1207,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, policy->user_policy.policy = policy->policy; policy->user_policy.governor = policy->governor; } + up_write(&policy->rwsem); kobject_uevent(&policy->kobj, KOBJ_ADD); up_read(&cpufreq_rwsem); -- cgit v0.10.2 From ad332c8a45330d170bb38b95209de449b31cd1b4 Mon Sep 17 00:00:00 2001 From: Kieran Clancy Date: Sat, 1 Mar 2014 00:42:28 +1030 Subject: ACPI / EC: Clear stale EC events on Samsung systems A number of Samsung notebooks (530Uxx/535Uxx/540Uxx/550Pxx/900Xxx/etc) continue to log events during sleep (lid open/close, AC plug/unplug, battery level change), which accumulate in the EC until a buffer fills. After the buffer is full (tests suggest it holds 8 events), GPEs stop being triggered for new events. This state persists on wake or even on power cycle, and prevents new events from being registered until the EC is manually polled. This is the root cause of a number of bugs, including AC not being detected properly, lid close not triggering suspend, and low ambient light not triggering the keyboard backlight. The bug also seemed to be responsible for performance issues on at least one user's machine. Juan Manuel Cabo found the cause of bug and the workaround of polling the EC manually on wake. The loop which clears the stale events is based on an earlier patch by Lan Tianyu (see referenced attachment). This patch: - Adds a function acpi_ec_clear() which polls the EC for stale _Q events at most ACPI_EC_CLEAR_MAX (currently 100) times. A warning is logged if this limit is reached. - Adds a flag EC_FLAGS_CLEAR_ON_RESUME which is set to 1 if the DMI system vendor is Samsung. This check could be replaced by several more specific DMI vendor/product pairs, but it's likely that the bug affects more Samsung products than just the five series mentioned above. Further, it should not be harmful to run acpi_ec_clear() on systems without the bug; it will return immediately after finding no data waiting. - Runs acpi_ec_clear() on initialisation (boot), from acpi_ec_add() - Runs acpi_ec_clear() on wake, from acpi_ec_unblock_transactions() References: https://bugzilla.kernel.org/show_bug.cgi?id=44161 References: https://bugzilla.kernel.org/show_bug.cgi?id=45461 References: https://bugzilla.kernel.org/show_bug.cgi?id=57271 References: https://bugzilla.kernel.org/attachment.cgi?id=126801 Suggested-by: Juan Manuel Cabo Signed-off-by: Kieran Clancy Reviewed-by: Lan Tianyu Reviewed-by: Dennis Jansen Tested-by: Kieran Clancy Tested-by: Juan Manuel Cabo Tested-by: Dennis Jansen Tested-by: Maurizio D'Addona Tested-by: San Zamoyski Cc: All applicable Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 959d41a..d7d32c2 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -67,6 +67,8 @@ enum ec_command { #define ACPI_EC_DELAY 500 /* Wait 500ms max. during EC ops */ #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. to get global lock */ #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ +#define ACPI_EC_CLEAR_MAX 100 /* Maximum number of events to query + * when trying to clear the EC */ enum { EC_FLAGS_QUERY_PENDING, /* Query is pending */ @@ -116,6 +118,7 @@ EXPORT_SYMBOL(first_ec); static int EC_FLAGS_MSI; /* Out-of-spec MSI controller */ static int EC_FLAGS_VALIDATE_ECDT; /* ASUStec ECDTs need to be validated */ static int EC_FLAGS_SKIP_DSDT_SCAN; /* Not all BIOS survive early DSDT scan */ +static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ /* -------------------------------------------------------------------------- Transaction Management @@ -440,6 +443,29 @@ acpi_handle ec_get_handle(void) EXPORT_SYMBOL(ec_get_handle); +static int acpi_ec_query_unlocked(struct acpi_ec *ec, u8 *data); + +/* + * Clears stale _Q events that might have accumulated in the EC. + * Run with locked ec mutex. + */ +static void acpi_ec_clear(struct acpi_ec *ec) +{ + int i, status; + u8 value = 0; + + for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) { + status = acpi_ec_query_unlocked(ec, &value); + if (status || !value) + break; + } + + if (unlikely(i == ACPI_EC_CLEAR_MAX)) + pr_warn("Warning: Maximum of %d stale EC events cleared\n", i); + else + pr_info("%d stale EC events cleared\n", i); +} + void acpi_ec_block_transactions(void) { struct acpi_ec *ec = first_ec; @@ -463,6 +489,10 @@ void acpi_ec_unblock_transactions(void) mutex_lock(&ec->mutex); /* Allow transactions to be carried out again */ clear_bit(EC_FLAGS_BLOCKED, &ec->flags); + + if (EC_FLAGS_CLEAR_ON_RESUME) + acpi_ec_clear(ec); + mutex_unlock(&ec->mutex); } @@ -821,6 +851,13 @@ static int acpi_ec_add(struct acpi_device *device) /* EC is fully operational, allow queries */ clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags); + + /* Clear stale _Q events if hardware might require that */ + if (EC_FLAGS_CLEAR_ON_RESUME) { + mutex_lock(&ec->mutex); + acpi_ec_clear(ec); + mutex_unlock(&ec->mutex); + } return ret; } @@ -922,6 +959,30 @@ static int ec_enlarge_storm_threshold(const struct dmi_system_id *id) return 0; } +/* + * On some hardware it is necessary to clear events accumulated by the EC during + * sleep. These ECs stop reporting GPEs until they are manually polled, if too + * many events are accumulated. (e.g. Samsung Series 5/9 notebooks) + * + * https://bugzilla.kernel.org/show_bug.cgi?id=44161 + * + * Ideally, the EC should also be instructed NOT to accumulate events during + * sleep (which Windows seems to do somehow), but the interface to control this + * behaviour is not known at this time. + * + * Models known to be affected are Samsung 530Uxx/535Uxx/540Uxx/550Pxx/900Xxx, + * however it is very likely that other Samsung models are affected. + * + * On systems which don't accumulate _Q events during sleep, this extra check + * should be harmless. + */ +static int ec_clear_on_resume(const struct dmi_system_id *id) +{ + pr_debug("Detected system needing EC poll on resume.\n"); + EC_FLAGS_CLEAR_ON_RESUME = 1; + return 0; +} + static struct dmi_system_id ec_dmi_table[] __initdata = { { ec_skip_dsdt_scan, "Compal JFL92", { @@ -965,6 +1026,9 @@ static struct dmi_system_id ec_dmi_table[] __initdata = { ec_validate_ecdt, "ASUS hardware", { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "L4R"),}, NULL}, + { + ec_clear_on_resume, "Samsung hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL}, {}, }; -- cgit v0.10.2 From 291fdb0bcebd5e8db6af767c1fdc522167dad73d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 12:39:03 +0100 Subject: ipc/compat_sys_msgrcv: change msgtyp type from long to compat_long_t Change the type of compat_sys_msgrcv's msgtyp parameter from long to compat_long_t, since compat user space passes only a 32 bit signed value. Let the compat wrapper do proper sign extension to 64 bit of this parameter. Signed-off-by: Heiko Carstens diff --git a/include/linux/compat.h b/include/linux/compat.h index 4c42df3..179b1da 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -321,7 +321,7 @@ asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg); asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz, int msgflg); asmlinkage long compat_sys_msgrcv(int msqid, compat_uptr_t msgp, - compat_ssize_t msgsz, long msgtyp, int msgflg); + compat_ssize_t msgsz, compat_long_t msgtyp, int msgflg); long compat_sys_msgctl(int first, int second, void __user *uptr); long compat_sys_shmctl(int first, int second, void __user *uptr); long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, diff --git a/ipc/compat.c b/ipc/compat.c index f486b00..e1f4ab6 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -430,9 +430,9 @@ COMPAT_SYSCALL_DEFINE4(msgsnd, int, msqid, compat_uptr_t, msgp, } COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp, - compat_ssize_t, msgsz, long, msgtyp, int, msgflg) + compat_ssize_t, msgsz, compat_long_t, msgtyp, int, msgflg) { - return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, msgtyp, + return do_msgrcv(msqid, compat_ptr(msgp), (ssize_t)msgsz, (long)msgtyp, msgflg, compat_do_msg_fill); } -- cgit v0.10.2 From 378a10f3ae2e8a67ecc8f548c8e5ff25881bd88a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Mar 2014 10:43:51 +0100 Subject: fs/compat: optional preadv64/pwrite64 compat system calls The preadv64/pwrite64 have been implemented for the x32 ABI, in order to allow passing 64 bit arguments from user space without splitting them into two 32 bit parameters, like it would be necessary for usual compat tasks. Howevert these two system calls are only being used for the x32 ABI, so add __ARCH_WANT_COMPAT defines for these two compat syscalls and make these two only visible for x86. Signed-off-by: Heiko Carstens diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index f4b5795..3f556c6 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -24,6 +24,8 @@ # include # define __ARCH_WANT_COMPAT_SYS_TIME # define __ARCH_WANT_COMPAT_SYS_GETDENTS64 +# define __ARCH_WANT_COMPAT_SYS_PREADV64 +# define __ARCH_WANT_COMPAT_SYS_PWRITEV64 # endif diff --git a/fs/read_write.c b/fs/read_write.c index edc5746..72c09b4 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -982,9 +982,9 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, return ret; } -COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, - const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos) +static long __compat_sys_preadv64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos) { struct fd f; ssize_t ret; @@ -1001,12 +1001,22 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, return ret; } +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 +COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos) +{ + return __compat_sys_preadv64(fd, vec, vlen, pos); +} +#endif + COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return compat_sys_preadv64(fd, vec, vlen, pos); + + return __compat_sys_preadv64(fd, vec, vlen, pos); } static size_t compat_writev(struct file *file, @@ -1049,9 +1059,9 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, return ret; } -COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, - const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos) +static long __compat_sys_pwritev64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos) { struct fd f; ssize_t ret; @@ -1068,12 +1078,22 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, return ret; } +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 +COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos) +{ + return __compat_sys_pwritev64(fd, vec, vlen, pos); +} +#endif + COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return compat_sys_pwritev64(fd, vec, vlen, pos); + + return __compat_sys_pwritev64(fd, vec, vlen, pos); } #endif diff --git a/include/linux/compat.h b/include/linux/compat.h index 179b1da..1c45742 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -340,6 +340,19 @@ asmlinkage ssize_t compat_sys_preadv(compat_ulong_t fd, asmlinkage ssize_t compat_sys_pwritev(compat_ulong_t fd, const struct compat_iovec __user *vec, compat_ulong_t vlen, u32 pos_low, u32 pos_high); + +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 +asmlinkage long compat_sys_preadv64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos); +#endif + +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 +asmlinkage long compat_sys_pwritev64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos); +#endif + asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int); asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv, -- cgit v0.10.2 From 62a6fa97684ed4c124564ea92500ecd513d60611 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 3 Mar 2014 16:11:13 +0100 Subject: kernel/compat: convert to COMPAT_SYSCALL_DEFINE Convert all compat system call functions where all parameter types have a size of four or less than four bytes, or are pointer types to COMPAT_SYSCALL_DEFINE. The implicit casts within COMPAT_SYSCALL_DEFINE will perform proper zero and sign extension to 64 bit of all parameters if needed. Signed-off-by: Heiko Carstens diff --git a/kernel/compat.c b/kernel/compat.c index 0a09e48..2622011 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -110,8 +110,8 @@ static int compat_put_timex(struct compat_timex __user *utp, struct timex *txc) return 0; } -asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) +COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv, + struct timezone __user *, tz) { if (tv) { struct timeval ktv; @@ -127,8 +127,8 @@ asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, return 0; } -asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, - struct timezone __user *tz) +COMPAT_SYSCALL_DEFINE2(settimeofday, struct compat_timeval __user *, tv, + struct timezone __user *, tz) { struct timespec kts; struct timezone ktz; @@ -236,8 +236,8 @@ static long compat_nanosleep_restart(struct restart_block *restart) return ret; } -asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp, - struct compat_timespec __user *rmtp) +COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp, + struct compat_timespec __user *, rmtp) { struct timespec tu, rmt; mm_segment_t oldfs; @@ -328,7 +328,7 @@ static compat_clock_t clock_t_to_compat_clock_t(clock_t x) return compat_jiffies_to_clock_t(clock_t_to_jiffies(x)); } -asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) +COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf) { if (tbuf) { struct tms tms; @@ -354,7 +354,7 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf) * types that can be passed to put_user()/get_user(). */ -asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set) +COMPAT_SYSCALL_DEFINE1(sigpending, compat_old_sigset_t __user *, set) { old_sigset_t s; long ret; @@ -424,8 +424,8 @@ COMPAT_SYSCALL_DEFINE3(sigprocmask, int, how, #endif -asmlinkage long compat_sys_setrlimit(unsigned int resource, - struct compat_rlimit __user *rlim) +COMPAT_SYSCALL_DEFINE2(setrlimit, unsigned int, resource, + struct compat_rlimit __user *, rlim) { struct rlimit r; @@ -443,8 +443,8 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource, #ifdef COMPAT_RLIM_OLD_INFINITY -asmlinkage long compat_sys_old_getrlimit(unsigned int resource, - struct compat_rlimit __user *rlim) +COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, + struct compat_rlimit __user *, rlim) { struct rlimit r; int ret; @@ -470,8 +470,8 @@ asmlinkage long compat_sys_old_getrlimit(unsigned int resource, #endif -asmlinkage long compat_sys_getrlimit(unsigned int resource, - struct compat_rlimit __user *rlim) +COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource, + struct compat_rlimit __user *, rlim) { struct rlimit r; int ret; @@ -596,9 +596,9 @@ static int compat_get_user_cpu_mask(compat_ulong_t __user *user_mask_ptr, return compat_get_bitmap(k, user_mask_ptr, len * 8); } -asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid, - unsigned int len, - compat_ulong_t __user *user_mask_ptr) +COMPAT_SYSCALL_DEFINE3(sched_setaffinity, compat_pid_t, pid, + unsigned int, len, + compat_ulong_t __user *, user_mask_ptr) { cpumask_var_t new_mask; int retval; @@ -616,8 +616,8 @@ out: return retval; } -asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len, - compat_ulong_t __user *user_mask_ptr) +COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t, pid, unsigned int, len, + compat_ulong_t __user *, user_mask_ptr) { int ret; cpumask_var_t mask; @@ -662,9 +662,9 @@ int put_compat_itimerspec(struct compat_itimerspec __user *dst, return 0; } -long compat_sys_timer_create(clockid_t which_clock, - struct compat_sigevent __user *timer_event_spec, - timer_t __user *created_timer_id) +COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock, + struct compat_sigevent __user *, timer_event_spec, + timer_t __user *, created_timer_id) { struct sigevent __user *event = NULL; @@ -680,9 +680,9 @@ long compat_sys_timer_create(clockid_t which_clock, return sys_timer_create(which_clock, event, created_timer_id); } -long compat_sys_timer_settime(timer_t timer_id, int flags, - struct compat_itimerspec __user *new, - struct compat_itimerspec __user *old) +COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, + struct compat_itimerspec __user *, new, + struct compat_itimerspec __user *, old) { long err; mm_segment_t oldfs; @@ -703,8 +703,8 @@ long compat_sys_timer_settime(timer_t timer_id, int flags, return err; } -long compat_sys_timer_gettime(timer_t timer_id, - struct compat_itimerspec __user *setting) +COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, + struct compat_itimerspec __user *, setting) { long err; mm_segment_t oldfs; @@ -720,8 +720,8 @@ long compat_sys_timer_gettime(timer_t timer_id, return err; } -long compat_sys_clock_settime(clockid_t which_clock, - struct compat_timespec __user *tp) +COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock, + struct compat_timespec __user *, tp) { long err; mm_segment_t oldfs; @@ -737,8 +737,8 @@ long compat_sys_clock_settime(clockid_t which_clock, return err; } -long compat_sys_clock_gettime(clockid_t which_clock, - struct compat_timespec __user *tp) +COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, + struct compat_timespec __user *, tp) { long err; mm_segment_t oldfs; @@ -754,8 +754,8 @@ long compat_sys_clock_gettime(clockid_t which_clock, return err; } -long compat_sys_clock_adjtime(clockid_t which_clock, - struct compat_timex __user *utp) +COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, + struct compat_timex __user *, utp) { struct timex txc; mm_segment_t oldfs; @@ -777,8 +777,8 @@ long compat_sys_clock_adjtime(clockid_t which_clock, return ret; } -long compat_sys_clock_getres(clockid_t which_clock, - struct compat_timespec __user *tp) +COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, + struct compat_timespec __user *, tp) { long err; mm_segment_t oldfs; @@ -818,9 +818,9 @@ static long compat_clock_nanosleep_restart(struct restart_block *restart) return err; } -long compat_sys_clock_nanosleep(clockid_t which_clock, int flags, - struct compat_timespec __user *rqtp, - struct compat_timespec __user *rmtp) +COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, + struct compat_timespec __user *, rqtp, + struct compat_timespec __user *, rmtp) { long err; mm_segment_t oldfs; @@ -1010,7 +1010,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese, /* compat_time_t is a 32 bit "long" and needs to get converted. */ -asmlinkage long compat_sys_time(compat_time_t __user * tloc) +COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc) { compat_time_t i; struct timeval tv; @@ -1026,7 +1026,7 @@ asmlinkage long compat_sys_time(compat_time_t __user * tloc) return i; } -asmlinkage long compat_sys_stime(compat_time_t __user *tptr) +COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr) { struct timespec tv; int err; @@ -1046,7 +1046,7 @@ asmlinkage long compat_sys_stime(compat_time_t __user *tptr) #endif /* __ARCH_WANT_COMPAT_SYS_TIME */ -asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp) +COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp) { struct timex txc; int err, ret; @@ -1085,10 +1085,10 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); } -asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, - compat_ulong_t maxnode, - const compat_ulong_t __user *old_nodes, - const compat_ulong_t __user *new_nodes) +COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid, + compat_ulong_t, maxnode, + const compat_ulong_t __user *, old_nodes, + const compat_ulong_t __user *, new_nodes) { unsigned long __user *old = NULL; unsigned long __user *new = NULL; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1f4bcb3..adf9862 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -1180,8 +1180,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, return ret; } -asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, - compat_long_t addr, compat_long_t data) +COMPAT_SYSCALL_DEFINE4(ptrace, compat_long_t, request, compat_long_t, pid, + compat_long_t, addr, compat_long_t, data) { struct task_struct *child; long ret; -- cgit v0.10.2 From 361d93c46f688d1a2209912bda377bdb48842cce Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 3 Mar 2014 16:21:27 +0100 Subject: net/compat: convert to COMPAT_SYSCALL_DEFINE Convert all compat system call functions where all parameter types have a size of four or less than four bytes, or are pointer types to COMPAT_SYSCALL_DEFINE. The implicit casts within COMPAT_SYSCALL_DEFINE will perform proper zero and sign extension to 64 bit of all parameters if needed. Signed-off-by: Heiko Carstens diff --git a/net/compat.c b/net/compat.c index f50161f..706b78b 100644 --- a/net/compat.c +++ b/net/compat.c @@ -384,8 +384,8 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname, return sock_setsockopt(sock, level, optname, optval, optlen); } -asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, - char __user *optval, unsigned int optlen) +COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, + char __user *, optval, unsigned int, optlen) { int err; struct socket *sock = sockfd_lookup(fd, &err); @@ -504,8 +504,8 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta } EXPORT_SYMBOL(compat_sock_get_timestampns); -asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) +COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, + char __user *, optval, int __user *, optlen) { int err; struct socket *sock = sockfd_lookup(fd, &err); @@ -735,15 +735,15 @@ static unsigned char nas[21] = { }; #undef AL -asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) +COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { if (flags & MSG_CMSG_COMPAT) return -EINVAL; return __sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned int vlen, unsigned int flags) +COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags) { if (flags & MSG_CMSG_COMPAT) return -EINVAL; @@ -751,7 +751,7 @@ asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) +COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { if (flags & MSG_CMSG_COMPAT) return -EINVAL; @@ -770,9 +770,9 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } -asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, - unsigned int vlen, unsigned int flags, - struct compat_timespec __user *timeout) +COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags, + struct compat_timespec __user *, timeout) { int datagrams; struct timespec ktspec; @@ -795,7 +795,7 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, return datagrams; } -asmlinkage long compat_sys_socketcall(int call, u32 __user *args) +COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args) { int ret; u32 a[6]; -- cgit v0.10.2 From c93e0f6c894312705fcdf1fc156b684a344155b5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 3 Mar 2014 16:32:26 +0100 Subject: mm/compat: convert to COMPAT_SYSCALL_DEFINE Convert all compat system call functions where all parameter types have a size of four or less than four bytes, or are pointer types to COMPAT_SYSCALL_DEFINE. The implicit casts within COMPAT_SYSCALL_DEFINE will perform proper zero and sign extension to 64 bit of all parameters if needed. Signed-off-by: Heiko Carstens diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ae3c8f3..eeb2e3d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1556,10 +1556,10 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, #ifdef CONFIG_COMPAT -asmlinkage long compat_sys_get_mempolicy(int __user *policy, - compat_ulong_t __user *nmask, - compat_ulong_t maxnode, - compat_ulong_t addr, compat_ulong_t flags) +COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, + compat_ulong_t __user *, nmask, + compat_ulong_t, maxnode, + compat_ulong_t, addr, compat_ulong_t, flags) { long err; unsigned long __user *nm = NULL; @@ -1586,8 +1586,8 @@ asmlinkage long compat_sys_get_mempolicy(int __user *policy, return err; } -asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, - compat_ulong_t maxnode) +COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, + compat_ulong_t, maxnode) { long err = 0; unsigned long __user *nm = NULL; @@ -1609,9 +1609,9 @@ asmlinkage long compat_sys_set_mempolicy(int mode, compat_ulong_t __user *nmask, return sys_set_mempolicy(mode, nm, nr_bits+1); } -asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, - compat_ulong_t mode, compat_ulong_t __user *nmask, - compat_ulong_t maxnode, compat_ulong_t flags) +COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, + compat_ulong_t, mode, compat_ulong_t __user *, nmask, + compat_ulong_t, maxnode, compat_ulong_t, flags) { long err = 0; unsigned long __user *nm = NULL; -- cgit v0.10.2 From 875ec3da4c50571906dcd23952f1dfcdd92ae3f1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 3 Mar 2014 16:34:41 +0100 Subject: security/compat: convert to COMPAT_SYSCALL_DEFINE Convert all compat system call functions where all parameter types have a size of four or less than four bytes, or are pointer types to COMPAT_SYSCALL_DEFINE. The implicit casts within COMPAT_SYSCALL_DEFINE will perform proper zero and sign extension to 64 bit of all parameters if needed. Signed-off-by: Heiko Carstens diff --git a/security/keys/compat.c b/security/keys/compat.c index bbd32c7..3478965 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -65,8 +65,8 @@ no_payload: * taking a 32-bit syscall are zero. If you can, you should call sys_keyctl() * directly. */ -asmlinkage long compat_sys_keyctl(u32 option, - u32 arg2, u32 arg3, u32 arg4, u32 arg5) +COMPAT_SYSCALL_DEFINE5(keyctl, u32, option, + u32, arg2, u32, arg3, u32, arg4, u32, arg5) { switch (option) { case KEYCTL_GET_KEYRING_ID: -- cgit v0.10.2 From 625b1d7e812d55df8d42253a134002c006de7468 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 10:53:50 +0100 Subject: fs/compat: convert to COMPAT_SYSCALL_DEFINE Convert all compat system call functions where all parameter types have a size of four or less than four bytes, or are pointer types to COMPAT_SYSCALL_DEFINE. The implicit casts within COMPAT_SYSCALL_DEFINE will perform proper zero and sign extension to 64 bit of all parameters if needed. Signed-off-by: Heiko Carstens diff --git a/fs/compat.c b/fs/compat.c index 0095a69..6d8312b 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -72,8 +72,8 @@ int compat_printk(const char *fmt, ...) * Not all architectures have sys_utime, so implement this in terms * of sys_utimes. */ -asmlinkage long compat_sys_utime(const char __user *filename, - struct compat_utimbuf __user *t) +COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, + struct compat_utimbuf __user *, t) { struct timespec tv[2]; @@ -87,7 +87,7 @@ asmlinkage long compat_sys_utime(const char __user *filename, return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); } -asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags) +COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) { struct timespec tv[2]; @@ -102,7 +102,7 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filena return do_utimes(dfd, filename, t ? tv : NULL, flags); } -asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t) +COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) { struct timespec tv[2]; @@ -121,7 +121,7 @@ asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filena return do_utimes(dfd, filename, t ? tv : NULL, 0); } -asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t) +COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) { return compat_sys_futimesat(AT_FDCWD, filename, t); } @@ -159,8 +159,8 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long compat_sys_newstat(const char __user * filename, - struct compat_stat __user *statbuf) +COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, + struct compat_stat __user *, statbuf) { struct kstat stat; int error; @@ -171,8 +171,8 @@ asmlinkage long compat_sys_newstat(const char __user * filename, return cp_compat_stat(&stat, statbuf); } -asmlinkage long compat_sys_newlstat(const char __user * filename, - struct compat_stat __user *statbuf) +COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, + struct compat_stat __user *, statbuf) { struct kstat stat; int error; @@ -184,9 +184,9 @@ asmlinkage long compat_sys_newlstat(const char __user * filename, } #ifndef __ARCH_WANT_STAT64 -asmlinkage long compat_sys_newfstatat(unsigned int dfd, - const char __user *filename, - struct compat_stat __user *statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, + const char __user *, filename, + struct compat_stat __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -198,8 +198,8 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, } #endif -asmlinkage long compat_sys_newfstat(unsigned int fd, - struct compat_stat __user * statbuf) +COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, + struct compat_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); @@ -247,7 +247,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * * The following statfs calls are copies of code from fs/statfs.c and * should be checked against those from time to time */ -asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) +COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) { struct kstatfs tmp; int error = user_statfs(pathname, &tmp); @@ -256,7 +256,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta return error; } -asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) +COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) { struct kstatfs tmp; int error = fd_statfs(fd, &tmp); @@ -298,7 +298,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat return 0; } -asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) +COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) { struct kstatfs tmp; int error; @@ -312,7 +312,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s return error; } -asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) +COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) { struct kstatfs tmp; int error; @@ -331,7 +331,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c * Given how simple this syscall is that apporach is more maintainable * than the various conversion hacks. */ -asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) +COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) { struct compat_ustat tmp; struct kstatfs sbuf; @@ -476,8 +476,7 @@ asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, return compat_sys_fcntl64(fd, cmd, arg); } -asmlinkage long -compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) +COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) { long ret; aio_context_t ctx64; @@ -869,8 +868,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_old_readdir(unsigned int fd, - struct compat_old_linux_dirent __user *dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, + struct compat_old_linux_dirent __user *, dirent, unsigned int, count) { int error; struct fd f = fdget(fd); @@ -948,8 +947,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_getdents(unsigned int fd, - struct compat_linux_dirent __user *dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, + struct compat_linux_dirent __user *, dirent, unsigned int, count) { struct fd f; struct compat_linux_dirent __user * lastdirent; @@ -1033,8 +1032,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_getdents64(unsigned int fd, - struct linux_dirent64 __user * dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) { struct fd f; struct linux_dirent64 __user * lastdirent; @@ -1287,9 +1286,9 @@ out_nofds: return ret; } -asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timeval __user *tvp) +COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timeval __user *, tvp) { struct timespec end_time, *to = NULL; struct compat_timeval tv; @@ -1320,7 +1319,7 @@ struct compat_sel_arg_struct { compat_uptr_t tvp; }; -asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg) +COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) { struct compat_sel_arg_struct a; @@ -1381,9 +1380,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; } -asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timespec __user *tsp, void __user *sig) +COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timespec __user *, tsp, void __user *, sig) { compat_size_t sigsetsize = 0; compat_uptr_t up = 0; @@ -1400,9 +1399,9 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, sigsetsize); } -asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, - unsigned int nfds, struct compat_timespec __user *tsp, - const compat_sigset_t __user *sigmask, compat_size_t sigsetsize) +COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, + unsigned int, nfds, struct compat_timespec __user *, tsp, + const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { compat_sigset_t ss32; sigset_t ksigmask, sigsaved; diff --git a/fs/exec.c b/fs/exec.c index 3d78fcc..4f59402 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1619,9 +1619,9 @@ SYSCALL_DEFINE3(execve, return do_execve(getname(filename), argv, envp); } #ifdef CONFIG_COMPAT -asmlinkage long compat_sys_execve(const char __user * filename, - const compat_uptr_t __user * argv, - const compat_uptr_t __user * envp) +COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, + const compat_uptr_t __user *, argv, + const compat_uptr_t __user *, envp) { return compat_do_execve(getname(filename), argv, envp); } -- cgit v0.10.2 From 5d70a59637911e84687b421afeb4c111a579fb2b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 11:17:50 +0100 Subject: ipc/compat: convert to COMPAT_SYSCALL_DEFINE Convert all compat system call functions where all parameter types have a size of four or less than four bytes, or are pointer types to COMPAT_SYSCALL_DEFINE. The implicit casts within COMPAT_SYSCALL_DEFINE will perform proper zero and sign extension to 64 bit of all parameters if needed. Signed-off-by: Heiko Carstens diff --git a/ipc/compat.c b/ipc/compat.c index e1f4ab6..98b9016 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -498,7 +498,7 @@ static inline int put_compat_msqid_ds(struct msqid64_ds *m, return err; } -long compat_sys_msgctl(int first, int second, void __user *uptr) +COMPAT_SYSCALL_DEFINE3(msgctl, int, first, int, second, void __user *, uptr) { int err, err2; struct msqid64_ds m64; @@ -668,7 +668,7 @@ static inline int put_compat_shm_info(struct shm_info __user *ip, return err; } -long compat_sys_shmctl(int first, int second, void __user *uptr) +COMPAT_SYSCALL_DEFINE3(shmctl, int, first, int, second, void __user *, uptr) { void __user *p; struct shmid64_ds s64; @@ -749,8 +749,9 @@ long compat_sys_shmctl(int first, int second, void __user *uptr) return err; } -long compat_sys_semtimedop(int semid, struct sembuf __user *tsems, - unsigned nsops, const struct compat_timespec __user *timeout) +COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems, + unsigned, nsops, + const struct compat_timespec __user *, timeout) { struct timespec __user *ts64 = NULL; if (timeout) { diff --git a/ipc/compat_mq.c b/ipc/compat_mq.c index 63d7c6de..af087fb 100644 --- a/ipc/compat_mq.c +++ b/ipc/compat_mq.c @@ -46,9 +46,9 @@ static inline int put_compat_mq_attr(const struct mq_attr *attr, | __put_user(attr->mq_curmsgs, &uattr->mq_curmsgs); } -asmlinkage long compat_sys_mq_open(const char __user *u_name, - int oflag, compat_mode_t mode, - struct compat_mq_attr __user *u_attr) +COMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name, + int, oflag, compat_mode_t, mode, + struct compat_mq_attr __user *, u_attr) { void __user *p = NULL; if (u_attr && oflag & O_CREAT) { @@ -105,8 +105,8 @@ asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, u_msg_prio, u_ts); } -asmlinkage long compat_sys_mq_notify(mqd_t mqdes, - const struct compat_sigevent __user *u_notification) +COMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, + const struct compat_sigevent __user *, u_notification) { struct sigevent __user *p = NULL; if (u_notification) { @@ -122,9 +122,9 @@ asmlinkage long compat_sys_mq_notify(mqd_t mqdes, return sys_mq_notify(mqdes, p); } -asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, - const struct compat_mq_attr __user *u_mqstat, - struct compat_mq_attr __user *u_omqstat) +COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, + const struct compat_mq_attr __user *, u_mqstat, + struct compat_mq_attr __user *, u_omqstat) { struct mq_attr mqstat; struct mq_attr __user *p = compat_alloc_user_space(2 * sizeof(*p)); -- cgit v0.10.2 From 932602e238329da99f8482c1b721549531fbfe7f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 16:07:52 +0100 Subject: fs/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types Some fs compat system calls have unsigned long parameters instead of compat_ulong_t. In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters their corresponding 32 bit counterparts. compat_sys_io_getevents() is a bit different: the non-compat version has signed parameters for the "min_nr" and "nr" parameters while the compat version has unsigned parameters. So change this as well. For all practical purposes this shouldn't make any difference (doesn't fix a real bug). Also introduce a generic compat_aio_context_t type which can be used everywhere. The access_ok() check within compat_sys_io_getevents() got also removed since the non-compat sys_io_getevents() should be able to handle everything anyway. Signed-off-by: Heiko Carstens diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 1174ea2..5d7e8cf 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -65,7 +65,6 @@ typedef u32 compat_caddr_t; typedef __kernel_fsid_t compat_fsid_t; typedef s32 compat_key_t; typedef s32 compat_timer_t; -typedef u32 compat_aio_context_t; typedef s32 compat_int_t; typedef s32 compat_long_t; diff --git a/fs/compat.c b/fs/compat.c index 6d8312b..19252b9 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -399,8 +399,8 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u } #endif -asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) { mm_segment_t old_fs; struct flock f; @@ -468,8 +468,8 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, return ret; } -asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) { if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64)) return -EINVAL; @@ -495,32 +495,24 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) return ret; } -asmlinkage long -compat_sys_io_getevents(aio_context_t ctx_id, - unsigned long min_nr, - unsigned long nr, - struct io_event __user *events, - struct compat_timespec __user *timeout) +COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, + compat_long_t, min_nr, + compat_long_t, nr, + struct io_event __user *, events, + struct compat_timespec __user *, timeout) { - long ret; struct timespec t; struct timespec __user *ut = NULL; - ret = -EFAULT; - if (unlikely(!access_ok(VERIFY_WRITE, events, - nr * sizeof(struct io_event)))) - goto out; if (timeout) { if (get_compat_timespec(&t, timeout)) - goto out; + return -EFAULT; ut = compat_alloc_user_space(sizeof(*ut)); if (copy_to_user(ut, &t, sizeof(t)) ) - goto out; + return -EFAULT; } - ret = sys_io_getevents(ctx_id, min_nr, nr, events, ut); -out: - return ret; + return sys_io_getevents(ctx_id, min_nr, nr, events, ut); } /* A write operation does a read from user space and vice versa */ @@ -616,8 +608,8 @@ copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) -asmlinkage long -compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) +COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, + int, nr, u32 __user *, iocb) { struct iocb __user * __user *iocb64; long ret; @@ -769,10 +761,10 @@ static int do_nfs4_super_data_conv(void *raw_data) #define NCPFS_NAME "ncpfs" #define NFS4_NAME "nfs4" -asmlinkage long compat_sys_mount(const char __user * dev_name, - const char __user * dir_name, - const char __user * type, unsigned long flags, - const void __user * data) +COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, + const char __user *, dir_name, + const char __user *, type, compat_ulong_t, flags, + const void __user *, data) { char *kernel_type; unsigned long data_page; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3881610..e822890 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1538,9 +1538,10 @@ static int compat_ioctl_check_table(unsigned int xcmd) return ioctl_pointer[i] == xcmd; } -asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg32) { + unsigned long arg = arg32; struct fd f = fdget(fd); int error = -EBADF; if (!f.file) diff --git a/include/linux/compat.h b/include/linux/compat.h index 1c45742..fea8ee9 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -71,6 +71,8 @@ typedef struct compat_sigaltstack { typedef __compat_uid32_t compat_uid_t; typedef __compat_gid32_t compat_gid_t; +typedef compat_ulong_t compat_aio_context_t; + struct compat_sel_arg_struct; struct rusage; @@ -497,20 +499,20 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf); asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, - unsigned long arg); + compat_ulong_t arg); asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg); + compat_ulong_t arg); asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p); -asmlinkage long compat_sys_io_getevents(aio_context_t ctx_id, - unsigned long min_nr, - unsigned long nr, +asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id, + compat_long_t min_nr, + compat_long_t nr, struct io_event __user *events, struct compat_timespec __user *timeout); -asmlinkage long compat_sys_io_submit(aio_context_t ctx_id, int nr, +asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr, u32 __user *iocb); asmlinkage long compat_sys_mount(const char __user *dev_name, const char __user *dir_name, - const char __user *type, unsigned long flags, + const char __user *type, compat_ulong_t flags, const void __user *data); asmlinkage long compat_sys_old_readdir(unsigned int fd, struct compat_old_linux_dirent __user *, @@ -633,7 +635,7 @@ asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig, struct compat_siginfo __user *uinfo); asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info); asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg); + compat_ulong_t arg); asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, struct compat_timespec __user *utime, u32 __user *uaddr2, u32 val3); -- cgit v0.10.2 From 8eee9093cdbeb2aa89d67dc1a3fd118acabaea52 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 16:19:16 +0100 Subject: ipc/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters to their corresponding 32 bit compat counterparts. Signed-off-by: Heiko Carstens diff --git a/include/linux/compat.h b/include/linux/compat.h index fea8ee9..f670e59 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -655,11 +655,11 @@ asmlinkage long compat_sys_mq_open(const char __user *u_name, struct compat_mq_attr __user *u_attr); asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, - size_t msg_len, unsigned int msg_prio, + compat_size_t msg_len, unsigned int msg_prio, const struct compat_timespec __user *u_abs_timeout); asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, - size_t msg_len, unsigned int __user *u_msg_prio, + compat_size_t msg_len, unsigned int __user *u_msg_prio, const struct compat_timespec __user *u_abs_timeout); asmlinkage long compat_sys_socketcall(int call, u32 __user *args); asmlinkage long compat_sys_sysctl(struct compat_sysctl_args __user *args); diff --git a/ipc/compat_mq.c b/ipc/compat_mq.c index af087fb..d5874729 100644 --- a/ipc/compat_mq.c +++ b/ipc/compat_mq.c @@ -78,10 +78,10 @@ static int compat_prepare_timeout(struct timespec __user **p, return 0; } -asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, - const char __user *u_msg_ptr, - size_t msg_len, unsigned int msg_prio, - const struct compat_timespec __user *u_abs_timeout) +COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, + const char __user *, u_msg_ptr, + compat_size_t, msg_len, unsigned int, msg_prio, + const struct compat_timespec __user *, u_abs_timeout) { struct timespec __user *u_ts; @@ -92,10 +92,10 @@ asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes, msg_prio, u_ts); } -asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes, - char __user *u_msg_ptr, - size_t msg_len, unsigned int __user *u_msg_prio, - const struct compat_timespec __user *u_abs_timeout) +COMPAT_SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, + char __user *, u_msg_ptr, + compat_size_t, msg_len, unsigned int __user *, u_msg_prio, + const struct compat_timespec __user *, u_abs_timeout) { struct timespec __user *u_ts; if (compat_prepare_timeout(&u_ts, u_abs_timeout)) -- cgit v0.10.2 From 3a49a0f7181c243aa04e6c5e44ca70a90ead8f9a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 17:09:57 +0100 Subject: net/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters to their corresponding 32 bit compat counterparts. Signed-off-by: Heiko Carstens diff --git a/include/linux/compat.h b/include/linux/compat.h index f670e59..8e63621 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -569,9 +569,9 @@ asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg, unsigned vlen, unsigned int flags); asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags); -asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, +asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len, unsigned flags); -asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, +asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, compat_size_t len, unsigned flags, struct sockaddr __user *addr, int __user *addrlen); asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, diff --git a/net/compat.c b/net/compat.c index 706b78b..9a76eaf 100644 --- a/net/compat.c +++ b/net/compat.c @@ -758,14 +758,14 @@ COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, uns return __sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned int flags) +COMPAT_SYSCALL_DEFINE4(recv, int, fd, void __user *, buf, compat_size_t, len, unsigned int, flags) { return sys_recv(fd, buf, len, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, - unsigned int flags, struct sockaddr __user *addr, - int __user *addrlen) +COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len, + unsigned int, flags, struct sockaddr __user *, addr, + int __user *, addrlen) { return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); } -- cgit v0.10.2 From ca2c405ab90591dcb1bc3765467cbdf2b99a0f6a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 17:13:42 +0100 Subject: kexec/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters to their corresponding 32 bit compat counterparts. Signed-off-by: Heiko Carstens diff --git a/include/linux/compat.h b/include/linux/compat.h index 8e63621..ef4834c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -641,10 +641,10 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, u32 val3); asmlinkage long compat_sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen); -asmlinkage long compat_sys_kexec_load(unsigned long entry, - unsigned long nr_segments, +asmlinkage long compat_sys_kexec_load(compat_ulong_t entry, + compat_ulong_t nr_segments, struct compat_kexec_segment __user *, - unsigned long flags); + compat_ulong_t flags); asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes, const struct compat_mq_attr __user *u_mqstat, struct compat_mq_attr __user *u_omqstat); diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 6d4066c..a756419 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -127,12 +127,6 @@ extern asmlinkage long sys_kexec_load(unsigned long entry, struct kexec_segment __user *segments, unsigned long flags); extern int kernel_kexec(void); -#ifdef CONFIG_COMPAT -extern asmlinkage long compat_sys_kexec_load(unsigned long entry, - unsigned long nr_segments, - struct compat_kexec_segment __user *segments, - unsigned long flags); -#endif extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); extern void crash_kexec(struct pt_regs *); diff --git a/kernel/kexec.c b/kernel/kexec.c index 60bafbe..45601cf 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1039,10 +1039,10 @@ void __weak crash_unmap_reserved_pages(void) {} #ifdef CONFIG_COMPAT -asmlinkage long compat_sys_kexec_load(unsigned long entry, - unsigned long nr_segments, - struct compat_kexec_segment __user *segments, - unsigned long flags) +COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry, + compat_ulong_t, nr_segments, + struct compat_kexec_segment __user *, segments, + compat_ulong_t, flags) { struct compat_kexec_segment in; struct kexec_segment out, __user *ksegments; -- cgit v0.10.2 From 2f2728f6de9837abe4b354443a45be578fbbf942 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 4 Mar 2014 17:18:23 +0100 Subject: mm/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types In order to allow the COMPAT_SYSCALL_DEFINE macro generate code that performs proper zero and sign extension convert all 64 bit parameters to their corresponding 32 bit compat counterparts. Signed-off-by: Heiko Carstens diff --git a/include/linux/compat.h b/include/linux/compat.h index ef4834c..7c76562 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -469,7 +469,7 @@ asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, asmlinkage long compat_sys_timerfd_gettime(int ufd, struct compat_itimerspec __user *otmr); -asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, +asmlinkage long compat_sys_move_pages(pid_t pid, compat_ulong_t nr_pages, __u32 __user *pages, const int __user *nodes, int __user *status, @@ -674,12 +674,12 @@ extern void __user *compat_alloc_user_space(unsigned long len); asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid, const struct compat_iovec __user *lvec, - unsigned long liovcnt, const struct compat_iovec __user *rvec, - unsigned long riovcnt, unsigned long flags); + compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, + compat_ulong_t riovcnt, compat_ulong_t flags); asmlinkage ssize_t compat_sys_process_vm_writev(compat_pid_t pid, const struct compat_iovec __user *lvec, - unsigned long liovcnt, const struct compat_iovec __user *rvec, - unsigned long riovcnt, unsigned long flags); + compat_ulong_t liovcnt, const struct compat_iovec __user *rvec, + compat_ulong_t riovcnt, compat_ulong_t flags); asmlinkage long compat_sys_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, compat_size_t count); diff --git a/kernel/compat.c b/kernel/compat.c index 2622011..488ff8c 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1065,11 +1065,11 @@ COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp) } #ifdef CONFIG_NUMA -asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, - compat_uptr_t __user *pages32, - const int __user *nodes, - int __user *status, - int flags) +COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages, + compat_uptr_t __user *, pages32, + const int __user *, nodes, + int __user *, status, + int, flags) { const void __user * __user *pages; int i; diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index fd26d04..3c5cf68 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -456,25 +456,23 @@ free_iovecs: return rc; } -asmlinkage ssize_t -compat_sys_process_vm_readv(compat_pid_t pid, - const struct compat_iovec __user *lvec, - unsigned long liovcnt, - const struct compat_iovec __user *rvec, - unsigned long riovcnt, - unsigned long flags) +COMPAT_SYSCALL_DEFINE6(process_vm_readv, compat_pid_t, pid, + const struct compat_iovec __user *, lvec, + compat_ulong_t, liovcnt, + const struct compat_iovec __user *, rvec, + compat_ulong_t, riovcnt, + compat_ulong_t, flags) { return compat_process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 0); } -asmlinkage ssize_t -compat_sys_process_vm_writev(compat_pid_t pid, - const struct compat_iovec __user *lvec, - unsigned long liovcnt, - const struct compat_iovec __user *rvec, - unsigned long riovcnt, - unsigned long flags) +COMPAT_SYSCALL_DEFINE6(process_vm_writev, compat_pid_t, pid, + const struct compat_iovec __user *, lvec, + compat_ulong_t, liovcnt, + const struct compat_iovec __user *, rvec, + compat_ulong_t, riovcnt, + compat_ulong_t, flags) { return compat_process_vm_rw(pid, lvec, liovcnt, rvec, riovcnt, flags, 1); -- cgit v0.10.2 From 9a205286bcca84b38d3ab1689f16236d1935af2d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Mar 2014 12:55:06 +0100 Subject: s390/compat: build error for large compat syscall args Enforce 32 bit types for all compat syscall argument types. This way we can make sure that all arguments get correct sign or zero extension. Otherwise incorrect code would be generated. E.g. for a 'long' type the COMPAT_SYSCALL_DEFINE macro wouldn't generate code that would cause sign extension of the passed in 32 bit user space parameter. This can cause quite subtle bugs like e.g. the one that was fixed with dfd948e32af2e "fs/compat: fix parameter handling for compat readv/writev syscalls". Signed-off-by: Heiko Carstens diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 5d7e8cf..d350ed9 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -8,7 +8,11 @@ #include #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) -#define __SC_DELOUSE(t,v) (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)) + +#define __SC_DELOUSE(t,v) ({ \ + BUILD_BUG_ON(sizeof(t) > 4 && !__TYPE_IS_PTR(t)); \ + (t)(__TYPE_IS_PTR(t) ? ((v) & 0x7fffffff) : (v)); \ +}) #define PSW32_MASK_PER 0x40000000UL #define PSW32_MASK_DAT 0x04000000UL -- cgit v0.10.2 From 5b098c204827ed21961988b4206b411f90cc89c8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 5 Mar 2014 10:05:56 +0100 Subject: s390/compat: get rid of compat wrapper assembly code Now that all compat syscalls have been converted to use the COMPAT_SYSCALL_DEFINE macros, we don't need to compat syscall wrapper assembly code anymore. So remove it and fix up the system call table accordingly. Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 725515f9..1b3ac09 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -50,7 +50,6 @@ compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ compat_wrapper.o compat_exec_domain.o \ $(compat-obj-y) -obj-$(CONFIG_COMPAT) += compat_wrap.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index 330e11d..39ddfdb 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -112,5 +112,7 @@ long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags); long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow); +long compat_sys_sigreturn(void); +long compat_sys_rt_sigreturn(void); #endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 8b84bc3..7df5ed9 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -241,7 +241,7 @@ static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs) return 0; } -asmlinkage long sys32_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; @@ -260,7 +260,7 @@ badframe: return 0; } -asmlinkage long sys32_rt_sigreturn(void) +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; diff --git a/arch/s390/kernel/compat_wrap.c b/arch/s390/kernel/compat_wrap.c deleted file mode 100644 index d123f5d..0000000 --- a/arch/s390/kernel/compat_wrap.c +++ /dev/null @@ -1,209 +0,0 @@ -#include -#include -#include "entry.h" - -#define COMPAT_SYSCALL_WRAP1(name, ...) \ - COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP2(name, ...) \ - COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP3(name, ...) \ - COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP4(name, ...) \ - COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP5(name, ...) \ - COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__) -#define COMPAT_SYSCALL_WRAP6(name, ...) \ - COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) - -#define __SC_COMPAT_TYPE(t, a) \ - __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a - -#define __SC_COMPAT_CAST(t, a) \ -({ \ - long __ReS = a; \ - \ - BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ - !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \ - if (__TYPE_IS_L(t)) \ - __ReS = (s32)a; \ - if (__TYPE_IS_UL(t)) \ - __ReS = (u32)a; \ - if (__TYPE_IS_PTR(t)) \ - __ReS = a & 0x7fffffff; \ - (t)__ReS; \ -}) - -/* - * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by - * compat tasks. These wrappers will only be used for system calls where only - * the system call arguments need sign or zero extension or zeroing of the upper - * 33 bits of pointers. - * Note: since the wrapper function will afterwards call a system call which - * again performs zero and sign extension for all system call arguments with - * a size of less than eight bytes, these compat wrappers only touch those - * system call arguments with a size of eight bytes ((unsigned) long and - * pointers). Zero and sign extension for e.g. int parameters will be done by - * the regular system call wrappers. - */ -#define COMPAT_SYSCALL_WRAPx(x, name, ...) \ - asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ - asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ - asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ - { \ - return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ - } - -COMPAT_SYSCALL_WRAP1(exit, int, error_code); -COMPAT_SYSCALL_WRAP1(close, unsigned int, fd); -COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); -COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname); -COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname); -COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename); -COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev); -COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode); -COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); -COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds); -COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); -COMPAT_SYSCALL_WRAP1(nice, int, increment); -COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig); -COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname); -COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); -COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); -COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes); -COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); -COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk); -COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); -COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); -COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); -COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid); -COMPAT_SYSCALL_WRAP1(umask, int, mask); -COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); -COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd); -COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask); -COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); -COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); -COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); -COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); -COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); -COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); -COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len); -COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode); -COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who); -COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval); -COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len); -COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); -COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); -COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); -COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); -COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot); -COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs); -COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); -COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); -COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid); -COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd); -COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data); -COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2); -COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); -COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence); -COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd); -COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags); -COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid); -COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd); -COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len); -COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len); -COMPAT_SYSCALL_WRAP1(mlockall, int, flags); -COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param); -COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid); -COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy); -COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy); -COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr); -COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); -COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5); -COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size); -COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); -COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); -COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group); -COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid); -COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid); -COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist); -COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist); -COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group); -COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid); -COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid); -COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid); -COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid); -COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group); -COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid); -COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid); -COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid); -COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid); -COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); -COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec); -COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior); -COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); -COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); -COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags); -COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count); -COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); -COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); -COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size); -COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size); -COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size); -COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size); -COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); -COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); -COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); -COMPAT_SYSCALL_WRAP1(exit_group, int, error_code); -COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr); -COMPAT_SYSCALL_WRAP1(epoll_create, int, size); -COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event); -COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); -COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id); -COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); -COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx); -COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); -COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); -COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id); -COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); -COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags); -COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio); -COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who); -COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); -COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); -COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); -COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); -COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag); -COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag); -COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname); -COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags); -COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname); -COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz); -COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode); -COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode); -COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags); -COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); -COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags); -COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); -COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count); -COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags); -COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags); -COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags); -COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags); -COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags); -COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); -COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); -COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); -COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); -COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); -COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); -COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); -COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); -COMPAT_SYSCALL_WRAP1(syncfs, int, fd); -COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype); -COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum); -COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2); -COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); -COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); -COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S deleted file mode 100644 index 23d9f8a..0000000 --- a/arch/s390/kernel/compat_wrapper.S +++ /dev/null @@ -1,369 +0,0 @@ -/* -* wrapper for 31 bit compatible system calls. -* -* Copyright IBM Corp. 2000, 2006 -* Author(s): Gerhard Tonn (ton@de.ibm.com), -* Thomas Spatzier (tspat@de.ibm.com) -*/ - -#include - -ENTRY(sys32_time_wrapper) - llgtr %r2,%r2 # int * - jg compat_sys_time # branch to system call - -#sys32_getpid_wrapper # void - -ENTRY(sys32_mount_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # char * - llgfr %r5,%r5 # unsigned long - llgtr %r6,%r6 # void * - jg compat_sys_mount # branch to system call - -ENTRY(sys32_ptrace_wrapper) - lgfr %r2,%r2 # long - lgfr %r3,%r3 # long - llgtr %r4,%r4 # long - llgfr %r5,%r5 # long - jg compat_sys_ptrace # branch to system call - -ENTRY(compat_sys_utime_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_utimbuf * - jg compat_sys_utime # branch to system call - -ENTRY(compat_sys_times_wrapper) - llgtr %r2,%r2 # struct compat_tms * - jg compat_sys_times # branch to system call - -ENTRY(compat_sys_ioctl_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned int - jg compat_sys_ioctl # branch to system call - -ENTRY(compat_sys_fcntl_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl # branch to system call - -ENTRY(sys32_ustat_wrapper) - llgfr %r2,%r2 # dev_t - llgtr %r3,%r3 # struct ustat * - jg compat_sys_ustat - -ENTRY(compat_sys_sigpending_wrapper) - llgtr %r2,%r2 # compat_old_sigset_t * - jg compat_sys_sigpending # branch to system call - -ENTRY(compat_sys_setrlimit_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_setrlimit # branch to system call - -ENTRY(compat_sys_old_getrlimit_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_old_getrlimit # branch to system call - -ENTRY(compat_sys_getrlimit_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct rlimit_emu31 * - jg compat_sys_getrlimit # branch to system call - -ENTRY(compat_sys_gettimeofday_wrapper) - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg compat_sys_gettimeofday # branch to system call - -ENTRY(compat_sys_settimeofday_wrapper) - llgtr %r2,%r2 # struct timeval_emu31 * - llgtr %r3,%r3 # struct timezone * - jg compat_sys_settimeofday # branch to system call - -ENTRY(old32_readdir_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_old_readdir # branch to system call - -ENTRY(compat_sys_statfs_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_statfs # branch to system call - -ENTRY(compat_sys_fstatfs_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct compat_statfs * - jg compat_sys_fstatfs # branch to system call - -ENTRY(compat_sys_socketcall_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # u32 * - jg compat_sys_socketcall # branch to system call - -ENTRY(compat_sys_newstat_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newstat # branch to system call - -ENTRY(compat_sys_newlstat_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newlstat # branch to system call - -ENTRY(compat_sys_newfstat_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # struct stat_emu31 * - jg compat_sys_newfstat # branch to system call - -ENTRY(compat_sys_sysinfo_wrapper) - llgtr %r2,%r2 # struct sysinfo_emu31 * - jg compat_sys_sysinfo # branch to system call - -ENTRY(compat_sys_adjtimex_wrapper) - llgtr %r2,%r2 # struct compat_timex * - jg compat_sys_adjtimex # branch to system call - -ENTRY(sys32_getdents_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # void * - llgfr %r4,%r4 # unsigned int - jg compat_sys_getdents # branch to system call - -ENTRY(compat_sys_select_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # compat_fd_set * - llgtr %r4,%r4 # compat_fd_set * - llgtr %r5,%r5 # compat_fd_set * - llgtr %r6,%r6 # struct compat_timeval * - jg compat_sys_select # branch to system call - -ENTRY(compat_sys_readv_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_readv # branch to system call - -ENTRY(compat_sys_writev_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # const struct compat_iovec * - llgfr %r4,%r4 # unsigned long - jg compat_sys_writev # branch to system call - -ENTRY(compat_sys_nanosleep_wrapper) - llgtr %r2,%r2 # struct compat_timespec * - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_nanosleep # branch to system call - -ENTRY(compat_sys_fcntl64_wrapper) - llgfr %r2,%r2 # unsigned int - llgfr %r3,%r3 # unsigned int - llgfr %r4,%r4 # unsigned long - jg compat_sys_fcntl64 # branch to system call - -ENTRY(sys32_stime_wrapper) - llgtr %r2,%r2 # long * - jg compat_sys_stime # branch to system call - -ENTRY(sys32_sched_setaffinity_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_setaffinity - -ENTRY(sys32_sched_getaffinity_wrapper) - lgfr %r2,%r2 # int - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # unsigned long * - jg compat_sys_sched_getaffinity - -ENTRY(sys32_clock_settime_wrapper) - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_settime - -ENTRY(sys32_clock_gettime_wrapper) - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_gettime - -ENTRY(sys32_clock_getres_wrapper) - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timespec * - jg compat_sys_clock_getres - -ENTRY(sys32_clock_nanosleep_wrapper) - lgfr %r2,%r2 # clockid_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_timespec * - llgtr %r5,%r5 # struct compat_timespec * - jg compat_sys_clock_nanosleep - -ENTRY(sys32_timer_create_wrapper) - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_sigevent * - llgtr %r4,%r4 # timer_t * - jg compat_sys_timer_create - -ENTRY(sys32_timer_settime_wrapper) - lgfr %r2,%r2 # timer_t (int) - lgfr %r3,%r3 # int - llgtr %r4,%r4 # struct compat_itimerspec * - llgtr %r5,%r5 # struct compat_itimerspec * - jg compat_sys_timer_settime - -ENTRY(sys32_timer_gettime_wrapper) - lgfr %r2,%r2 # timer_t (int) - llgtr %r3,%r3 # struct compat_itimerspec * - jg compat_sys_timer_gettime - -ENTRY(sys32_io_setup_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # u32 * - jg compat_sys_io_setup - -ENTRY(sys32_io_getevents_wrapper) - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - lgfr %r4,%r4 # long - llgtr %r5,%r5 # struct io_event * - llgtr %r6,%r6 # struct compat_timespec * - jg compat_sys_io_getevents - -ENTRY(sys32_io_submit_wrapper) - llgfr %r2,%r2 # (aio_context_t) u32 - lgfr %r3,%r3 # long - llgtr %r4,%r4 # struct iocb ** - jg compat_sys_io_submit - -ENTRY(compat_sys_statfs64_wrapper) - llgtr %r2,%r2 # const char * - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_statfs64 - -ENTRY(compat_sys_fstatfs64_wrapper) - llgfr %r2,%r2 # unsigned int fd - llgfr %r3,%r3 # compat_size_t - llgtr %r4,%r4 # struct compat_statfs64 * - jg compat_sys_fstatfs64 - -ENTRY(compat_sys_mq_open_wrapper) - llgtr %r2,%r2 # const char * - lgfr %r3,%r3 # int - llgfr %r4,%r4 # mode_t - llgtr %r5,%r5 # struct compat_mq_attr * - jg compat_sys_mq_open - -ENTRY(compat_sys_mq_timedsend_wrapper) - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # const char * - llgfr %r4,%r4 # size_t - llgfr %r5,%r5 # unsigned int - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedsend - -ENTRY(compat_sys_mq_timedreceive_wrapper) - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # char * - llgfr %r4,%r4 # size_t - llgtr %r5,%r5 # unsigned int * - llgtr %r6,%r6 # const struct compat_timespec * - jg compat_sys_mq_timedreceive - -ENTRY(compat_sys_mq_notify_wrapper) - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_sigevent * - jg compat_sys_mq_notify - -ENTRY(compat_sys_mq_getsetattr_wrapper) - lgfr %r2,%r2 # mqd_t - llgtr %r3,%r3 # struct compat_mq_attr * - llgtr %r4,%r4 # struct compat_mq_attr * - jg compat_sys_mq_getsetattr - -ENTRY(compat_sys_kexec_load_wrapper) - llgfr %r2,%r2 # unsigned long - llgfr %r3,%r3 # unsigned long - llgtr %r4,%r4 # struct kexec_segment * - llgfr %r5,%r5 # unsigned long - jg compat_sys_kexec_load - -ENTRY(compat_sys_futimesat_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct timeval * - jg compat_sys_futimesat - -ENTRY(compat_sys_pselect6_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # fd_set * - llgtr %r4,%r4 # fd_set * - llgtr %r5,%r5 # fd_set * - llgtr %r6,%r6 # struct timespec * - llgt %r0,164(%r15) # void * - stg %r0,160(%r15) - jg compat_sys_pselect6 - -ENTRY(compat_sys_ppoll_wrapper) - llgtr %r2,%r2 # struct pollfd * - llgfr %r3,%r3 # unsigned int - llgtr %r4,%r4 # struct timespec * - llgtr %r5,%r5 # const sigset_t * - llgfr %r6,%r6 # size_t - jg compat_sys_ppoll - -ENTRY(compat_sys_utimes_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # struct compat_timeval * - jg compat_sys_utimes - -ENTRY(compat_sys_utimensat_wrapper) - llgfr %r2,%r2 # unsigned int - llgtr %r3,%r3 # char * - llgtr %r4,%r4 # struct compat_timespec * - lgfr %r5,%r5 # int - jg compat_sys_utimensat - -ENTRY(compat_sys_keyctl_wrapper) - llgfr %r2,%r2 # u32 - llgfr %r3,%r3 # u32 - llgfr %r4,%r4 # u32 - llgfr %r5,%r5 # u32 - llgfr %r6,%r6 # u32 - jg compat_sys_keyctl # branch to system call - -ENTRY(sys32_execve_wrapper) - llgtr %r2,%r2 # char * - llgtr %r3,%r3 # compat_uptr_t * - llgtr %r4,%r4 # compat_uptr_t * - jg compat_sys_execve # branch to system call - -ENTRY(compat_sys_clock_adjtime_wrapper) - lgfr %r2,%r2 # clockid_t (int) - llgtr %r3,%r3 # struct compat_timex __user * - jg compat_sys_clock_adjtime - -ENTRY(compat_sys_process_vm_readv_wrapper) - lgfr %r2,%r2 # compat_pid_t - llgtr %r3,%r3 # struct compat_iovec __user * - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # struct compat_iovec __user * - llgfr %r6,%r6 # unsigned long - llgf %r0,164(%r15) # unsigned long - stg %r0,160(%r15) - jg compat_sys_process_vm_readv - -ENTRY(compat_sys_process_vm_writev_wrapper) - lgfr %r2,%r2 # compat_pid_t - llgtr %r3,%r3 # struct compat_iovec __user * - llgfr %r4,%r4 # unsigned long - llgtr %r5,%r5 # struct compat_iovec __user * - llgfr %r6,%r6 # unsigned long - llgf %r0,164(%r15) # unsigned long - stg %r0,160(%r15) - jg compat_sys_process_vm_writev diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c new file mode 100644 index 0000000..d123f5d --- /dev/null +++ b/arch/s390/kernel/compat_wrapper.c @@ -0,0 +1,209 @@ +#include +#include +#include "entry.h" + +#define COMPAT_SYSCALL_WRAP1(name, ...) \ + COMPAT_SYSCALL_WRAPx(1, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP2(name, ...) \ + COMPAT_SYSCALL_WRAPx(2, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP3(name, ...) \ + COMPAT_SYSCALL_WRAPx(3, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP4(name, ...) \ + COMPAT_SYSCALL_WRAPx(4, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP5(name, ...) \ + COMPAT_SYSCALL_WRAPx(5, _##name, __VA_ARGS__) +#define COMPAT_SYSCALL_WRAP6(name, ...) \ + COMPAT_SYSCALL_WRAPx(6, _##name, __VA_ARGS__) + +#define __SC_COMPAT_TYPE(t, a) \ + __typeof(__builtin_choose_expr(sizeof(t) > 4, 0L, (t)0)) a + +#define __SC_COMPAT_CAST(t, a) \ +({ \ + long __ReS = a; \ + \ + BUILD_BUG_ON((sizeof(t) > 4) && !__TYPE_IS_L(t) && \ + !__TYPE_IS_UL(t) && !__TYPE_IS_PTR(t)); \ + if (__TYPE_IS_L(t)) \ + __ReS = (s32)a; \ + if (__TYPE_IS_UL(t)) \ + __ReS = (u32)a; \ + if (__TYPE_IS_PTR(t)) \ + __ReS = a & 0x7fffffff; \ + (t)__ReS; \ +}) + +/* + * The COMPAT_SYSCALL_WRAP macro generates system call wrappers to be used by + * compat tasks. These wrappers will only be used for system calls where only + * the system call arguments need sign or zero extension or zeroing of the upper + * 33 bits of pointers. + * Note: since the wrapper function will afterwards call a system call which + * again performs zero and sign extension for all system call arguments with + * a size of less than eight bytes, these compat wrappers only touch those + * system call arguments with a size of eight bytes ((unsigned) long and + * pointers). Zero and sign extension for e.g. int parameters will be done by + * the regular system call wrappers. + */ +#define COMPAT_SYSCALL_WRAPx(x, name, ...) \ + asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__));\ + asmlinkage long compat_sys##name(__MAP(x,__SC_COMPAT_TYPE,__VA_ARGS__)) \ + { \ + return sys##name(__MAP(x,__SC_COMPAT_CAST,__VA_ARGS__)); \ + } + +COMPAT_SYSCALL_WRAP1(exit, int, error_code); +COMPAT_SYSCALL_WRAP1(close, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(creat, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP2(link, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP1(unlink, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(chdir, const char __user *, filename); +COMPAT_SYSCALL_WRAP3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP2(chmod, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP1(oldumount, char __user *, name); +COMPAT_SYSCALL_WRAP1(alarm, unsigned int, seconds); +COMPAT_SYSCALL_WRAP2(access, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(nice, int, increment); +COMPAT_SYSCALL_WRAP2(kill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP2(rename, const char __user *, oldname, const char __user *, newname); +COMPAT_SYSCALL_WRAP2(mkdir, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP1(rmdir, const char __user *, pathname); +COMPAT_SYSCALL_WRAP1(dup, unsigned int, fildes); +COMPAT_SYSCALL_WRAP1(pipe, int __user *, fildes); +COMPAT_SYSCALL_WRAP1(brk, unsigned long, brk); +COMPAT_SYSCALL_WRAP2(signal, int, sig, __sighandler_t, handler); +COMPAT_SYSCALL_WRAP1(acct, const char __user *, name); +COMPAT_SYSCALL_WRAP2(umount, char __user *, name, int, flags); +COMPAT_SYSCALL_WRAP2(setpgid, pid_t, pid, pid_t, pgid); +COMPAT_SYSCALL_WRAP1(umask, int, mask); +COMPAT_SYSCALL_WRAP1(chroot, const char __user *, filename); +COMPAT_SYSCALL_WRAP2(dup2, unsigned int, oldfd, unsigned int, newfd); +COMPAT_SYSCALL_WRAP3(sigsuspend, int, unused1, int, unused2, old_sigset_t, mask); +COMPAT_SYSCALL_WRAP2(sethostname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP2(symlink, const char __user *, old, const char __user *, new); +COMPAT_SYSCALL_WRAP3(readlink, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP1(uselib, const char __user *, library); +COMPAT_SYSCALL_WRAP2(swapon, const char __user *, specialfile, int, swap_flags); +COMPAT_SYSCALL_WRAP4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg); +COMPAT_SYSCALL_WRAP2(munmap, unsigned long, addr, size_t, len); +COMPAT_SYSCALL_WRAP2(fchmod, unsigned int, fd, umode_t, mode); +COMPAT_SYSCALL_WRAP2(getpriority, int, which, int, who); +COMPAT_SYSCALL_WRAP3(setpriority, int, which, int, who, int, niceval); +COMPAT_SYSCALL_WRAP3(syslog, int, type, char __user *, buf, int, len); +COMPAT_SYSCALL_WRAP1(swapoff, const char __user *, specialfile); +COMPAT_SYSCALL_WRAP1(fsync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(setdomainname, char __user *, name, int, len); +COMPAT_SYSCALL_WRAP1(newuname, struct new_utsname __user *, name); +COMPAT_SYSCALL_WRAP3(mprotect, unsigned long, start, size_t, len, unsigned long, prot); +COMPAT_SYSCALL_WRAP3(init_module, void __user *, umod, unsigned long, len, const char __user *, uargs); +COMPAT_SYSCALL_WRAP2(delete_module, const char __user *, name_user, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(quotactl, unsigned int, cmd, const char __user *, special, qid_t, id, void __user *, addr); +COMPAT_SYSCALL_WRAP1(getpgid, pid_t, pid); +COMPAT_SYSCALL_WRAP1(fchdir, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(bdflush, int, func, long, data); +COMPAT_SYSCALL_WRAP3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2); +COMPAT_SYSCALL_WRAP1(s390_personality, unsigned int, personality); +COMPAT_SYSCALL_WRAP5(llseek, unsigned int, fd, unsigned long, high, unsigned long, low, loff_t __user *, result, unsigned int, whence); +COMPAT_SYSCALL_WRAP2(flock, unsigned int, fd, unsigned int, cmd); +COMPAT_SYSCALL_WRAP3(msync, unsigned long, start, size_t, len, int, flags); +COMPAT_SYSCALL_WRAP1(getsid, pid_t, pid); +COMPAT_SYSCALL_WRAP1(fdatasync, unsigned int, fd); +COMPAT_SYSCALL_WRAP2(mlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP2(munlock, unsigned long, start, size_t, len); +COMPAT_SYSCALL_WRAP1(mlockall, int, flags); +COMPAT_SYSCALL_WRAP2(sched_setparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP2(sched_getparam, pid_t, pid, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP3(sched_setscheduler, pid_t, pid, int, policy, struct sched_param __user *, param); +COMPAT_SYSCALL_WRAP1(sched_getscheduler, pid_t, pid); +COMPAT_SYSCALL_WRAP1(sched_get_priority_max, int, policy); +COMPAT_SYSCALL_WRAP1(sched_get_priority_min, int, policy); +COMPAT_SYSCALL_WRAP5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr); +COMPAT_SYSCALL_WRAP3(poll, struct pollfd __user *, ufds, unsigned int, nfds, int, timeout); +COMPAT_SYSCALL_WRAP5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5); +COMPAT_SYSCALL_WRAP2(getcwd, char __user *, buf, unsigned long, size); +COMPAT_SYSCALL_WRAP2(capget, cap_user_header_t, header, cap_user_data_t, dataptr); +COMPAT_SYSCALL_WRAP2(capset, cap_user_header_t, header, const cap_user_data_t, data); +COMPAT_SYSCALL_WRAP3(lchown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP2(setreuid, uid_t, ruid, uid_t, euid); +COMPAT_SYSCALL_WRAP2(setregid, gid_t, rgid, gid_t, egid); +COMPAT_SYSCALL_WRAP2(getgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP2(setgroups, int, gidsetsize, gid_t __user *, grouplist); +COMPAT_SYSCALL_WRAP3(fchown, unsigned int, fd, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid); +COMPAT_SYSCALL_WRAP3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid); +COMPAT_SYSCALL_WRAP3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid); +COMPAT_SYSCALL_WRAP3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid); +COMPAT_SYSCALL_WRAP3(chown, const char __user *, filename, uid_t, user, gid_t, group); +COMPAT_SYSCALL_WRAP1(setuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setgid, gid_t, gid); +COMPAT_SYSCALL_WRAP1(setfsuid, uid_t, uid); +COMPAT_SYSCALL_WRAP1(setfsgid, gid_t, gid); +COMPAT_SYSCALL_WRAP2(pivot_root, const char __user *, new_root, const char __user *, put_old); +COMPAT_SYSCALL_WRAP3(mincore, unsigned long, start, size_t, len, unsigned char __user *, vec); +COMPAT_SYSCALL_WRAP3(madvise, unsigned long, start, size_t, len, int, behavior); +COMPAT_SYSCALL_WRAP5(setxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(lsetxattr, const char __user *, path, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP5(fsetxattr, int, fd, const char __user *, name, const void __user *, value, size_t, size, int, flags); +COMPAT_SYSCALL_WRAP3(getdents64, unsigned int, fd, struct linux_dirent64 __user *, dirent, unsigned int, count); +COMPAT_SYSCALL_WRAP4(getxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(lgetxattr, const char __user *, path, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size); +COMPAT_SYSCALL_WRAP3(listxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(llistxattr, const char __user *, path, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP3(flistxattr, int, fd, char __user *, list, size_t, size); +COMPAT_SYSCALL_WRAP2(removexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(lremovexattr, const char __user *, path, const char __user *, name); +COMPAT_SYSCALL_WRAP2(fremovexattr, int, fd, const char __user *, name); +COMPAT_SYSCALL_WRAP1(exit_group, int, error_code); +COMPAT_SYSCALL_WRAP1(set_tid_address, int __user *, tidptr); +COMPAT_SYSCALL_WRAP1(epoll_create, int, size); +COMPAT_SYSCALL_WRAP4(epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event __user *, event); +COMPAT_SYSCALL_WRAP4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout); +COMPAT_SYSCALL_WRAP1(timer_getoverrun, timer_t, timer_id); +COMPAT_SYSCALL_WRAP1(timer_delete, compat_timer_t, compat_timer_id); +COMPAT_SYSCALL_WRAP1(io_destroy, aio_context_t, ctx); +COMPAT_SYSCALL_WRAP3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, struct io_event __user *, result); +COMPAT_SYSCALL_WRAP1(mq_unlink, const char __user *, name); +COMPAT_SYSCALL_WRAP5(add_key, const char __user *, tp, const char __user *, dsc, const void __user *, pld, size_t, len, key_serial_t, id); +COMPAT_SYSCALL_WRAP4(request_key, const char __user *, tp, const char __user *, dsc, const char __user *, info, key_serial_t, id); +COMPAT_SYSCALL_WRAP5(remap_file_pages, unsigned long, start, unsigned long, size, unsigned long, prot, unsigned long, pgoff, unsigned long, flags); +COMPAT_SYSCALL_WRAP3(ioprio_set, int, which, int, who, int, ioprio); +COMPAT_SYSCALL_WRAP2(ioprio_get, int, which, int, who); +COMPAT_SYSCALL_WRAP3(inotify_add_watch, int, fd, const char __user *, path, u32, mask); +COMPAT_SYSCALL_WRAP2(inotify_rm_watch, int, fd, __s32, wd); +COMPAT_SYSCALL_WRAP3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode); +COMPAT_SYSCALL_WRAP4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned, dev); +COMPAT_SYSCALL_WRAP5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag); +COMPAT_SYSCALL_WRAP3(unlinkat, int, dfd, const char __user *, pathname, int, flag); +COMPAT_SYSCALL_WRAP4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags); +COMPAT_SYSCALL_WRAP3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname); +COMPAT_SYSCALL_WRAP4(readlinkat, int, dfd, const char __user *, path, char __user *, buf, int, bufsiz); +COMPAT_SYSCALL_WRAP3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode); +COMPAT_SYSCALL_WRAP3(faccessat, int, dfd, const char __user *, filename, int, mode); +COMPAT_SYSCALL_WRAP1(unshare, unsigned long, unshare_flags); +COMPAT_SYSCALL_WRAP6(splice, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags); +COMPAT_SYSCALL_WRAP3(getcpu, unsigned __user *, cpu, unsigned __user *, node, struct getcpu_cache __user *, cache); +COMPAT_SYSCALL_WRAP1(eventfd, unsigned int, count); +COMPAT_SYSCALL_WRAP2(timerfd_create, int, clockid, int, flags); +COMPAT_SYSCALL_WRAP2(eventfd2, unsigned int, count, int, flags); +COMPAT_SYSCALL_WRAP1(inotify_init1, int, flags); +COMPAT_SYSCALL_WRAP2(pipe2, int __user *, fildes, int, flags); +COMPAT_SYSCALL_WRAP3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags); +COMPAT_SYSCALL_WRAP1(epoll_create1, int, flags); +COMPAT_SYSCALL_WRAP2(tkill, int, pid, int, sig); +COMPAT_SYSCALL_WRAP3(tgkill, int, tgid, int, pid, int, sig); +COMPAT_SYSCALL_WRAP5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags); +COMPAT_SYSCALL_WRAP5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val); +COMPAT_SYSCALL_WRAP2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags); +COMPAT_SYSCALL_WRAP4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim); +COMPAT_SYSCALL_WRAP5(name_to_handle_at, int, dfd, const char __user *, name, struct file_handle __user *, handle, int __user *, mnt_id, int, flag); +COMPAT_SYSCALL_WRAP1(syncfs, int, fd); +COMPAT_SYSCALL_WRAP2(setns, int, fd, int, nstype); +COMPAT_SYSCALL_WRAP2(s390_runtime_instr, int, command, int, signum); +COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, idx1, unsigned long, idx2); +COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags); +COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags); +COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index e916788..6ac7819 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -67,11 +67,6 @@ struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; -long sys_sigreturn(void); -long sys_rt_sigreturn(void); -long sys32_sigreturn(void); -long sys32_rt_sigreturn(void); - long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index dbdec47..542ef48 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -19,9 +19,9 @@ SYSCALL(sys_restart_syscall,sys_restart_syscall,sys_restart_syscall) SYSCALL(sys_creat,sys_creat,compat_sys_creat) SYSCALL(sys_link,sys_link,compat_sys_link) SYSCALL(sys_unlink,sys_unlink,compat_sys_unlink) /* 10 */ -SYSCALL(sys_execve,sys_execve,sys32_execve_wrapper) +SYSCALL(sys_execve,sys_execve,compat_sys_execve) SYSCALL(sys_chdir,sys_chdir,compat_sys_chdir) -SYSCALL(sys_time,sys_ni_syscall,sys32_time_wrapper) /* old time syscall */ +SYSCALL(sys_time,sys_ni_syscall,compat_sys_time) /* old time syscall */ SYSCALL(sys_mknod,sys_mknod,compat_sys_mknod) SYSCALL(sys_chmod,sys_chmod,compat_sys_chmod) /* 15 */ SYSCALL(sys_lchown16,sys_ni_syscall,compat_sys_s390_lchown16) /* old lchown16 syscall*/ @@ -29,16 +29,16 @@ NI_SYSCALL /* old break syscall holder */ NI_SYSCALL /* old stat syscall holder */ SYSCALL(sys_lseek,sys_lseek,compat_sys_lseek) SYSCALL(sys_getpid,sys_getpid,sys_getpid) /* 20 */ -SYSCALL(sys_mount,sys_mount,sys32_mount_wrapper) +SYSCALL(sys_mount,sys_mount,compat_sys_mount) SYSCALL(sys_oldumount,sys_oldumount,compat_sys_oldumount) SYSCALL(sys_setuid16,sys_ni_syscall,compat_sys_s390_setuid16) /* old setuid16 syscall*/ SYSCALL(sys_getuid16,sys_ni_syscall,compat_sys_s390_getuid16) /* old getuid16 syscall*/ -SYSCALL(sys_stime,sys_ni_syscall,sys32_stime_wrapper) /* 25 old stime syscall */ -SYSCALL(sys_ptrace,sys_ptrace,sys32_ptrace_wrapper) +SYSCALL(sys_stime,sys_ni_syscall,compat_sys_stime) /* 25 old stime syscall */ +SYSCALL(sys_ptrace,sys_ptrace,compat_sys_ptrace) SYSCALL(sys_alarm,sys_alarm,compat_sys_alarm) NI_SYSCALL /* old fstat syscall */ SYSCALL(sys_pause,sys_pause,sys_pause) -SYSCALL(sys_utime,sys_utime,compat_sys_utime_wrapper) /* 30 */ +SYSCALL(sys_utime,sys_utime,compat_sys_utime) /* 30 */ NI_SYSCALL /* old stty syscall */ NI_SYSCALL /* old gtty syscall */ SYSCALL(sys_access,sys_access,compat_sys_access) @@ -51,7 +51,7 @@ SYSCALL(sys_mkdir,sys_mkdir,compat_sys_mkdir) SYSCALL(sys_rmdir,sys_rmdir,compat_sys_rmdir) /* 40 */ SYSCALL(sys_dup,sys_dup,compat_sys_dup) SYSCALL(sys_pipe,sys_pipe,compat_sys_pipe) -SYSCALL(sys_times,sys_times,compat_sys_times_wrapper) +SYSCALL(sys_times,sys_times,compat_sys_times) NI_SYSCALL /* old prof syscall */ SYSCALL(sys_brk,sys_brk,compat_sys_brk) /* 45 */ SYSCALL(sys_setgid16,sys_ni_syscall,compat_sys_s390_setgid16) /* old setgid16 syscall*/ @@ -62,15 +62,15 @@ SYSCALL(sys_getegid16,sys_ni_syscall,compat_sys_s390_getegid16) /* 50 old getegi SYSCALL(sys_acct,sys_acct,compat_sys_acct) SYSCALL(sys_umount,sys_umount,compat_sys_umount) NI_SYSCALL /* old lock syscall */ -SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl_wrapper) -SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl_wrapper) /* 55 */ +SYSCALL(sys_ioctl,sys_ioctl,compat_sys_ioctl) +SYSCALL(sys_fcntl,sys_fcntl,compat_sys_fcntl) /* 55 */ NI_SYSCALL /* intel mpx syscall */ SYSCALL(sys_setpgid,sys_setpgid,compat_sys_setpgid) NI_SYSCALL /* old ulimit syscall */ NI_SYSCALL /* old uname syscall */ SYSCALL(sys_umask,sys_umask,compat_sys_umask) /* 60 */ SYSCALL(sys_chroot,sys_chroot,compat_sys_chroot) -SYSCALL(sys_ustat,sys_ustat,sys32_ustat_wrapper) +SYSCALL(sys_ustat,sys_ustat,compat_sys_ustat) SYSCALL(sys_dup2,sys_dup2,compat_sys_dup2) SYSCALL(sys_getppid,sys_getppid,sys_getppid) SYSCALL(sys_getpgrp,sys_getpgrp,sys_getpgrp) /* 65 */ @@ -81,13 +81,13 @@ NI_SYSCALL /* old ssetmask syscall*/ SYSCALL(sys_setreuid16,sys_ni_syscall,compat_sys_s390_setreuid16) /* old setreuid16 syscall */ SYSCALL(sys_setregid16,sys_ni_syscall,compat_sys_s390_setregid16) /* old setregid16 syscall */ SYSCALL(sys_sigsuspend,sys_sigsuspend,compat_sys_sigsuspend) -SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending_wrapper) +SYSCALL(sys_sigpending,sys_sigpending,compat_sys_sigpending) SYSCALL(sys_sethostname,sys_sethostname,compat_sys_sethostname) -SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit_wrapper) /* 75 */ -SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit_wrapper) +SYSCALL(sys_setrlimit,sys_setrlimit,compat_sys_setrlimit) /* 75 */ +SYSCALL(sys_old_getrlimit,sys_getrlimit,compat_sys_old_getrlimit) SYSCALL(sys_getrusage,sys_getrusage,compat_sys_getrusage) -SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday_wrapper) -SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday_wrapper) +SYSCALL(sys_gettimeofday,sys_gettimeofday,compat_sys_gettimeofday) +SYSCALL(sys_settimeofday,sys_settimeofday,compat_sys_settimeofday) SYSCALL(sys_getgroups16,sys_ni_syscall,compat_sys_s390_getgroups16) /* 80 old getgroups16 syscall */ SYSCALL(sys_setgroups16,sys_ni_syscall,compat_sys_s390_setgroups16) /* old setgroups16 syscall */ NI_SYSCALL /* old select syscall */ @@ -97,7 +97,7 @@ SYSCALL(sys_readlink,sys_readlink,compat_sys_readlink) /* 85 */ SYSCALL(sys_uselib,sys_uselib,compat_sys_uselib) SYSCALL(sys_swapon,sys_swapon,compat_sys_swapon) SYSCALL(sys_reboot,sys_reboot,compat_sys_reboot) -SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ +SYSCALL(sys_ni_syscall,sys_ni_syscall,compat_sys_old_readdir) /* old readdir syscall */ SYSCALL(sys_old_mmap,sys_old_mmap,compat_sys_s390_old_mmap) /* 90 */ SYSCALL(sys_munmap,sys_munmap,compat_sys_munmap) SYSCALL(sys_truncate,sys_truncate,compat_sys_truncate) @@ -107,16 +107,16 @@ SYSCALL(sys_fchown16,sys_ni_syscall,compat_sys_s390_fchown16) /* 95 old fchown16 SYSCALL(sys_getpriority,sys_getpriority,compat_sys_getpriority) SYSCALL(sys_setpriority,sys_setpriority,compat_sys_setpriority) NI_SYSCALL /* old profil syscall */ -SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs_wrapper) -SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs_wrapper) /* 100 */ +SYSCALL(sys_statfs,sys_statfs,compat_sys_statfs) +SYSCALL(sys_fstatfs,sys_fstatfs,compat_sys_fstatfs) /* 100 */ NI_SYSCALL /* ioperm for i386 */ -SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall_wrapper) +SYSCALL(sys_socketcall,sys_socketcall,compat_sys_socketcall) SYSCALL(sys_syslog,sys_syslog,compat_sys_syslog) SYSCALL(sys_setitimer,sys_setitimer,compat_sys_setitimer) SYSCALL(sys_getitimer,sys_getitimer,compat_sys_getitimer) /* 105 */ -SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat_wrapper) -SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat_wrapper) -SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat_wrapper) +SYSCALL(sys_newstat,sys_newstat,compat_sys_newstat) +SYSCALL(sys_newlstat,sys_newlstat,compat_sys_newlstat) +SYSCALL(sys_newfstat,sys_newfstat,compat_sys_newfstat) NI_SYSCALL /* old uname syscall */ SYSCALL(sys_lookup_dcookie,sys_lookup_dcookie,compat_sys_lookup_dcookie) /* 110 */ SYSCALL(sys_vhangup,sys_vhangup,sys_vhangup) @@ -124,15 +124,15 @@ NI_SYSCALL /* old "idle" system call */ NI_SYSCALL /* vm86old for i386 */ SYSCALL(sys_wait4,sys_wait4,compat_sys_wait4) SYSCALL(sys_swapoff,sys_swapoff,compat_sys_swapoff) /* 115 */ -SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo_wrapper) +SYSCALL(sys_sysinfo,sys_sysinfo,compat_sys_sysinfo) SYSCALL(sys_s390_ipc,sys_s390_ipc,compat_sys_s390_ipc) SYSCALL(sys_fsync,sys_fsync,compat_sys_fsync) -SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) +SYSCALL(sys_sigreturn,sys_sigreturn,compat_sys_sigreturn) SYSCALL(sys_clone,sys_clone,compat_sys_clone) /* 120 */ SYSCALL(sys_setdomainname,sys_setdomainname,compat_sys_setdomainname) SYSCALL(sys_newuname,sys_newuname,compat_sys_newuname) NI_SYSCALL /* modify_ldt for i386 */ -SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper) +SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex) SYSCALL(sys_mprotect,sys_mprotect,compat_sys_mprotect) /* 125 */ SYSCALL(sys_sigprocmask,sys_sigprocmask,compat_sys_sigprocmask) NI_SYSCALL /* old "create module" */ @@ -149,12 +149,12 @@ NI_SYSCALL /* for afs_syscall */ SYSCALL(sys_setfsuid16,sys_ni_syscall,compat_sys_s390_setfsuid16) /* old setfsuid16 syscall */ SYSCALL(sys_setfsgid16,sys_ni_syscall,compat_sys_s390_setfsgid16) /* old setfsgid16 syscall */ SYSCALL(sys_llseek,sys_llseek,compat_sys_llseek) /* 140 */ -SYSCALL(sys_getdents,sys_getdents,sys32_getdents_wrapper) -SYSCALL(sys_select,sys_select,compat_sys_select_wrapper) +SYSCALL(sys_getdents,sys_getdents,compat_sys_getdents) +SYSCALL(sys_select,sys_select,compat_sys_select) SYSCALL(sys_flock,sys_flock,compat_sys_flock) SYSCALL(sys_msync,sys_msync,compat_sys_msync) -SYSCALL(sys_readv,sys_readv,compat_sys_readv_wrapper) /* 145 */ -SYSCALL(sys_writev,sys_writev,compat_sys_writev_wrapper) +SYSCALL(sys_readv,sys_readv,compat_sys_readv) /* 145 */ +SYSCALL(sys_writev,sys_writev,compat_sys_writev) SYSCALL(sys_getsid,sys_getsid,compat_sys_getsid) SYSCALL(sys_fdatasync,sys_fdatasync,compat_sys_fdatasync) SYSCALL(sys_sysctl,sys_sysctl,compat_sys_sysctl) @@ -170,7 +170,7 @@ SYSCALL(sys_sched_yield,sys_sched_yield,sys_sched_yield) SYSCALL(sys_sched_get_priority_max,sys_sched_get_priority_max,compat_sys_sched_get_priority_max) SYSCALL(sys_sched_get_priority_min,sys_sched_get_priority_min,compat_sys_sched_get_priority_min) /* 160 */ SYSCALL(sys_sched_rr_get_interval,sys_sched_rr_get_interval,compat_sys_sched_rr_get_interval) -SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep_wrapper) +SYSCALL(sys_nanosleep,sys_nanosleep,compat_sys_nanosleep) SYSCALL(sys_mremap,sys_mremap,compat_sys_mremap) SYSCALL(sys_setresuid16,sys_ni_syscall,compat_sys_s390_setresuid16) /* old setresuid16 syscall */ SYSCALL(sys_getresuid16,sys_ni_syscall,compat_sys_s390_getresuid16) /* 165 old getresuid16 syscall */ @@ -181,7 +181,7 @@ NI_SYSCALL /* old nfsservctl */ SYSCALL(sys_setresgid16,sys_ni_syscall,compat_sys_s390_setresgid16) /* 170 old setresgid16 syscall */ SYSCALL(sys_getresgid16,sys_ni_syscall,compat_sys_s390_getresgid16) /* old getresgid16 syscall */ SYSCALL(sys_prctl,sys_prctl,compat_sys_prctl) -SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,sys32_rt_sigreturn) +SYSCALL(sys_rt_sigreturn,sys_rt_sigreturn,compat_sys_rt_sigreturn) SYSCALL(sys_rt_sigaction,sys_rt_sigaction,compat_sys_rt_sigaction) SYSCALL(sys_rt_sigprocmask,sys_rt_sigprocmask,compat_sys_rt_sigprocmask) /* 175 */ SYSCALL(sys_rt_sigpending,sys_rt_sigpending,compat_sys_rt_sigpending) @@ -199,7 +199,7 @@ SYSCALL(sys_sendfile,sys_sendfile64,compat_sys_sendfile) NI_SYSCALL /* streams1 */ NI_SYSCALL /* streams2 */ SYSCALL(sys_vfork,sys_vfork,sys_vfork) /* 190 */ -SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit_wrapper) +SYSCALL(sys_getrlimit,sys_getrlimit,compat_sys_getrlimit) SYSCALL(sys_mmap2,sys_mmap2,compat_sys_s390_mmap2) SYSCALL(sys_truncate64,sys_ni_syscall,compat_sys_s390_truncate64) SYSCALL(sys_ftruncate64,sys_ni_syscall,compat_sys_s390_ftruncate64) @@ -229,7 +229,7 @@ SYSCALL(sys_pivot_root,sys_pivot_root,compat_sys_pivot_root) SYSCALL(sys_mincore,sys_mincore,compat_sys_mincore) SYSCALL(sys_madvise,sys_madvise,compat_sys_madvise) SYSCALL(sys_getdents64,sys_getdents64,compat_sys_getdents64) /* 220 */ -SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64_wrapper) +SYSCALL(sys_fcntl64,sys_ni_syscall,compat_sys_fcntl64) SYSCALL(sys_readahead,sys_readahead,compat_sys_s390_readahead) SYSCALL(sys_sendfile64,sys_ni_syscall,compat_sys_sendfile64) SYSCALL(sys_setxattr,sys_setxattr,compat_sys_setxattr) @@ -247,14 +247,14 @@ SYSCALL(sys_fremovexattr,sys_fremovexattr,compat_sys_fremovexattr) /* 235 */ SYSCALL(sys_gettid,sys_gettid,sys_gettid) SYSCALL(sys_tkill,sys_tkill,compat_sys_tkill) SYSCALL(sys_futex,sys_futex,compat_sys_futex) -SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,sys32_sched_setaffinity_wrapper) -SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,sys32_sched_getaffinity_wrapper) /* 240 */ +SYSCALL(sys_sched_setaffinity,sys_sched_setaffinity,compat_sys_sched_setaffinity) +SYSCALL(sys_sched_getaffinity,sys_sched_getaffinity,compat_sys_sched_getaffinity) /* 240 */ SYSCALL(sys_tgkill,sys_tgkill,compat_sys_tgkill) NI_SYSCALL /* reserved for TUX */ -SYSCALL(sys_io_setup,sys_io_setup,sys32_io_setup_wrapper) +SYSCALL(sys_io_setup,sys_io_setup,compat_sys_io_setup) SYSCALL(sys_io_destroy,sys_io_destroy,compat_sys_io_destroy) -SYSCALL(sys_io_getevents,sys_io_getevents,sys32_io_getevents_wrapper) /* 245 */ -SYSCALL(sys_io_submit,sys_io_submit,sys32_io_submit_wrapper) +SYSCALL(sys_io_getevents,sys_io_getevents,compat_sys_io_getevents) /* 245 */ +SYSCALL(sys_io_submit,sys_io_submit,compat_sys_io_submit) SYSCALL(sys_io_cancel,sys_io_cancel,compat_sys_io_cancel) SYSCALL(sys_exit_group,sys_exit_group,compat_sys_exit_group) SYSCALL(sys_epoll_create,sys_epoll_create,compat_sys_epoll_create) @@ -262,33 +262,33 @@ SYSCALL(sys_epoll_ctl,sys_epoll_ctl,compat_sys_epoll_ctl) /* 250 */ SYSCALL(sys_epoll_wait,sys_epoll_wait,compat_sys_epoll_wait) SYSCALL(sys_set_tid_address,sys_set_tid_address,compat_sys_set_tid_address) SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,compat_sys_s390_fadvise64) -SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) -SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ -SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) +SYSCALL(sys_timer_create,sys_timer_create,compat_sys_timer_create) +SYSCALL(sys_timer_settime,sys_timer_settime,compat_sys_timer_settime) /* 255 */ +SYSCALL(sys_timer_gettime,sys_timer_gettime,compat_sys_timer_gettime) SYSCALL(sys_timer_getoverrun,sys_timer_getoverrun,compat_sys_timer_getoverrun) SYSCALL(sys_timer_delete,sys_timer_delete,compat_sys_timer_delete) -SYSCALL(sys_clock_settime,sys_clock_settime,sys32_clock_settime_wrapper) -SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 */ -SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) -SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) +SYSCALL(sys_clock_settime,sys_clock_settime,compat_sys_clock_settime) +SYSCALL(sys_clock_gettime,sys_clock_gettime,compat_sys_clock_gettime) /* 260 */ +SYSCALL(sys_clock_getres,sys_clock_getres,compat_sys_clock_getres) +SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,compat_sys_clock_nanosleep) NI_SYSCALL /* reserved for vserver */ SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,compat_sys_s390_fadvise64_64) -SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) -SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) +SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64) +SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64) SYSCALL(sys_remap_file_pages,sys_remap_file_pages,compat_sys_remap_file_pages) NI_SYSCALL /* 268 sys_mbind */ NI_SYSCALL /* 269 sys_get_mempolicy */ NI_SYSCALL /* 270 sys_set_mempolicy */ -SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open_wrapper) +SYSCALL(sys_mq_open,sys_mq_open,compat_sys_mq_open) SYSCALL(sys_mq_unlink,sys_mq_unlink,compat_sys_mq_unlink) -SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend_wrapper) -SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive_wrapper) -SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify_wrapper) /* 275 */ -SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr_wrapper) -SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load_wrapper) +SYSCALL(sys_mq_timedsend,sys_mq_timedsend,compat_sys_mq_timedsend) +SYSCALL(sys_mq_timedreceive,sys_mq_timedreceive,compat_sys_mq_timedreceive) +SYSCALL(sys_mq_notify,sys_mq_notify,compat_sys_mq_notify) /* 275 */ +SYSCALL(sys_mq_getsetattr,sys_mq_getsetattr,compat_sys_mq_getsetattr) +SYSCALL(sys_kexec_load,sys_kexec_load,compat_sys_kexec_load) SYSCALL(sys_add_key,sys_add_key,compat_sys_add_key) SYSCALL(sys_request_key,sys_request_key,compat_sys_request_key) -SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl_wrapper) /* 280 */ +SYSCALL(sys_keyctl,sys_keyctl,compat_sys_keyctl) /* 280 */ SYSCALL(sys_waitid,sys_waitid,compat_sys_waitid) SYSCALL(sys_ioprio_set,sys_ioprio_set,compat_sys_ioprio_set) SYSCALL(sys_ioprio_get,sys_ioprio_get,compat_sys_ioprio_get) @@ -300,7 +300,7 @@ SYSCALL(sys_openat,sys_openat,compat_sys_openat) SYSCALL(sys_mkdirat,sys_mkdirat,compat_sys_mkdirat) SYSCALL(sys_mknodat,sys_mknodat,compat_sys_mknodat) /* 290 */ SYSCALL(sys_fchownat,sys_fchownat,compat_sys_fchownat) -SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat_wrapper) +SYSCALL(sys_futimesat,sys_futimesat,compat_sys_futimesat) SYSCALL(sys_fstatat64,sys_newfstatat,compat_sys_s390_fstatat64) SYSCALL(sys_unlinkat,sys_unlinkat,compat_sys_unlinkat) SYSCALL(sys_renameat,sys_renameat,compat_sys_renameat) /* 295 */ @@ -309,8 +309,8 @@ SYSCALL(sys_symlinkat,sys_symlinkat,compat_sys_symlinkat) SYSCALL(sys_readlinkat,sys_readlinkat,compat_sys_readlinkat) SYSCALL(sys_fchmodat,sys_fchmodat,compat_sys_fchmodat) SYSCALL(sys_faccessat,sys_faccessat,compat_sys_faccessat) /* 300 */ -SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6_wrapper) -SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll_wrapper) +SYSCALL(sys_pselect6,sys_pselect6,compat_sys_pselect6) +SYSCALL(sys_ppoll,sys_ppoll,compat_sys_ppoll) SYSCALL(sys_unshare,sys_unshare,compat_sys_unshare) SYSCALL(sys_set_robust_list,sys_set_robust_list,compat_sys_set_robust_list) SYSCALL(sys_get_robust_list,sys_get_robust_list,compat_sys_get_robust_list) @@ -321,9 +321,9 @@ SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice) NI_SYSCALL /* 310 sys_move_pages */ SYSCALL(sys_getcpu,sys_getcpu,compat_sys_getcpu) SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait) -SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) +SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes) SYSCALL(sys_s390_fallocate,sys_fallocate,compat_sys_s390_fallocate) -SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ +SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat) /* 315 */ SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd) NI_SYSCALL /* 317 old sys_timer_fd */ SYSCALL(sys_eventfd,sys_eventfd,compat_sys_eventfd) @@ -345,11 +345,11 @@ SYSCALL(sys_fanotify_mark,sys_fanotify_mark,compat_sys_fanotify_mark) SYSCALL(sys_prlimit64,sys_prlimit64,compat_sys_prlimit64) SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,compat_sys_name_to_handle_at) /* 335 */ SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at) -SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) +SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime) SYSCALL(sys_syncfs,sys_syncfs,compat_sys_syncfs) SYSCALL(sys_setns,sys_setns,compat_sys_setns) -SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */ -SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper) +SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv) /* 340 */ +SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev) SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,compat_sys_s390_runtime_instr) SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp) SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module) -- cgit v0.10.2 From c99b1861c232e1f641f13b8645e0febb3712cc71 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 4 Mar 2014 18:43:13 -0800 Subject: mwifiex: copy AP's HT capability info correctly While preparing association request, intersection of device's HT capability information and corresponding fields advertised by AP is used. This patch fixes an error while copying this field from AP's beacon. Cc: Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/11n.c b/drivers/net/wireless/mwifiex/11n.c index 6261f8c..7db1a89 100644 --- a/drivers/net/wireless/mwifiex/11n.c +++ b/drivers/net/wireless/mwifiex/11n.c @@ -308,8 +308,7 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, ht_cap->header.len = cpu_to_le16(sizeof(struct ieee80211_ht_cap)); memcpy((u8 *) ht_cap + sizeof(struct mwifiex_ie_types_header), - (u8 *) bss_desc->bcn_ht_cap + - sizeof(struct ieee_types_header), + (u8 *)bss_desc->bcn_ht_cap, le16_to_cpu(ht_cap->header.len)); mwifiex_fill_cap_info(priv, radio_type, ht_cap); -- cgit v0.10.2 From d51246481c7f28bbfa1f814ded2da65e531cd4b2 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 4 Mar 2014 18:43:14 -0800 Subject: mwifiex: save and copy AP's VHT capability info correctly While preparing association request, intersection of device's VHT capability information and corresponding field advertised by AP is used. This patch fixes a couple errors while saving and copying vht_cap and vht_oper fields from AP's beacon. Cc: # 3.9+ Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/11ac.c b/drivers/net/wireless/mwifiex/11ac.c index 5e0eec4..5d9a808 100644 --- a/drivers/net/wireless/mwifiex/11ac.c +++ b/drivers/net/wireless/mwifiex/11ac.c @@ -189,8 +189,7 @@ int mwifiex_cmd_append_11ac_tlv(struct mwifiex_private *priv, vht_cap->header.len = cpu_to_le16(sizeof(struct ieee80211_vht_cap)); memcpy((u8 *)vht_cap + sizeof(struct mwifiex_ie_types_header), - (u8 *)bss_desc->bcn_vht_cap + - sizeof(struct ieee_types_header), + (u8 *)bss_desc->bcn_vht_cap, le16_to_cpu(vht_cap->header.len)); mwifiex_fill_vht_cap_tlv(priv, vht_cap, bss_desc->bss_band); diff --git a/drivers/net/wireless/mwifiex/scan.c b/drivers/net/wireless/mwifiex/scan.c index 0a8a26e..668547c 100644 --- a/drivers/net/wireless/mwifiex/scan.c +++ b/drivers/net/wireless/mwifiex/scan.c @@ -2101,12 +2101,12 @@ mwifiex_save_curr_bcn(struct mwifiex_private *priv) curr_bss->ht_info_offset); if (curr_bss->bcn_vht_cap) - curr_bss->bcn_ht_cap = (void *)(curr_bss->beacon_buf + - curr_bss->vht_cap_offset); + curr_bss->bcn_vht_cap = (void *)(curr_bss->beacon_buf + + curr_bss->vht_cap_offset); if (curr_bss->bcn_vht_oper) - curr_bss->bcn_ht_oper = (void *)(curr_bss->beacon_buf + - curr_bss->vht_info_offset); + curr_bss->bcn_vht_oper = (void *)(curr_bss->beacon_buf + + curr_bss->vht_info_offset); if (curr_bss->bcn_bss_co_2040) curr_bss->bcn_bss_co_2040 = -- cgit v0.10.2 From d4078e232267ff53f3b030b9698a3c001db4dbec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Mar 2014 14:07:49 +0100 Subject: x86, trace: Further robustify CR2 handling vs tracing Building on commit 0ac09f9f8cd1 ("x86, trace: Fix CR2 corruption when tracing page faults") this patch addresses another few issues: - Now that read_cr2() is lifted into trace_do_page_fault(), we should pass the address to trace_page_fault_entries() to avoid it re-reading a potentially changed cr2. - Put both trace_do_page_fault() and trace_page_fault_entries() under CONFIG_TRACING. - Mark both fault entry functions {,trace_}do_page_fault() as notrace to avoid getting __mcount or other function entry trace callbacks before we've observed CR2. - Mark __do_page_fault() as noinline to guarantee the function tracer does get to see the fault. Cc: Cc: Acked-by: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140306145300.GO9987@twins.programming.kicks-ass.net Signed-off-by: H. Peter Anvin diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e7fa28b..a10c8c7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1020,8 +1020,12 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. + * + * This function must have noinline because both callers + * {,trace_}do_page_fault() have notrace on. Having this an actual function + * guarantees there's a function trace entry. */ -static void __kprobes +static void __kprobes noinline __do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -1245,31 +1249,38 @@ good_area: up_read(&mm->mmap_sem); } -dotraplinkage void __kprobes +dotraplinkage void __kprobes notrace do_page_fault(struct pt_regs *regs, unsigned long error_code) { + unsigned long address = read_cr2(); /* Get the faulting address */ enum ctx_state prev_state; - /* Get the faulting address: */ - unsigned long address = read_cr2(); + + /* + * We must have this function tagged with __kprobes, notrace and call + * read_cr2() before calling anything else. To avoid calling any kind + * of tracing machinery before we've observed the CR2 value. + * + * exception_{enter,exit}() contain all sorts of tracepoints. + */ prev_state = exception_enter(); __do_page_fault(regs, error_code, address); exception_exit(prev_state); } -static void trace_page_fault_entries(struct pt_regs *regs, +#ifdef CONFIG_TRACING +static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, unsigned long error_code) { if (user_mode(regs)) - trace_page_fault_user(read_cr2(), regs, error_code); + trace_page_fault_user(address, regs, error_code); else - trace_page_fault_kernel(read_cr2(), regs, error_code); + trace_page_fault_kernel(address, regs, error_code); } -dotraplinkage void __kprobes +dotraplinkage void __kprobes notrace trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { - enum ctx_state prev_state; /* * The exception_enter and tracepoint processing could * trigger another page faults (user space callchain @@ -1277,9 +1288,11 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) * the faulting address now. */ unsigned long address = read_cr2(); + enum ctx_state prev_state; prev_state = exception_enter(); - trace_page_fault_entries(regs, error_code); + trace_page_fault_entries(address, regs, error_code); __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +#endif /* CONFIG_TRACING */ -- cgit v0.10.2 From f6d16d32797f665100b1507f6a77c4cd0acb30c5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 6 Mar 2014 14:04:51 -0500 Subject: dm thin: fix Documentation for held metadata root feature The Documentation for the thin provisioning target's held metadata root feature was incorrect. It is now available and the value for the held metadata root is in block units (not 512b sectors). Signed-off-by: Mike Snitzer diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index 3b34b4f..05a27e9 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -287,10 +287,9 @@ ii) Status should register for the event and then check the target's status. held metadata root: - The location, in sectors, of the metadata root that has been + The location, in blocks, of the metadata root that has been 'held' for userspace read access. '-' indicates there is no - held root. This feature is not yet implemented so '-' is - always returned. + held root. discard_passdown|no_discard_passdown Whether or not discards are actually being passed down to the -- cgit v0.10.2 From bb5016eac1656506df1a9d6057ce5bec342afbef Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 6 Mar 2014 11:14:30 +0100 Subject: l2tp: fix manual sequencing (de)activation in L2TPv2 Commit e0d4435f "l2tp: Update PPP-over-L2TP driver to work over L2TPv3" broke the PPPOL2TP_SO_SENDSEQ setsockopt. The L2TP header length was previously computed by pppol2tp_l2t_header_len() before each call to l2tp_xmit_skb(). Now that header length is retrieved from the hdr_len session field, this field must be updated every time the L2TP header format is modified, or l2tp_xmit_skb() won't push the right amount of data for the L2TP header. This patch uses l2tp_session_set_header_len() to adjust hdr_len every time sequencing is (de)activated from userspace (either by the PPPOL2TP_SO_SENDSEQ setsockopt or the L2TP_ATTR_SEND_SEQ netlink attribute). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 735d0f6..85d9d94 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -112,7 +112,6 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; -static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) @@ -1863,7 +1862,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ -static void l2tp_session_set_header_len(struct l2tp_session *session, int version) +void l2tp_session_set_header_len(struct l2tp_session *session, int version) { if (version == L2TP_HDR_VER_2) { session->hdr_len = 6; @@ -1876,6 +1875,7 @@ static void l2tp_session_set_header_len(struct l2tp_session *session, int versio } } +EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 1f01ba3..3f93ccd 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -263,6 +263,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int length, int (*payload_hook)(struct sk_buff *skb)); int l2tp_session_queue_purge(struct l2tp_session *session); int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); +void l2tp_session_set_header_len(struct l2tp_session *session, int version); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 4cfd722..bd7387a 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -578,8 +578,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_RECV_SEQ]) session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); - if (info->attrs[L2TP_ATTR_SEND_SEQ]) + if (info->attrs[L2TP_ATTR_SEND_SEQ]) { session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); + l2tp_session_set_header_len(session, session->tunnel->version); + } if (info->attrs[L2TP_ATTR_LNS_MODE]) session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index be5fadf..6bfeaa7 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1312,6 +1312,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } + l2tp_session_set_header_len(session, session->tunnel->version); l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set send_seq=%d\n", session->name, session->send_seq); -- cgit v0.10.2 From 9e9cb6221aa7cb04765484fe87cc2d1b92edce64 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 6 Mar 2014 11:15:10 +0100 Subject: l2tp: fix userspace reception on plain L2TP sockets As pppol2tp_recv() never queues up packets to plain L2TP sockets, pppol2tp_recvmsg() never returns data to userspace, thus making the recv*() system calls unusable. Instead of dropping packets when the L2TP socket isn't bound to a PPP channel, this patch adds them to its reception queue. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6bfeaa7..5990919 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -254,12 +254,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int po = pppox_sk(sk); ppp_input(&po->chan, skb); } else { - l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: socket not bound\n", - session->name); + l2tp_dbg(session, PPPOL2TP_MSG_DATA, + "%s: recv %d byte data frame, passing to L2TP socket\n", + session->name, data_len); - /* Not bound. Nothing we can do, so discard. */ - atomic_long_inc(&session->stats.rx_errors); - kfree_skb(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + } } return; -- cgit v0.10.2 From 46ce6b74fdec1a8a8e9b45597e6c7989441682bf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 5 Mar 2014 20:17:49 +0100 Subject: ahci_imx: Put #ifdef CONFIG_PM_SLEEP around suspend / resume functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following warnings when CONFIG_PM_SLEEP is not set: drivers/ata/ahci_imx.c:284:12: warning: ‘imx_ahci_suspend’ defined but not used [-Wunused-function] drivers/ata/ahci_imx.c:299:12: warning: ‘imx_ahci_resume’ defined but not used [-Wunused-function] Reported-by: Bartlomiej Zolnierkiewicz Cc: Bartlomiej Zolnierkiewicz Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c index 3cb5d69..497c7ab 100644 --- a/drivers/ata/ahci_imx.c +++ b/drivers/ata/ahci_imx.c @@ -281,6 +281,7 @@ static void ahci_imx_host_stop(struct ata_host *host) imx_sata_disable(hpriv); } +#ifdef CONFIG_PM_SLEEP static int imx_ahci_suspend(struct device *dev) { struct ata_host *host = dev_get_drvdata(dev); @@ -308,6 +309,7 @@ static int imx_ahci_resume(struct device *dev) return ahci_platform_resume_host(dev); } +#endif static SIMPLE_DEV_PM_OPS(ahci_imx_pm_ops, imx_ahci_suspend, imx_ahci_resume); -- cgit v0.10.2 From 6d4ebeb4df0176b1973875840a9f7e91394c0685 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:16 +0100 Subject: tipc: allow connection shutdown callback to be invoked in advance Currently connection shutdown callback function is called when connection instance is released in tipc_conn_kref_release(), and receiving packets and sending packets are running in different threads. Even if connection is closed by the thread of receiving packets, its shutdown callback may not be called immediately as the connection reference count is non-zero at that moment. So, although the connection is shut down by the thread of receiving packets, the thread of sending packets doesn't know it. Before its shutdown callback is invoked to tell the sending thread its connection has been closed, the sending thread may deliver messages by tipc_conn_sendmsg(), this is why the following error information appears: "Sending subscription event failed, no memory" To eliminate it, allow connection shutdown callback function to be called before connection id is removed in tipc_close_conn(), which makes the sending thread know the truth in time that its socket is closed so that it doesn't send message to it. We also remove the "Sending XXX failed..." error reporting for topology and config services. Signed-off-by: Ying Xue Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/config.c b/net/tipc/config.c index e74eef2..e6d7216 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -376,7 +376,6 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, struct tipc_cfg_msg_hdr *req_hdr; struct tipc_cfg_msg_hdr *rep_hdr; struct sk_buff *rep_buf; - int ret; /* Validate configuration message header (ignore invalid message) */ req_hdr = (struct tipc_cfg_msg_hdr *)buf; @@ -398,12 +397,8 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); rep_hdr->tcm_len = htonl(rep_buf->len); rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - - ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, - rep_buf->len); - if (ret < 0) - pr_err("Sending cfg reply message failed, no memory\n"); - + tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, + rep_buf->len); kfree_skb(rep_buf); } } diff --git a/net/tipc/server.c b/net/tipc/server.c index 3739797..bbaa8f5 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -87,7 +87,6 @@ static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct tipc_server *s = con->server; if (con->sock) { tipc_sock_release_local(con->sock); @@ -95,10 +94,6 @@ static void tipc_conn_kref_release(struct kref *kref) } tipc_clean_outqueues(con); - - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); - kfree(con); } @@ -181,6 +176,9 @@ static void tipc_close_conn(struct tipc_conn *con) struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + if (con->conid) + s->tipc_conn_shutdown(con->conid, con->usr_data); + spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 7cb0bd5..a6ce3bb 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -96,20 +96,16 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, { struct tipc_subscriber *subscriber = sub->subscriber; struct kvec msg_sect; - int ret; msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); sub->evt.found_lower = htohl(found_lower, sub->swap); sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, - msg_sect.iov_base, msg_sect.iov_len); - if (ret < 0) - pr_err("Sending subscription event failed, no memory\n"); + tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base, + msg_sect.iov_len); } /** -- cgit v0.10.2 From 4652edb70e8a7eebbe47fa931940f65522c36e8f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:17 +0100 Subject: tipc: fix connection refcount leak When tipc_conn_sendmsg() calls tipc_conn_lookup() to query a connection instance, its reference count value is increased if it's found. But subsequently if it's found that the connection is closed, the work of sending message is not queued into its server send workqueue, and the connection reference count is not decreased. This will cause a reference count leak. To reproduce this problem, an application would need to open and closes topology server connections with high intensity. We fix this by immediately decrementing the connection reference count if a send fails due to the connection being closed. Signed-off-by: Ying Xue Acked-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/server.c b/net/tipc/server.c index bbaa8f5..646a930 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -427,10 +427,12 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(&e->list, &con->outqueue); spin_unlock_bh(&con->outqueue_lock); - if (test_bit(CF_CONNECTED, &con->flags)) + if (test_bit(CF_CONNECTED, &con->flags)) { if (!queue_work(s->send_wq, &con->swork)) conn_put(con); - + } else { + conn_put(con); + } return 0; } -- cgit v0.10.2 From fe8e4649397915cf3b2ab0b695929a27e543967e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:18 +0100 Subject: tipc: avoid to unnecessary process switch under non-block mode When messages are received via tipc socket under non-block mode, schedule_timeout() is called in tipc_wait_for_rcvmsg(), that is, the process of receiving messages will be scheduled once although timeout value passed to schedule_timeout() is 0. The same issue exists in accept()/wait_for_accept(). To avoid this unnecessary process switch, we only call schedule_timeout() if the timeout value is non-zero. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a4cf274..0ed0eaa 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -997,7 +997,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { if (sock->state == SS_DISCONNECTING) { err = -ENOTCONN; break; @@ -1623,7 +1623,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) for (;;) { prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); -- cgit v0.10.2 From edcc0511b5ee7235282a688cd604e3ae7f9e1fc9 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:19 +0100 Subject: tipc: drop subscriber connection id invalidation When a topology server subscriber is disconnected, the associated connection id is set to zero. A check vs zero is then done in the subscription timeout function to see if the subscriber have been shut down. This is unnecessary, because all subscription timers will be cancelled when a subscriber terminates. Setting the connection id to zero is actually harmful because id zero is the identity of the topology server listening socket, and can cause a race that leads to this socket being closed instead. Signed-off-by: Erik Hugne Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index a6ce3bb..11c9ae0 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -149,14 +149,6 @@ static void subscr_timeout(struct tipc_subscription *sub) /* The spin lock per subscriber is used to protect its members */ spin_lock_bh(&subscriber->lock); - /* Validate if the connection related to the subscriber is - * closed (in case subscriber is terminating) - */ - if (subscriber->conid == 0) { - spin_unlock_bh(&subscriber->lock); - return; - } - /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { spin_unlock_bh(&subscriber->lock); @@ -211,9 +203,6 @@ static void subscr_release(struct tipc_subscriber *subscriber) spin_lock_bh(&subscriber->lock); - /* Invalidate subscriber reference */ - subscriber->conid = 0; - /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { -- cgit v0.10.2 From 1bb8dce57f4d15233688c68990852a10eb1cd79f Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:20 +0100 Subject: tipc: fix memory leak during module removal When the TIPC module is removed, the tasklet handler is disabled before all other subsystems. This will cause lingering publications in the name table because the node_down tasklets responsible to clean up publications from an unreachable node will never run. When the name table is shut down, these publications are detected and an error message is logged: tipc: nametbl_stop(): orphaned hash chain detected This is actually a memory leak, introduced with commit 993b858e37b3120ee76d9957a901cca22312ffaa ("tipc: correct the order of stopping services at rmmod") Instead of just logging an error and leaking memory, we free the orphaned entries during nametable shutdown. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 48302be..042e8e3 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -941,17 +941,48 @@ int tipc_nametbl_init(void) return 0; } +/** + * tipc_purge_publications - remove all publications for a given type + * + * tipc_nametbl_lock must be held when calling this function + */ +static void tipc_purge_publications(struct name_seq *seq) +{ + struct publication *publ, *safe; + struct sub_seq *sseq; + struct name_info *info; + + if (!seq->sseqs) { + nameseq_delete_empty(seq); + return; + } + sseq = seq->sseqs; + info = sseq->info; + list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { + tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, + publ->ref, publ->key); + } +} + void tipc_nametbl_stop(void) { u32 i; + struct name_seq *seq; + struct hlist_head *seq_head; + struct hlist_node *safe; - /* Verify name table is empty, then release it */ + /* Verify name table is empty and purge any lingering + * publications, then release the name table + */ write_lock_bh(&tipc_nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&table.types[i])) continue; - pr_err("nametbl_stop(): orphaned hash chain detected\n"); - break; + seq_head = &table.types[i]; + hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { + tipc_purge_publications(seq); + } + continue; } kfree(table.types); table.types = NULL; -- cgit v0.10.2 From 2892505ea170094f982516bb38105eac45f274b1 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:21 +0100 Subject: tipc: don't log disabled tasklet handler errors Failure to schedule a TIPC tasklet with tipc_k_signal because the tasklet handler is disabled is not an error. It means TIPC is currently in the process of shutting down. We remove the error logging in this case. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/handler.c b/net/tipc/handler.c index e4bc8a2..1fabf16 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -58,7 +58,6 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument) spin_lock_bh(&qitem_lock); if (!handler_enabled) { - pr_err("Signal request ignored by handler\n"); spin_unlock_bh(&qitem_lock); return -ENOPROTOOPT; } -- cgit v0.10.2 From f5179287b016cc61d6ad77b4a15fab9b6932df83 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Mon, 10 Feb 2014 18:00:17 +0100 Subject: MIPS: Fix randconfig build error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CC arch/mips/kernel/ptrace.o In file included from arch/mips/kernel/ptrace.c:42:0: arch/mips/kernel/ptrace.c: In function ‘mips_get_syscall_arg’: /home/ralf/src/linux/linux-mips/arch/mips/include/asm/syscall.h:60:1: error: control reaches end of non-void function [-Werror=return-type] cc1: all warnings being treated as errors make[2]: *** [arch/mips/kernel/ptrace.o] Error 1 make[1]: *** [arch/mips/kernel] Error 2 make: *** [arch/mips] Error 2 Fixed by marking the end of mips_get_syscall_arg() as unreachable. Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 33e8dbf..5ce530f 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -13,6 +13,7 @@ #ifndef __ASM_MIPS_SYSCALL_H #define __ASM_MIPS_SYSCALL_H +#include #include #include #include @@ -57,6 +58,8 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg, default: BUG(); } + + unreachable(); } static inline long syscall_get_return_value(struct task_struct *task, -- cgit v0.10.2 From d334c2b9de03c62767d04d8d927bc6d06f3fbd62 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Thu, 20 Feb 2014 14:37:40 +0100 Subject: MIPS: Alchemy: Fix unchecked kstrtoul return value enabled __must_check logic triggers a build error for mtx1 and gpr in the prom init code. Fix by checking the kstrtoul() return value. Signed-off-by: Manuel Lauss Cc: Linux-MIPS Patchwork: https://patchwork.linux-mips.org/patch/6574/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/alchemy/board-gpr.c b/arch/mips/alchemy/board-gpr.c index 9edc35f..acf9a2a 100644 --- a/arch/mips/alchemy/board-gpr.c +++ b/arch/mips/alchemy/board-gpr.c @@ -53,10 +53,8 @@ void __init prom_init(void) prom_init_cmdline(); memsize_str = prom_getenv("memsize"); - if (!memsize_str) + if (!memsize_str || kstrtoul(memsize_str, 0, &memsize)) memsize = 0x04000000; - else - strict_strtoul(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c index 9969dba..25a59a2 100644 --- a/arch/mips/alchemy/board-mtx1.c +++ b/arch/mips/alchemy/board-mtx1.c @@ -52,10 +52,8 @@ void __init prom_init(void) prom_init_cmdline(); memsize_str = prom_getenv("memsize"); - if (!memsize_str) + if (!memsize_str || kstrtoul(memsize_str, 0, &memsize)) memsize = 0x04000000; - else - strict_strtoul(memsize_str, 0, &memsize); add_memory_region(0, memsize, BOOT_MEM_RAM); } -- cgit v0.10.2 From 4890e2eb69d0461cbc532265e03280ffce670ee8 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 19 Feb 2014 10:17:18 +0000 Subject: MIPS: bcm47xx: Include missing errno.h for ENXIO Fixes the following build problen on allmodconfig: arch/mips/bcm47xx/board.c: In function 'bcm47xx_board_detect': arch/mips/bcm47xx/board.c:291:14: error: 'ENXIO' undeclared (first use in this function) Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6571/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/bcm47xx/board.c b/arch/mips/bcm47xx/board.c index 6d612e2..cdd8246 100644 --- a/arch/mips/bcm47xx/board.c +++ b/arch/mips/bcm47xx/board.c @@ -1,3 +1,4 @@ +#include #include #include #include -- cgit v0.10.2 From 9c1f6e008297e184a7f701f2f03c9269011fd049 Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Fri, 28 Feb 2014 10:23:01 -0800 Subject: MIPS: APRP: Fix the linking of rtlx interrupt hook There are 2 errors with the existing aprp_hook linking: - The prefix CONFIG_ is missing; - The hook should be linked exclusively in the cases of MT and CMP. Signed-off-by: Deng-Cheng Zhu Reviewed-by: Steven J. Hill Cc: linux-mips@linux-mips.org Cc: john@phrozen.org Patchwork: https://patchwork.linux-mips.org/patch/6588/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index ca3e3a4..2242181 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -119,7 +119,7 @@ static void malta_hw0_irqdispatch(void) do_IRQ(MALTA_INT_BASE + irq); -#ifdef MIPS_VPE_APSP_API +#ifdef CONFIG_MIPS_VPE_APSP_API_MT if (aprp_hook) aprp_hook(); #endif @@ -310,7 +310,7 @@ static void ipi_call_dispatch(void) static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) { -#ifdef MIPS_VPE_APSP_API +#ifdef CONFIG_MIPS_VPE_APSP_API_CMP if (aprp_hook) aprp_hook(); #endif -- cgit v0.10.2 From eee5794881d50e8ac3a56c74d3131f2364f200b9 Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Fri, 28 Feb 2014 10:23:02 -0800 Subject: MIPS: APRP: Unregister rtlx interrupt hook at module exit If the aprp_hook is not assigned back to NULL, it will still be called after module exits. This is not wanted. Reviewed-by: Steven J. Hill Signed-off-by: Deng-Cheng Zhu Cc: linux-mips@linux-mips.org Cc: john@phrozen.org Patchwork: https://patchwork.linux-mips.org/patch/6590/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/kernel/rtlx-cmp.c b/arch/mips/kernel/rtlx-cmp.c index 56dc696..758fb3c 100644 --- a/arch/mips/kernel/rtlx-cmp.c +++ b/arch/mips/kernel/rtlx-cmp.c @@ -112,5 +112,8 @@ void __exit rtlx_module_exit(void) for (i = 0; i < RTLX_CHANNELS; i++) device_destroy(mt_class, MKDEV(major, i)); + unregister_chrdev(major, RTLX_MODULE_NAME); + + aprp_hook = NULL; } diff --git a/arch/mips/kernel/rtlx-mt.c b/arch/mips/kernel/rtlx-mt.c index 91d61ba..9c1aca0 100644 --- a/arch/mips/kernel/rtlx-mt.c +++ b/arch/mips/kernel/rtlx-mt.c @@ -144,5 +144,8 @@ void __exit rtlx_module_exit(void) for (i = 0; i < RTLX_CHANNELS; i++) device_destroy(mt_class, MKDEV(major, i)); + unregister_chrdev(major, RTLX_MODULE_NAME); + + aprp_hook = NULL; } -- cgit v0.10.2 From 031365b471e3268e3b8d6991b2cf0bae6b5e8bdc Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Fri, 28 Feb 2014 10:23:03 -0800 Subject: MIPS: APRP: Choose the correct VPE loader by fixing the linking Now we have CONFIG_MIPS_VPE_LOADER and CONFIG_MIPS_VPE_LOADER_[CMP|MT]. The latter two are used by the 2 exclusive flavors. The vpe_run in malta-amon.c is for CMP APRP. Without the fix, this vpe_run will be used in MT APRP. Reviewed-by: Steven J. Hill Signed-off-by: Deng-Cheng Zhu Cc: linux-mips@linux-mips.org Cc: john@phrozen.org Patchwork: https://patchwork.linux-mips.org/patch/6589/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/mti-malta/malta-amon.c b/arch/mips/mti-malta/malta-amon.c index 592ac04..84ac523 100644 --- a/arch/mips/mti-malta/malta-amon.c +++ b/arch/mips/mti-malta/malta-amon.c @@ -72,7 +72,7 @@ int amon_cpu_start(int cpu, return 0; } -#ifdef CONFIG_MIPS_VPE_LOADER +#ifdef CONFIG_MIPS_VPE_LOADER_CMP int vpe_run(struct vpe *v) { struct vpe_notifications *n; -- cgit v0.10.2 From e588e2f286ed7da011ed357c24c5b9a554e26595 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Mar 2014 18:06:41 +0100 Subject: inet: frag: make sure forced eviction removes all frags Quoting Alexander Aring: While fragmentation and unloading of 6lowpan module I got this kernel Oops after few seconds: BUG: unable to handle kernel paging request at f88bbc30 [..] Modules linked in: ipv6 [last unloaded: 6lowpan] Call Trace: [] ? call_timer_fn+0x54/0xb3 [] ? process_timeout+0xa/0xa [] run_timer_softirq+0x140/0x15f Problem is that incomplete frags are still around after unload; when their frag expire timer fires, we get crash. When a netns is removed (also done when unloading module), inet_frag calls the evictor with 'force' argument to purge remaining frags. The evictor loop terminates when accounted memory ('work') drops to 0 or the lru-list becomes empty. However, the mem accounting is done via percpu counters and may not be accurate, i.e. loop may terminate prematurely. Alter evictor to only stop once the lru list is empty when force is requested. Reported-by: Phoebe Buckheister Reported-by: Alexander Aring Tested-by: Alexander Aring Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 322dceb..3b01959 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -208,7 +208,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) } work = frag_mem_limit(nf) - nf->low_thresh; - while (work > 0) { + while (work > 0 || force) { spin_lock(&nf->lru_lock); if (list_empty(&nf->lru_list)) { -- cgit v0.10.2 From 0ca49345b6f489e95f8d6edeb0b092e257475b2a Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 6 Mar 2014 20:39:04 +0100 Subject: firewire: ohci: fix probe failure with Agere/LSI controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit bd972688eb24 "firewire: ohci: Fix 'failed to read phy reg' on FW643 rev8", there is a high chance that firewire-ohci fails to initialize LSI née Agere controllers. https://bugzilla.kernel.org/show_bug.cgi?id=65151 Peter Hurley points out the reason: IEEE 1394a:2000 clause 5A.1 (or IEEE 1394:2008 clause 17.2.1) say: "The PHY shall insure that no more than 10 ms elapse from the reassertion of LPS until the interface is reset. The link shall not assert LReq until the reset is complete." In other words, the link needs to give the PHY at least 10 ms to get the interface operational. With just the msleep(1) in bd972688eb24, the first read_phy_reg() during ohci_enable() may happen before the phy-link interface reset was finished, and fail. Due to the high variability of msleep(n) with small n, this failure was not fully reproducible, and not apparent at all with low CONFIG_HZ setting. On the other hand, Peter can no longer reproduce the issue with FW643 rev8. The read phy reg failures that happened back then may have had an unrelated cause. So, just revert bd972688eb24, except for the valid comment on TSB82AA2 cards. Reported-by: Mikhail Gavrilov Reported-by: Jay Fenlason Reported-by: Clemens Ladisch Reported-by: Peter Hurley Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Stefan Richter diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 6f74d8d..8db6632 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -290,7 +290,6 @@ static char ohci_driver_name[] = KBUILD_MODNAME; #define QUIRK_NO_MSI 0x10 #define QUIRK_TI_SLLZ059 0x20 #define QUIRK_IR_WAKE 0x40 -#define QUIRK_PHY_LCTRL_TIMEOUT 0x80 /* In case of multiple matches in ohci_quirks[], only the first one is used. */ static const struct { @@ -303,10 +302,7 @@ static const struct { QUIRK_BE_HEADERS}, {PCI_VENDOR_ID_ATT, PCI_DEVICE_ID_AGERE_FW643, 6, - QUIRK_PHY_LCTRL_TIMEOUT | QUIRK_NO_MSI}, - - {PCI_VENDOR_ID_ATT, PCI_ANY_ID, PCI_ANY_ID, - QUIRK_PHY_LCTRL_TIMEOUT}, + QUIRK_NO_MSI}, {PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_SB1394, PCI_ANY_ID, QUIRK_RESET_PACKET}, @@ -353,7 +349,6 @@ MODULE_PARM_DESC(quirks, "Chip quirks (default = 0" ", disable MSI = " __stringify(QUIRK_NO_MSI) ", TI SLLZ059 erratum = " __stringify(QUIRK_TI_SLLZ059) ", IR wake unreliable = " __stringify(QUIRK_IR_WAKE) - ", phy LCtrl timeout = " __stringify(QUIRK_PHY_LCTRL_TIMEOUT) ")"); #define OHCI_PARAM_DEBUG_AT_AR 1 @@ -2299,9 +2294,6 @@ static int ohci_enable(struct fw_card *card, * TI TSB82AA2 + TSB81BA3(A) cards signal LPS enabled early but * cannot actually use the phy at that time. These need tens of * millisecods pause between LPS write and first phy access too. - * - * But do not wait for 50msec on Agere/LSI cards. Their phy - * arbitration state machine may time out during such a long wait. */ reg_write(ohci, OHCI1394_HCControlSet, @@ -2309,11 +2301,8 @@ static int ohci_enable(struct fw_card *card, OHCI1394_HCControl_postedWriteEnable); flush_writes(ohci); - if (!(ohci->quirks & QUIRK_PHY_LCTRL_TIMEOUT)) + for (lps = 0, i = 0; !lps && i < 3; i++) { msleep(50); - - for (lps = 0, i = 0; !lps && i < 150; i++) { - msleep(1); lps = reg_read(ohci, OHCI1394_HCControlSet) & OHCI1394_HCControl_LPS; } -- cgit v0.10.2 From 4ae6e50c76def306d726a5d2678e88998ad5258e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 4 Mar 2014 17:35:44 -0700 Subject: phy: fix compiler array bounds warning on settings[] With -Werror=array-bounds, gcc v4.7.x warns that in phy_find_valid(), the settings[] "array subscript is above array bounds", I think because idx is a signed integer and if the caller supplied idx < 0, we pass the guard but still reference out of bounds. Fix this by making idx unsigned here and elsewhere. Signed-off-by: Bjorn Helgaas Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 19c9eca..76d96b9 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -164,9 +164,9 @@ static const struct phy_setting settings[] = { * of that setting. Returns the index of the last setting if * none of the others match. */ -static inline int phy_find_setting(int speed, int duplex) +static inline unsigned int phy_find_setting(int speed, int duplex) { - int idx = 0; + unsigned int idx = 0; while (idx < ARRAY_SIZE(settings) && (settings[idx].speed != speed || settings[idx].duplex != duplex)) @@ -185,7 +185,7 @@ static inline int phy_find_setting(int speed, int duplex) * the mask in features. Returns the index of the last setting * if nothing else matches. */ -static inline int phy_find_valid(int idx, u32 features) +static inline unsigned int phy_find_valid(unsigned int idx, u32 features) { while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) idx++; @@ -204,7 +204,7 @@ static inline int phy_find_valid(int idx, u32 features) static void phy_sanitize_settings(struct phy_device *phydev) { u32 features = phydev->supported; - int idx; + unsigned int idx; /* Sanitize settings based on PHY capabilities */ if ((features & SUPPORTED_Autoneg) == 0) @@ -954,7 +954,8 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) (phydev->interface == PHY_INTERFACE_MODE_RGMII))) { int eee_lp, eee_cap, eee_adv; u32 lp, cap, adv; - int idx, status; + int status; + unsigned int idx; /* Read phy status to properly get the right settings */ status = phy_read_status(phydev); -- cgit v0.10.2 From adca4767821e54c72d4a2f467af77923f2c87e07 Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Tue, 4 Mar 2014 17:24:10 -0800 Subject: net: Improve SO_TIMESTAMPING documentation and fix a minor code bug The original documentation was very unclear. The code fix is presumably related to the formerly unclear documentation: SOCK_TIMESTAMPING_RX_SOFTWARE has no effect on __sock_recv_timestamp's behavior, so calling __sock_recv_ts_and_drops from sock_recv_ts_and_drops if only SOCK_TIMESTAMPING_RX_SOFTWARE is set is pointless. This should have no user-observable effect. Signed-off-by: Andy Lutomirski Acked-by: Richard Cochran Signed-off-by: David S. Miller diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 661d3c3..048c92b 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -21,26 +21,38 @@ has such a feature). SO_TIMESTAMPING: -Instructs the socket layer which kind of information is wanted. The -parameter is an integer with some of the following bits set. Setting -other bits is an error and doesn't change the current state. - -SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamp in hardware -SOF_TIMESTAMPING_TX_SOFTWARE: if SOF_TIMESTAMPING_TX_HARDWARE is off or - fails, then do it in software -SOF_TIMESTAMPING_RX_HARDWARE: return the original, unmodified time stamp - as generated by the hardware -SOF_TIMESTAMPING_RX_SOFTWARE: if SOF_TIMESTAMPING_RX_HARDWARE is off or - fails, then do it in software -SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp -SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to - the system time base -SOF_TIMESTAMPING_SOFTWARE: return system time stamp generated in - software - -SOF_TIMESTAMPING_TX/RX determine how time stamps are generated. -SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the -following control message: +Instructs the socket layer which kind of information should be collected +and/or reported. The parameter is an integer with some of the following +bits set. Setting other bits is an error and doesn't change the current +state. + +Four of the bits are requests to the stack to try to generate +timestamps. Any combination of them is valid. + +SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamps in hardware +SOF_TIMESTAMPING_TX_SOFTWARE: try to obtain send time stamps in software +SOF_TIMESTAMPING_RX_HARDWARE: try to obtain receive time stamps in hardware +SOF_TIMESTAMPING_RX_SOFTWARE: try to obtain receive time stamps in software + +The other three bits control which timestamps will be reported in a +generated control message. If none of these bits are set or if none of +the set bits correspond to data that is available, then the control +message will not be generated: + +SOF_TIMESTAMPING_SOFTWARE: report systime if available +SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available +SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available + +It is worth noting that timestamps may be collected for reasons other +than being requested by a particular socket with +SOF_TIMESTAMPING_[TR]X_(HARD|SOFT)WARE. For example, most drivers that +can generate hardware receive timestamps ignore +SOF_TIMESTAMPING_RX_HARDWARE. It is still a good idea to set that flag +in case future drivers pay attention. + +If timestamps are reported, they will appear in a control message with +cmsg_level==SOL_SOCKET, cmsg_type==SO_TIMESTAMPING, and a payload like +this: struct scm_timestamping { struct timespec systime; diff --git a/include/net/sock.h b/include/net/sock.h index 5c3f7c3..7c4167b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2186,7 +2186,6 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, { #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ - (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \ (1UL << SOCK_TIMESTAMPING_SOFTWARE) | \ (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \ (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE)) -- cgit v0.10.2 From 0a13404dd3bf4ea870e3d96270b5a382edca85c0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 5 Mar 2014 14:29:58 +1100 Subject: net: unix socket code abuses csum_partial The unix socket code is using the result of csum_partial to hash into a lookup table: unix_hash_fold(csum_partial(sunaddr, len, 0)); csum_partial is only guaranteed to produce something that can be folded into a checksum, as its prototype explains: * returns a 32-bit number suitable for feeding into itself * or csum_tcpudp_magic The 32bit value should not be used directly. Depending on the alignment, the ppc64 csum_partial will return different 32bit partial checksums that will fold into the same 16bit checksum. This difference causes the following testcase (courtesy of Gustavo) to sometimes fail: #include #include int main() { int fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); int i = 1; setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &i, 4); struct sockaddr addr; addr.sa_family = AF_LOCAL; bind(fd, &addr, 2); listen(fd, 128); struct sockaddr_storage ss; socklen_t sslen = (socklen_t)sizeof(ss); getsockname(fd, (struct sockaddr*)&ss, &sslen); fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); if (connect(fd, (struct sockaddr*)&ss, sslen) == -1){ perror(NULL); return 1; } printf("OK\n"); return 0; } As suggested by davem, fix this by using csum_fold to fold the partial 32bit checksum into a 16bit checksum before using it. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: David S. Miller diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 29fc8be..ce6ec6c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -163,9 +163,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline unsigned int unix_hash_fold(__wsum n) { - unsigned int hash = (__force unsigned int)n; + unsigned int hash = (__force unsigned int)csum_fold(n); - hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); } -- cgit v0.10.2 From d746ca9561440685edb62614d1bcbbc27ff50e66 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 5 Mar 2014 14:51:37 +1100 Subject: ibmveth: Fix endian issues with MAC addresses The code to load a MAC address into a u64 for passing to the hypervisor via a register is broken on little endian. Create a helper function called ibmveth_encode_mac_addr which does the right thing in both big and little endian. We were storing the MAC address in a long in struct ibmveth_adapter. It's never used so remove it - we don't need another place in the driver where we create endian issues with MAC addresses. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 4be9715..1fc8334 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -522,10 +522,21 @@ retry: return rc; } +static u64 ibmveth_encode_mac_addr(u8 *mac) +{ + int i; + u64 encoded = 0; + + for (i = 0; i < ETH_ALEN; i++) + encoded = (encoded << 8) | mac[i]; + + return encoded; +} + static int ibmveth_open(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); - u64 mac_address = 0; + u64 mac_address; int rxq_entries = 1; unsigned long lpar_rc; int rc; @@ -579,8 +590,7 @@ static int ibmveth_open(struct net_device *netdev) adapter->rx_queue.num_slots = rxq_entries; adapter->rx_queue.toggle = 1; - memcpy(&mac_address, netdev->dev_addr, netdev->addr_len); - mac_address = mac_address >> 16; + mac_address = ibmveth_encode_mac_addr(netdev->dev_addr); rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | adapter->rx_queue.queue_len; @@ -1183,8 +1193,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) /* add the addresses to the filter table */ netdev_for_each_mc_addr(ha, netdev) { /* add the multicast address to the filter table */ - unsigned long mcast_addr = 0; - memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN); + u64 mcast_addr; + mcast_addr = ibmveth_encode_mac_addr(ha->addr); lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, IbmVethMcastAddFilter, mcast_addr); @@ -1372,9 +1382,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); - adapter->mac_addr = 0; - memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN); - netdev->irq = dev->irq; netdev->netdev_ops = &ibmveth_netdev_ops; netdev->ethtool_ops = &netdev_ethtool_ops; @@ -1383,7 +1390,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; netdev->features |= netdev->hw_features; - memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len); + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { struct kobject *kobj = &adapter->rx_buff_pool[i].kobj; diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 451ba79..1f37499 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -138,7 +138,6 @@ struct ibmveth_adapter { struct napi_struct napi; struct net_device_stats stats; unsigned int mcastFilterSize; - unsigned long mac_addr; void * buffer_list_addr; void * filter_list_addr; dma_addr_t buffer_list_dma; -- cgit v0.10.2 From e58e241c1788856f69d58821e91e5c988905252d Mon Sep 17 00:00:00 2001 From: David Miller Date: Tue, 4 Mar 2014 15:28:35 -0500 Subject: sparc: serial: Clean up the locking for -rt Signed-off-by: David S. Miller Tested-by: Allen Pais diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c index cf86e72..dc697ce 100644 --- a/drivers/tty/serial/sunhv.c +++ b/drivers/tty/serial/sunhv.c @@ -433,13 +433,10 @@ static void sunhv_console_write_paged(struct console *con, const char *s, unsign unsigned long flags; int locked = 1; - local_irq_save(flags); - if (port->sysrq) { - locked = 0; - } else if (oops_in_progress) { - locked = spin_trylock(&port->lock); - } else - spin_lock(&port->lock); + if (port->sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&port->lock, flags); + else + spin_lock_irqsave(&port->lock, flags); while (n > 0) { unsigned long ra = __pa(con_write_page); @@ -470,8 +467,7 @@ static void sunhv_console_write_paged(struct console *con, const char *s, unsign } if (locked) - spin_unlock(&port->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&port->lock, flags); } static inline void sunhv_console_putchar(struct uart_port *port, char c) @@ -492,7 +488,10 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig unsigned long flags; int i, locked = 1; - local_irq_save(flags); + if (port->sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&port->lock, flags); + else + spin_lock_irqsave(&port->lock, flags); if (port->sysrq) { locked = 0; } else if (oops_in_progress) { @@ -507,8 +506,7 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig } if (locked) - spin_unlock(&port->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&port->lock, flags); } static struct console sunhv_console = { diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c index 380fb53..5faa8e9 100644 --- a/drivers/tty/serial/sunsab.c +++ b/drivers/tty/serial/sunsab.c @@ -844,20 +844,16 @@ static void sunsab_console_write(struct console *con, const char *s, unsigned n) unsigned long flags; int locked = 1; - local_irq_save(flags); - if (up->port.sysrq) { - locked = 0; - } else if (oops_in_progress) { - locked = spin_trylock(&up->port.lock); - } else - spin_lock(&up->port.lock); + if (up->port.sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&up->port.lock, flags); + else + spin_lock_irqsave(&up->port.lock, flags); uart_console_write(&up->port, s, n, sunsab_console_putchar); sunsab_tec_wait(up); if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&up->port.lock, flags); } static int sunsab_console_setup(struct console *con, char *options) diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c index db79b76..9a0f24f 100644 --- a/drivers/tty/serial/sunsu.c +++ b/drivers/tty/serial/sunsu.c @@ -1295,13 +1295,10 @@ static void sunsu_console_write(struct console *co, const char *s, unsigned int ier; int locked = 1; - local_irq_save(flags); - if (up->port.sysrq) { - locked = 0; - } else if (oops_in_progress) { - locked = spin_trylock(&up->port.lock); - } else - spin_lock(&up->port.lock); + if (up->port.sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&up->port.lock, flags); + else + spin_lock_irqsave(&up->port.lock, flags); /* * First save the UER then disable the interrupts @@ -1319,8 +1316,7 @@ static void sunsu_console_write(struct console *co, const char *s, serial_out(up, UART_IER, ier); if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&up->port.lock, flags); } /* diff --git a/drivers/tty/serial/sunzilog.c b/drivers/tty/serial/sunzilog.c index 45a8c6a..a2c40ed 100644 --- a/drivers/tty/serial/sunzilog.c +++ b/drivers/tty/serial/sunzilog.c @@ -1195,20 +1195,16 @@ sunzilog_console_write(struct console *con, const char *s, unsigned int count) unsigned long flags; int locked = 1; - local_irq_save(flags); - if (up->port.sysrq) { - locked = 0; - } else if (oops_in_progress) { - locked = spin_trylock(&up->port.lock); - } else - spin_lock(&up->port.lock); + if (up->port.sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&up->port.lock, flags); + else + spin_lock_irqsave(&up->port.lock, flags); uart_console_write(&up->port, s, count, sunzilog_putchar); udelay(2); if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&up->port.lock, flags); } static int __init sunzilog_console_setup(struct console *con, char *options) -- cgit v0.10.2 From d2d273ffabd315eecefce21a4391d44b6e156b73 Mon Sep 17 00:00:00 2001 From: Anton Nayshtut Date: Wed, 5 Mar 2014 08:30:08 +0200 Subject: ipv6: Fix exthdrs offload registration. Without this fix, ipv6_exthdrs_offload_init doesn't register IPPROTO_DSTOPTS offload, but returns 0 (as the IPPROTO_ROUTING registration actually succeeds). This then causes the ipv6_gso_segment to drop IPv6 packets with IPPROTO_DSTOPTS header. The issue detected and the fix verified by running MS HCK Offload LSO test on top of QEMU Windows guests, as this test sends IPv6 packets with IPPROTO_DSTOPTS. Signed-off-by: Anton Nayshtut Signed-off-by: David S. Miller diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index cf77f3a..447a7fb 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void) int ret; ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); - if (!ret) + if (ret) goto out; ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); - if (!ret) + if (ret) goto out_rt; out: -- cgit v0.10.2 From 14eedc32a3c0ec9dd70448a73763ee21feae3111 Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Fri, 28 Feb 2014 20:50:23 +0200 Subject: drm/radeon: TTM must be init with cpu-visible VRAM, v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, a bo may get created in the cpu-inaccessible vram. Before the CP engines get setup, all copies are done via cpu memcpy. This means that the cpu tries to read from inaccessible memory, fails, and the radeon module proceeds to disable acceleration. Doing this has no downsides, as the real VRAM size gets set as soon as the CP engines get init. This is a candidate for 3.14 fixes. v2: Add comment on why the function is used Signed-off-by: Lauri Kasanen Signed-off-by: Alex Deucher Reviewed-by: Christian König Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 77f5b0c..5cdba9b 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -714,6 +714,9 @@ int radeon_ttm_init(struct radeon_device *rdev) DRM_ERROR("Failed initializing VRAM heap.\n"); return r; } + /* Change the size here instead of the init above so only lpfn is affected */ + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->stollen_vga_memory); -- cgit v0.10.2 From bc6a62955f6ea6aabe26292a21dbdd67f5b89b67 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Feb 2014 12:01:28 -0500 Subject: drm/radeon: resume old pm late Moving the pm resume up in the init order to fix dpm seems to have regressed somes cases with the old pm code. Move it back to late resume. Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e6419ca..a6d1e6c 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7902,7 +7902,8 @@ int cik_resume(struct radeon_device *rdev) /* init golden registers */ cik_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = cik_startup(rdev); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 8a2c010..27b0ff1 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5299,7 +5299,8 @@ int evergreen_resume(struct radeon_device *rdev) /* init golden registers */ evergreen_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = evergreen_startup(rdev); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index ea932ac..bf6300c 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2105,7 +2105,8 @@ int cayman_resume(struct radeon_device *rdev) /* init golden registers */ ni_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = cayman_startup(rdev); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ef024ce..3cc78bb 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3942,8 +3942,6 @@ int r100_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = r100_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 7c63ef8..0b658b3 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1430,8 +1430,6 @@ int r300_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = r300_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 3768aab..802b192 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -325,8 +325,6 @@ int r420_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = r420_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index e209eb7..98d6053 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -240,8 +240,6 @@ int r520_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = r520_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cdbc417..647ef40 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2968,7 +2968,8 @@ int r600_resume(struct radeon_device *rdev) /* post card */ atom_asic_init(rdev->mode_info.atom_context); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = r600_startup(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b012cbb..044bc98 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1521,13 +1521,16 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) if (r) DRM_ERROR("ib ring test failed (%d).\n", r); - if (rdev->pm.dpm_enabled) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { /* do dpm late init */ r = radeon_pm_late_init(rdev); if (r) { rdev->pm.dpm_enabled = false; DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); } + } else { + /* resume old pm late */ + radeon_pm_resume(rdev); } radeon_restore_bios_scratch_regs(rdev); diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index b5c2369..130d5cc 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -474,8 +474,6 @@ int rs400_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = rs400_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index fdcde76..72d3616 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -1048,8 +1048,6 @@ int rs600_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = rs600_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 3595073..3462b64 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -756,8 +756,6 @@ int rs690_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = rs690_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 98e8138..237dd29 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -586,8 +586,6 @@ int rv515_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = rv515_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 4e37a42..fef3107 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1811,7 +1811,8 @@ int rv770_resume(struct radeon_device *rdev) /* init golden registers */ rv770_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = rv770_startup(rdev); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 8357832..9a124d0 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6618,7 +6618,8 @@ int si_resume(struct radeon_device *rdev) /* init golden registers */ si_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = si_startup(rdev); -- cgit v0.10.2 From 0d997b68574217b41c1288ee4c6728c7003aac1f Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 4 Mar 2014 10:34:48 +0100 Subject: drm/radeon: silence GCC warning on 32 bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building radeon_ttm.o on 32 bit x86 triggers a warning: In file included from include/asm-generic/bug.h:13:0, from [...]/arch/x86/include/asm/bug.h:38, from include/linux/bug.h:4, from include/drm/drm_mm.h:39, from include/drm/drm_vma_manager.h:26, from include/drm/ttm/ttm_bo_api.h:35, from drivers/gpu/drm/radeon/radeon_ttm.c:32: drivers/gpu/drm/radeon/radeon_ttm.c: In function 'radeon_ttm_gtt_read': include/linux/kernel.h:712:17: warning: comparison of distinct pointer types lacks a cast [enabled by default] (void) (&_min1 == &_min2); \ ^ drivers/gpu/drm/radeon/radeon_ttm.c:938:22: note: in expansion of macro 'min' ssize_t cur_size = min(size, PAGE_SIZE - off); ^ Silence this warning by using min_t(). Since cur_size will never be negative and its upper bound is PAGE_SIZE, we can change its type to size_t and use min_t(size_t, [...]) here. Signed-off-by: Paul Bolle Signed-off-by: Alex Deucher Reviewed-by: Christian König diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 5cdba9b..040a2a1 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -938,7 +938,7 @@ static ssize_t radeon_ttm_gtt_read(struct file *f, char __user *buf, while (size) { loff_t p = *pos / PAGE_SIZE; unsigned off = *pos & ~PAGE_MASK; - ssize_t cur_size = min(size, PAGE_SIZE - off); + size_t cur_size = min_t(size_t, size, PAGE_SIZE - off); struct page *page; void *ptr; -- cgit v0.10.2 From 972c5ddb177e14835212de63f8f16a690e4db9ff Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Mar 2014 15:50:29 -0500 Subject: drm/radeon/cik: fix typo in documentation Copy-paste typo. Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a6d1e6c..e22be84 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3046,7 +3046,7 @@ static u32 cik_create_bitmask(u32 bit_width) } /** - * cik_select_se_sh - select which SE, SH to address + * cik_get_rb_disabled - computes the mask of disabled RBs * * @rdev: radeon_device pointer * @max_rb_num: max RBs (render backends) for the asic -- cgit v0.10.2 From 13714323f83ffa5a772fe0d8b74e0fa32ee08819 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 6 Mar 2014 13:16:55 -0500 Subject: drm/radeon/dpm: fix typo in EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters Should be at 0x8 rather than 0. fixes: https://bugzilla.kernel.org/show_bug.cgi?id=60523 Noticed by ArtForz on #radeon Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/evergreen_smc.h b/drivers/gpu/drm/radeon/evergreen_smc.h index 76ada8c..3a03ba3 100644 --- a/drivers/gpu/drm/radeon/evergreen_smc.h +++ b/drivers/gpu/drm/radeon/evergreen_smc.h @@ -57,7 +57,7 @@ typedef struct SMC_Evergreen_MCRegisters SMC_Evergreen_MCRegisters; #define EVERGREEN_SMC_FIRMWARE_HEADER_LOCATION 0x100 -#define EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters 0x0 +#define EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters 0x8 #define EVERGREEN_SMC_FIRMWARE_HEADER_stateTable 0xC #define EVERGREEN_SMC_FIRMWARE_HEADER_mcRegisterTable 0x20 -- cgit v0.10.2 From 5bd4e4c158ceb4e76ce9ed005c876d59caad8af2 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 6 Mar 2014 16:53:11 -0500 Subject: bonding: correctly handle out of range parameters for lp_interval We didn't correctly check cases where the value for lp_interval is not within the legal range due to a missing table terminator. This would let userspace trigger a kernel panic by specifying a value out of range: echo -1 > /sys/devices/virtual/net/bond0/bonding/lp_interval Introduced by commit 4325b374f84 ("bonding: convert lp_interval to use the new option API"). Acked-by: Nikolay Aleksandrov Signed-off-by: Sasha Levin Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index c378784..298c265 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -121,6 +121,7 @@ static struct bond_opt_value bond_resend_igmp_tbl[] = { static struct bond_opt_value bond_lp_interval_tbl[] = { { "minval", 1, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", INT_MAX, BOND_VALFLAG_MAX}, + { NULL, -1, 0}, }; static struct bond_option bond_opts[] = { -- cgit v0.10.2 From 57352ef4f5f19969a50d42e84b274287993b576f Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 6 Mar 2014 18:28:16 +0200 Subject: net/mlx4_core: Fix memory access error in mlx4_QUERY_DEV_CAP_wrapper() Fix a regression introduced by [1]. outbox was accessed instead of outbox->buf. Typo was copy-pasted to [2] and [3]. [1] - cc1ade9 mlx4_core: Disable memory windows for virtual functions [2] - 4de6580 mlx4_core: Add support for steerable IB UD QPs [3] - 7ffdf72 net/mlx4_core: Add basic support for TCP/IP offloads under tunneling Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 91b69ff..8726e34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -859,7 +859,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); /* For guests, disable vxlan tunneling */ - MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN); + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN); field &= 0xf7; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN); @@ -869,7 +869,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); /* For guests, disable mw type 2 */ - MLX4_GET(bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); @@ -883,7 +883,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, } /* turn off ipoib managed steering for guests */ - MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); field &= ~0x80; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); -- cgit v0.10.2 From 97989356af0ec8b1b1658d804892abb354127330 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 6 Mar 2014 18:28:17 +0200 Subject: net/mlx4_core: mlx4_init_slave() shouldn't access comm channel before PF is ready Currently, the PF call to pci_enable_sriov from the PF probe function stalls for 10 seconds times the number of VFs probed on the host. This happens because the way for such VFs to determine of the PF initialization finished, is by attempting to issue reset on the comm-channel and get timeout (after 10s). The PF probe function is called from a kenernel workqueue, and therefore during that time, rcu lock is being held and kernel's workqueue is stalled. This blocks other processes that try to use the workqueue or rcu lock. For example, interface renaming which is calling rcu_synchronize is blocked, and timedout by systemd. Changed mlx4_init_slave() to allow VF probed on the host to immediatly detect that the PF is not ready, and return EPROBE_DEFER instantly. Only when the PF finishes the initialization, allow such VFs to access the comm channel. This issue and fix are relevant only for probed VFs on the hypervisor, there is no way to pass this information to a VM until comm channel is ready, so in a VM, if PF is not ready, the first command will be timedout after 10 seconds and return EPROBE_DEFER. Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5a6105f..30a08a6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -150,6 +150,8 @@ struct mlx4_port_config { struct pci_dev *pdev; }; +static atomic_t pf_loading = ATOMIC_INIT(0); + int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { @@ -1407,6 +1409,11 @@ static int mlx4_init_slave(struct mlx4_dev *dev) u32 slave_read; u32 cmd_channel_ver; + if (atomic_read(&pf_loading)) { + mlx4_warn(dev, "PF is not ready. Deferring probe\n"); + return -EPROBE_DEFER; + } + mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); @@ -2319,7 +2326,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) if (num_vfs) { mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); + + atomic_inc(&pf_loading); err = pci_enable_sriov(pdev, num_vfs); + atomic_dec(&pf_loading); + if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); -- cgit v0.10.2 From c88507fbad8055297c1d1e21e599f46960cbee39 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 6 Mar 2014 17:51:57 +0100 Subject: ipv6: don't set DST_NOCOUNT for remotely added routes DST_NOCOUNT should only be used if an authorized user adds routes locally. In case of routes which are added on behalf of router advertisments this flag must not get used as it allows an unlimited number of routes getting added remotely. Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11dac21..fba54a4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1513,7 +1513,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; -- cgit v0.10.2 From 77ac9a05d4a0be6b2ab22b61d7fb36d29c212d72 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 6 Mar 2014 13:26:36 +0100 Subject: drm: fix bochs kconfig dependencies Signed-off-by: Gerd Hoffmann Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/bochs/Kconfig b/drivers/gpu/drm/bochs/Kconfig index c8fcf12..5f8b0c2 100644 --- a/drivers/gpu/drm/bochs/Kconfig +++ b/drivers/gpu/drm/bochs/Kconfig @@ -2,6 +2,7 @@ config DRM_BOCHS tristate "DRM Support for bochs dispi vga interface (qemu stdvga)" depends on DRM && PCI select DRM_KMS_HELPER + select DRM_KMS_FB_HELPER select FB_SYS_FILLRECT select FB_SYS_COPYAREA select FB_SYS_IMAGEBLIT -- cgit v0.10.2 From f50d7146a298692daa730b40335e1466110727de Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 3 Mar 2014 12:18:14 +0000 Subject: MAINTAINERS: add maintainer entry for TDA998x driver Add a maintainers entry for the TDA998x driver. Rob Clark has handed this driver over to me to look after. Acked-by: Rob Clark Signed-off-by: Russell King Signed-off-by: Dave Airlie diff --git a/MAINTAINERS b/MAINTAINERS index e9d85f6..07d093e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6171,6 +6171,12 @@ S: Supported F: drivers/block/nvme* F: include/linux/nvme.h +NXP TDA998X DRM DRIVER +M: Russell King +S: Supported +F: drivers/gpu/drm/i2c/tda998x_drv.c +F: include/drm/i2c/tda998x.h + OMAP SUPPORT M: Tony Lindgren L: linux-omap@vger.kernel.org -- cgit v0.10.2 From d03874c881a049a50e12f285077ab1f9fc2686e1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 6 Mar 2014 18:09:52 -0500 Subject: drm/radeon/atom: select the proper number of lanes in transmitter setup We need to check for DVI vs. HDMI when setting up duallink since HDMI is single link only. Fixes 4k modes on newer asics. bug: https://bugs.freedesktop.org/show_bug.cgi?id=75223 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 2cec2ab..607dc14 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1314,7 +1314,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t } if (is_dp) args.v5.ucLaneNum = dp_lane_count; - else if (radeon_encoder->pixel_clock > 165000) + else if (radeon_dig_monitor_is_duallink(encoder, radeon_encoder->pixel_clock)) args.v5.ucLaneNum = 8; else args.v5.ucLaneNum = 4; -- cgit v0.10.2 From 2432e1364bbee83cb6e63e026c91785031704ba5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Feb 2014 09:41:28 -0500 Subject: x86: Nuke the supervisor_stack field in i386 thread_info Nothing references the supervisor_stack in the thread_info field, and it does not exist in x86_64. To make the two more the same, it is being removed. Acked-by: Thomas Gleixner Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20110806012353.546183789@goodmis.org Link: http://lkml.kernel.org/r/20140206144321.203619611@goodmis.org Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e1940c0..b7aa975 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -36,7 +36,6 @@ struct thread_info { unsigned long previous_esp; /* ESP of the previous stack in case of nested (IRQ) stacks */ - __u8 supervisor_stack[0]; #endif unsigned int sig_on_uaccess_error:1; unsigned int uaccess_err:1; /* uaccess failed */ -- cgit v0.10.2 From b807902a88c470eb06d0acdf3b6590f27f5dce81 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 6 Feb 2014 09:41:29 -0500 Subject: x86: Nuke GET_THREAD_INFO_WITH_ESP() macro for i386 According to a git log -p, GET_THREAD_INFO_WITH_ESP() has only been defined and never been used. Get rid of it. Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20140206144321.409045251@goodmis.org Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b7aa975..1cb3501 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -182,10 +182,6 @@ static inline struct thread_info *current_thread_info(void) movl $-THREAD_SIZE, reg; \ andl %esp, reg -/* use this one if reg already contains %esp */ -#define GET_THREAD_INFO_WITH_ESP(reg) \ - andl $-THREAD_SIZE, reg - #endif #else /* X86_32 */ -- cgit v0.10.2 From 0788aa6a23cb9d693fc5040ec774b979f1e906cd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Feb 2014 09:41:30 -0500 Subject: x86: Prepare removal of previous_esp from i386 thread_info structure The i386 thread_info contains a previous_esp field that is used to daisy chain the different stacks for dump_stack() (ie. irq, softirq, thread stacks). The goal is to eventual make i386 handling of thread_info the same as x86_64, which means that the thread_info will not be in the stack but as a per_cpu variable. We will no longer depend on thread_info being able to daisy chain different stacks as it will only exist in one location (the thread stack). By moving previous_esp to the end of thread_info and referencing it as an offset instead of using a thread_info field, this becomes a stepping stone to moving the thread_info. The offset to get to the previous stack is rather ugly in this patch, but this is only temporary and the prev_esp will be changed in the next commit. This commit is more for sanity checks of the change. Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Cc: Robert Richter Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20110806012353.891757693@goodmis.org Link: http://lkml.kernel.org/r/20140206144321.608754481@goodmis.org Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 1cb3501..ca2de1b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -32,13 +32,14 @@ struct thread_info { mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; + unsigned int sig_on_uaccess_error:1; + unsigned int uaccess_err:1; /* uaccess failed */ #ifdef CONFIG_X86_32 unsigned long previous_esp; /* ESP of the previous stack in case of nested (IRQ) stacks + (Moved to end, to be removed soon) */ #endif - unsigned int sig_on_uaccess_error:1; - unsigned int uaccess_err:1; /* uaccess failed */ }; #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f2a1770..187d6a7 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -22,6 +22,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { int graph = 0; + u32 *prev_esp; if (!task) task = current; @@ -44,9 +45,17 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, ((unsigned long)stack & (~(THREAD_SIZE - 1))); bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); - stack = (unsigned long *)context->previous_esp; + /* Stop if not on irq stack */ + if (task_stack_page(task) == context) + break; + + /* The previous esp is just above the context */ + prev_esp = (u32 *) ((char *)context + sizeof(struct thread_info) - + sizeof(long)); + stack = (unsigned long *)*prev_esp; if (!stack) break; + if (ops->stack(data, "IRQ") < 0) break; touch_nmi_watchdog(); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d7fcbed..f135cc2 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -81,7 +81,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { union irq_ctx *curctx, *irqctx; - u32 *isp, arg1, arg2; + u32 *isp, *prev_esp, arg1, arg2; curctx = (union irq_ctx *) current_thread_info(); irqctx = __this_cpu_read(hardirq_ctx); @@ -98,7 +98,10 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) /* build the stack frame on the IRQ stack */ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); irqctx->tinfo.task = curctx->tinfo.task; - irqctx->tinfo.previous_esp = current_stack_pointer; + /* Save the next esp after thread_info */ + prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - + sizeof(long)); + *prev_esp = current_stack_pointer; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -149,16 +152,20 @@ void do_softirq_own_stack(void) { struct thread_info *curctx; union irq_ctx *irqctx; - u32 *isp; + u32 *isp, *prev_esp; curctx = current_thread_info(); irqctx = __this_cpu_read(softirq_ctx); irqctx->tinfo.task = curctx->task; - irqctx->tinfo.previous_esp = current_stack_pointer; /* build the stack frame on the softirq stack */ isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); + /* Push the previous esp onto the stack */ + prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - + sizeof(long)); + *prev_esp = current_stack_pointer; + call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7461f50..f352a7c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -184,14 +184,14 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) { unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); unsigned long sp = (unsigned long)®s->sp; - struct thread_info *tinfo; + u32 *prev_esp; if (context == (sp & ~(THREAD_SIZE - 1))) return sp; - tinfo = (struct thread_info *)context; - if (tinfo->previous_esp) - return tinfo->previous_esp; + prev_esp = (u32 *)(context + sizeof(struct thread_info) - sizeof(long)); + if (prev_esp) + return (unsigned long)prev_esp; return (unsigned long)regs; } -- cgit v0.10.2 From 198d208df4371734ac4728f69cb585c284d20a15 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Feb 2014 09:41:31 -0500 Subject: x86: Keep thread_info on thread stack in x86_32 x86_64 uses a per_cpu variable kernel_stack to always point to the thread stack of current. This is where the thread_info is stored and is accessed from this location even when the irq or exception stack is in use. This removes the complexity of having to maintain the thread info on the stack when interrupts are running and having to copy the preempt_count and other fields to the interrupt stack. x86_32 uses the old method of copying the thread_info from the thread stack to the exception stack just before executing the exception. Having the two different requires #ifdefs and also the x86_32 way is a bit of a pain to maintain. By converting x86_32 to the same method of x86_64, we can remove #ifdefs, clean up the x86_32 code a little, and remove the overhead of the copy. Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20110806012354.263834829@goodmis.org Link: http://lkml.kernel.org/r/20140206144321.852942014@goodmis.org Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fdedd38..a4ea023 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -449,6 +449,15 @@ struct stack_canary { }; DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); #endif +/* + * per-CPU IRQ handling stacks + */ +struct irq_stack { + u32 stack[THREAD_SIZE/sizeof(u32)]; +} __aligned(THREAD_SIZE); + +DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); +DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index ca2de1b..47e5de2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -9,6 +9,7 @@ #include #include +#include #include /* @@ -34,12 +35,6 @@ struct thread_info { void __user *sysenter_return; unsigned int sig_on_uaccess_error:1; unsigned int uaccess_err:1; /* uaccess failed */ -#ifdef CONFIG_X86_32 - unsigned long previous_esp; /* ESP of the previous stack in - case of nested (IRQ) stacks - (Moved to end, to be removed soon) - */ -#endif }; #define INIT_THREAD_INFO(tsk) \ @@ -153,9 +148,9 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) -#ifdef CONFIG_X86_32 +#define STACK_WARN (THREAD_SIZE/8) +#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8)) -#define STACK_WARN (THREAD_SIZE/8) /* * macros/functions for gaining access to the thread information structure * @@ -163,38 +158,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -#define current_stack_pointer ({ \ - unsigned long sp; \ - asm("mov %%esp,%0" : "=g" (sp)); \ - sp; \ -}) - -/* how to get the thread information struct from C */ -static inline struct thread_info *current_thread_info(void) -{ - return (struct thread_info *) - (current_stack_pointer & ~(THREAD_SIZE - 1)); -} - -#else /* !__ASSEMBLY__ */ - -/* how to get the thread information struct from ASM */ -#define GET_THREAD_INFO(reg) \ - movl $-THREAD_SIZE, reg; \ - andl %esp, reg - -#endif - -#else /* X86_32 */ - -#include -#define KERNEL_STACK_OFFSET (5*8) - -/* - * macros/functions for gaining access to the thread information structure - * preempt_count needs to be 1 initially, until the scheduler is functional. - */ -#ifndef __ASSEMBLY__ DECLARE_PER_CPU(unsigned long, kernel_stack); static inline struct thread_info *current_thread_info(void) @@ -209,8 +172,8 @@ static inline struct thread_info *current_thread_info(void) /* how to get the thread information struct from ASM */ #define GET_THREAD_INFO(reg) \ - movq PER_CPU_VAR(kernel_stack),reg ; \ - subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg + _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ + _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; /* * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in @@ -220,8 +183,6 @@ static inline struct thread_info *current_thread_info(void) #endif -#endif /* !X86_32 */ - /* * Thread-synchronous status. * diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8e28bf2..29c1944 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1078,6 +1078,10 @@ static __init int setup_disablecpuid(char *arg) } __setup("clearcpuid=", setup_disablecpuid); +DEFINE_PER_CPU(unsigned long, kernel_stack) = + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; +EXPORT_PER_CPU_SYMBOL(kernel_stack); + #ifdef CONFIG_X86_64 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, @@ -1094,10 +1098,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(unsigned long, kernel_stack) = - (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(kernel_stack); - DEFINE_PER_CPU(char *, irq_stack_ptr) = init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 187d6a7..dca820b 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -16,11 +16,33 @@ #include +static void *is_irq_stack(void *p, void *irq) +{ + if (p < irq || p >= (irq + THREAD_SIZE)) + return NULL; + return irq + THREAD_SIZE; +} + + +static void *is_hardirq_stack(unsigned long *stack, int cpu) +{ + void *irq = per_cpu(hardirq_stack, cpu); + + return is_irq_stack(stack, irq); +} + +static void *is_softirq_stack(unsigned long *stack, int cpu) +{ + void *irq = per_cpu(softirq_stack, cpu); + + return is_irq_stack(stack, irq); +} void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { + const unsigned cpu = get_cpu(); int graph = 0; u32 *prev_esp; @@ -40,18 +62,22 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, for (;;) { struct thread_info *context; + void *end_stack; + + end_stack = is_hardirq_stack(stack, cpu); + if (!end_stack) + end_stack = is_softirq_stack(stack, cpu); - context = (struct thread_info *) - ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); + context = task_thread_info(task); + bp = ops->walk_stack(context, stack, bp, ops, data, + end_stack, &graph); /* Stop if not on irq stack */ - if (task_stack_page(task) == context) + if (!end_stack) break; - /* The previous esp is just above the context */ - prev_esp = (u32 *) ((char *)context + sizeof(struct thread_info) - - sizeof(long)); + /* The previous esp is saved on the bottom of the stack */ + prev_esp = (u32 *)(end_stack - THREAD_SIZE); stack = (unsigned long *)*prev_esp; if (!stack) break; @@ -60,6 +86,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, break; touch_nmi_watchdog(); } + put_cpu(); } EXPORT_SYMBOL(dump_trace); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f135cc2..988dc8b 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -55,16 +55,8 @@ static inline int check_stack_overflow(void) { return 0; } static inline void print_stack_overflow(void) { } #endif -/* - * per-CPU IRQ handling contexts (thread information and stack) - */ -union irq_ctx { - struct thread_info tinfo; - u32 stack[THREAD_SIZE/sizeof(u32)]; -} __attribute__((aligned(THREAD_SIZE))); - -static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); -static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); +DEFINE_PER_CPU(struct irq_stack *, hardirq_stack); +DEFINE_PER_CPU(struct irq_stack *, softirq_stack); static void call_on_stack(void *func, void *stack) { @@ -77,14 +69,22 @@ static void call_on_stack(void *func, void *stack) : "memory", "cc", "edx", "ecx", "eax"); } +/* how to get the current stack pointer from C */ +register unsigned long current_stack_pointer asm("esp") __used; + +static inline void *current_stack(void) +{ + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); +} + static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { - union irq_ctx *curctx, *irqctx; + struct irq_stack *curstk, *irqstk; u32 *isp, *prev_esp, arg1, arg2; - curctx = (union irq_ctx *) current_thread_info(); - irqctx = __this_cpu_read(hardirq_ctx); + curstk = (struct irq_stack *) current_stack(); + irqstk = __this_cpu_read(hardirq_stack); /* * this is where we switch to the IRQ stack. However, if we are @@ -92,15 +92,13 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) * handler) we can't do that and just have to keep using the * current stack (which is the irq stack already after all) */ - if (unlikely(curctx == irqctx)) + if (unlikely(curstk == irqstk)) return 0; - /* build the stack frame on the IRQ stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); - irqctx->tinfo.task = curctx->tinfo.task; - /* Save the next esp after thread_info */ - prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - - sizeof(long)); + isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); + + /* Save the next esp at the bottom of the stack */ + prev_esp = (u32 *)irqstk; *prev_esp = current_stack_pointer; if (unlikely(overflow)) @@ -121,49 +119,39 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) */ void irq_ctx_init(int cpu) { - union irq_ctx *irqctx; + struct irq_stack *irqstk; - if (per_cpu(hardirq_ctx, cpu)) + if (per_cpu(hardirq_stack, cpu)) return; - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), + irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER)); - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + per_cpu(hardirq_stack, cpu) = irqstk; - per_cpu(hardirq_ctx, cpu) = irqctx; - - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), + irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), THREADINFO_GFP, THREAD_SIZE_ORDER)); - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); - irqctx->tinfo.cpu = cpu; - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); - - per_cpu(softirq_ctx, cpu) = irqctx; + per_cpu(softirq_stack, cpu) = irqstk; printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", - cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); + cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); } void do_softirq_own_stack(void) { - struct thread_info *curctx; - union irq_ctx *irqctx; + struct thread_info *curstk; + struct irq_stack *irqstk; u32 *isp, *prev_esp; - curctx = current_thread_info(); - irqctx = __this_cpu_read(softirq_ctx); - irqctx->tinfo.task = curctx->task; + curstk = current_stack(); + irqstk = __this_cpu_read(softirq_stack); /* build the stack frame on the softirq stack */ - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); + isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); /* Push the previous esp onto the stack */ - prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - - sizeof(long)); + prev_esp = (u32 *)irqstk; *prev_esp = current_stack_pointer; call_on_stack(__do_softirq, isp); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0de43e9..7bc86bb 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -314,6 +314,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_end_context_switch(next_p); + this_cpu_write(kernel_stack, + (unsigned long)task_stack_page(next_p) + + THREAD_SIZE - KERNEL_STACK_OFFSET); + /* * Restore %gs if needed (which is common) */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index f352a7c..678c0ad 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -189,7 +189,7 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) if (context == (sp & ~(THREAD_SIZE - 1))) return sp; - prev_esp = (u32 *)(context + sizeof(struct thread_info) - sizeof(long)); + prev_esp = (u32 *)(context); if (prev_esp) return (unsigned long)prev_esp; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a32da80..867d53e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -758,10 +758,10 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) #else clear_tsk_thread_flag(idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); +#endif per_cpu(kernel_stack, cpu) = (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; -#endif early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); initial_code = (unsigned long)start_secondary; stack_start = idle->thread.sp; -- cgit v0.10.2 From 2223f6f6eeaad6ea0a3acd3f4bc1ae45e8117ddc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Feb 2014 09:41:32 -0500 Subject: x86: Clean up dumpstack_64.c code The dump_trace() function in dumpstack_64.c is hard to follow. The test for exception stack is processed differently than the test for irq stack, and the normal stack is outside completely. By restructuring this code to have all the stacks determined by a single function that returns an enum of the following: STACK_IS_NORMAL STACK_IS_EXCEPTION STACK_IS_IRQ STACK_IS_UNKNOWN and has the logic of each within a switch statement. This should make the code much easier to read and understand. Link: http://lkml.kernel.org/r/20110806012354.684598995@goodmis.org Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Signed-off-by: Steven Rostedt Link: http://lkml.kernel.org/r/20140206144322.086050042@goodmis.org Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index addb207..346b1df 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -104,6 +104,45 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack, return (stack >= irq_stack && stack < irq_stack_end); } +static const unsigned long irq_stack_size = + (IRQ_STACK_SIZE - 64) / sizeof(unsigned long); + +enum stack_type { + STACK_IS_UNKNOWN, + STACK_IS_NORMAL, + STACK_IS_EXCEPTION, + STACK_IS_IRQ, +}; + +static enum stack_type +analyze_stack(int cpu, struct task_struct *task, + unsigned long *stack, unsigned long **stack_end, char **id) +{ + unsigned long *irq_stack; + unsigned long addr; + unsigned used = 0; + + addr = ((unsigned long)stack & (~(THREAD_SIZE - 1))); + if ((unsigned long)task_stack_page(task) == addr) + return STACK_IS_NORMAL; + + *stack_end = in_exception_stack(cpu, (unsigned long)stack, + &used, id); + if (*stack_end) + return STACK_IS_EXCEPTION; + + *stack_end = (unsigned long *)per_cpu(irq_stack_ptr, cpu); + if (!*stack_end) + return STACK_IS_UNKNOWN; + + irq_stack = *stack_end - irq_stack_size; + + if (in_irq_stack(stack, irq_stack, *stack_end)) + return STACK_IS_IRQ; + + return STACK_IS_UNKNOWN; +} + /* * x86-64 can have up to three kernel stacks: * process stack @@ -116,12 +155,11 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irq_stack_end = - (unsigned long *)per_cpu(irq_stack_ptr, cpu); - unsigned used = 0; struct thread_info *tinfo; - int graph = 0; + unsigned long *irq_stack; unsigned long dummy; + int graph = 0; + int done = 0; if (!task) task = current; @@ -143,49 +181,60 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * exceptions */ tinfo = task_thread_info(task); - for (;;) { + while (!done) { + unsigned long *stack_end; + enum stack_type stype; char *id; - unsigned long *estack_end; - estack_end = in_exception_stack(cpu, (unsigned long)stack, - &used, &id); - if (estack_end) { + stype = analyze_stack(cpu, task, stack, &stack_end, &id); + + /* Default finish unless specified to continue */ + done = 1; + + switch (stype) { + + /* Break out early if we are on the thread stack */ + case STACK_IS_NORMAL: + break; + + case STACK_IS_EXCEPTION: + if (ops->stack(data, id) < 0) break; bp = ops->walk_stack(tinfo, stack, bp, ops, - data, estack_end, &graph); + data, stack_end, &graph); ops->stack(data, ""); /* * We link to the next stack via the * second-to-last pointer (index -2 to end) in the * exception stack: */ - stack = (unsigned long *) estack_end[-2]; - continue; - } - if (irq_stack_end) { - unsigned long *irq_stack; - irq_stack = irq_stack_end - - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - - if (in_irq_stack(stack, irq_stack, irq_stack_end)) { - if (ops->stack(data, "IRQ") < 0) - break; - bp = ops->walk_stack(tinfo, stack, bp, - ops, data, irq_stack_end, &graph); - /* - * We link to the next stack (which would be - * the process stack normally) the last - * pointer (index -1 to end) in the IRQ stack: - */ - stack = (unsigned long *) (irq_stack_end[-1]); - irq_stack_end = NULL; - ops->stack(data, "EOI"); - continue; - } + stack = (unsigned long *) stack_end[-2]; + done = 0; + break; + + case STACK_IS_IRQ: + + if (ops->stack(data, "IRQ") < 0) + break; + bp = ops->walk_stack(tinfo, stack, bp, + ops, data, stack_end, &graph); + /* + * We link to the next stack (which would be + * the process stack normally) the last + * pointer (index -1 to end) in the IRQ stack: + */ + stack = (unsigned long *) (stack_end[-1]); + irq_stack = stack_end - irq_stack_size; + ops->stack(data, "EOI"); + done = 0; + break; + + case STACK_IS_UNKNOWN: + ops->stack(data, "UNK"); + break; } - break; } /* -- cgit v0.10.2 From 621b5060e823301d0cba4cb52a7ee3491922d291 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 3 Mar 2014 14:21:40 +1100 Subject: powerpc/tm: Fix crash when forking inside a transaction When we fork/clone we currently don't copy any of the TM state to the new thread. This results in a TM bad thing (program check) when the new process is switched in as the kernel does a tmrechkpt with TEXASR FS not set. Also, since R1 is from userspace, we trigger the bad kernel stack pointer detection. So we end up with something like this: Bad kernel stack pointer 0 at c0000000000404fc cpu 0x2: Vector: 700 (Program Check) at [c00000003ffefd40] pc: c0000000000404fc: restore_gprs+0xc0/0x148 lr: 0000000000000000 sp: 0 msr: 9000000100201030 current = 0xc000001dd1417c30 paca = 0xc00000000fe00800 softe: 0 irq_happened: 0x01 pid = 0, comm = swapper/2 WARNING: exception is not recoverable, can't continue The below fixes this by flushing the TM state before we copy the task_struct to the clone. To do this we go through the tmreclaim patch, which removes the checkpointed registers from the CPU and transitions the CPU out of TM suspend mode. Hence we need to call tmrechkpt after to restore the checkpointed state and the TM mode for the current task. To make this fail from userspace is simply: tbegin li r0, 2 sc Kudos to Adhemerval Zanella Neto for finding this. Signed-off-by: Michael Neuling cc: Adhemerval Zanella Neto cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8d4c247f1..af064d2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1048,6 +1048,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) flush_altivec_to_thread(src); flush_vsx_to_thread(src); flush_spe_to_thread(src); + /* + * Flush TM state out so we can copy it. __switch_to_tm() does this + * flush but it removes the checkpointed state from the current CPU and + * transitions the CPU out of TM mode. Hence we need to call + * tm_recheckpoint_new_task() (on the same task) to restore the + * checkpointed state back and the TM mode. + */ + __switch_to_tm(src); + tm_recheckpoint_new_task(src); *dst = *src; -- cgit v0.10.2 From a5b2cf5b1af424ee3dd9e3ce6d5cea18cb927e67 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Mar 2014 08:31:24 +1100 Subject: powerpc: Align p_dyn, p_rela and p_st symbols The 64bit relocation code places a few symbols in the text segment. These symbols are only 4 byte aligned where they need to be 8 byte aligned. Add an explicit alignment. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Tested-by: Laurent Dufour Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index 1482327..d88736f 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -81,6 +81,7 @@ _GLOBAL(relocate) 6: blr +.balign 8 p_dyn: .llong __dynamic_start - 0b p_rela: .llong __rela_dyn_start - 0b p_st: .llong _stext - 0b -- cgit v0.10.2 From 16c0ae028989df40bdab46b7f241d331ddc97c59 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 14:05:09 +0200 Subject: Target/sbc: Fix sbc_copy_prot for offset scatters When copying between device and command protection scatters we must take into account that device scatters might be offset and we might copy outside scatter range. Thus for each cmd prot scatter we must take the min between cmd prot scatter, dev prot scatter, and whats left (and loop in case we havn't copied enough from/to cmd prot scatter). Example (single t_prot_sg of len 2048): kernel: sbc_dif_copy_prot: se_cmd=ffff880380aaf970, left=2048, len=2048, dev_prot_sg_offset=3072, dev_prot_sg_len=4096 kernel: isert: se_cmd=ffff880380aaf970 PI error found type 0 at sector 0x2600 expected 0x0 vs actual 0x725f, lba=2580 Instead of copying 2048 from offset 3072 (copying junk outside sg limit 4096), we must to copy 1024 and continue to next sg until we complete cmd prot scatter. This issue was found using iSER T10-PI offload over rd_mcp (wasn't discovered with fileio since file_dev prot sglists are never offset). Changes from v1: - Fix sbc_copy_prot copy length miss-calculation Changes from v0: - Removed psg->offset consideration for psg_len computation - Removed sg->offset consideration for offset condition - Added copied consideraiton for len computation - Added copied offset to paddr when doing memcpy Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 42f18fc..77e6531 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1079,25 +1079,31 @@ sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, left = sectors * dev->prot_length; for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) { - - len = min(psg->length, left); - if (offset >= sg->length) { - sg = sg_next(sg); - offset = 0; - } + unsigned int psg_len, copied = 0; paddr = kmap_atomic(sg_page(psg)) + psg->offset; - addr = kmap_atomic(sg_page(sg)) + sg->offset + offset; - - if (read) - memcpy(paddr, addr, len); - else - memcpy(addr, paddr, len); - - left -= len; - offset += len; + psg_len = min(left, psg->length); + while (psg_len) { + len = min(psg_len, sg->length - offset); + addr = kmap_atomic(sg_page(sg)) + sg->offset + offset; + + if (read) + memcpy(paddr + copied, addr, len); + else + memcpy(addr, paddr + copied, len); + + left -= len; + offset += len; + copied += len; + psg_len -= len; + + if (offset >= sg->length) { + sg = sg_next(sg); + offset = 0; + } + kunmap_atomic(addr); + } kunmap_atomic(paddr); - kunmap_atomic(addr); } } -- cgit v0.10.2 From 9b745ab897199c2af9f21ca9681ef86d5b971002 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 7 Mar 2014 08:37:19 +0100 Subject: ALSA: hda - Fix loud click noise with IdeaPad 410Y Lenovo IdeaPad 410Y with ALC282 codec makes loud click noises at boot and shutdown. Also, it wrongly misdetects the acpi_thinkpad hook. This patch adds a device-specific fixup for disabling the shutup callback that is the cause of the click noise and also avoiding the thinpad_helper calls. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=71511 Reported-and-tested-by: Guilherme Amadio Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 850296a..8d0a844 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3616,6 +3616,19 @@ static void alc_fixup_auto_mute_via_amp(struct hda_codec *codec, } } +static void alc_no_shutup(struct hda_codec *codec) +{ +} + +static void alc_fixup_no_shutup(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + struct alc_spec *spec = codec->spec; + spec->shutup = alc_no_shutup; + } +} + static void alc_fixup_headset_mode_alc668(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3844,6 +3857,7 @@ enum { ALC269_FIXUP_HP_GPIO_LED, ALC269_FIXUP_INV_DMIC, ALC269_FIXUP_LENOVO_DOCK, + ALC269_FIXUP_NO_SHUTUP, ALC286_FIXUP_SONY_MIC_NO_PRESENCE, ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT, ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -4020,6 +4034,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic_0x12, }, + [ALC269_FIXUP_NO_SHUTUP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_no_shutup, + }, [ALC269_FIXUP_LENOVO_DOCK] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4405,6 +4423,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), -- cgit v0.10.2 From 706c16f2372316a0a8af3be6e2bd6e391c073ca0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Mar 2014 21:08:24 -0500 Subject: perpcu: fold pcpu_split_block() into the only caller ... and simplify the results a bit. Makes the next step easier to deal with - we will be changing the data representation for chunk->map[] and it's easier to do if the code in question is not split between pcpu_alloc_area() and pcpu_split_block(). Signed-off-by: Al Viro Signed-off-by: Tejun Heo diff --git a/mm/percpu.c b/mm/percpu.c index 036cfe0..592f289 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -418,48 +418,6 @@ out_unlock: } /** - * pcpu_split_block - split a map block - * @chunk: chunk of interest - * @i: index of map block to split - * @head: head size in bytes (can be 0) - * @tail: tail size in bytes (can be 0) - * - * Split the @i'th map block into two or three blocks. If @head is - * non-zero, @head bytes block is inserted before block @i moving it - * to @i+1 and reducing its size by @head bytes. - * - * If @tail is non-zero, the target block, which can be @i or @i+1 - * depending on @head, is reduced by @tail bytes and @tail byte block - * is inserted after the target block. - * - * @chunk->map must have enough free slots to accommodate the split. - * - * CONTEXT: - * pcpu_lock. - */ -static void pcpu_split_block(struct pcpu_chunk *chunk, int i, - int head, int tail) -{ - int nr_extra = !!head + !!tail; - - BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra); - - /* insert new subblocks */ - memmove(&chunk->map[i + nr_extra], &chunk->map[i], - sizeof(chunk->map[0]) * (chunk->map_used - i)); - chunk->map_used += nr_extra; - - if (head) { - chunk->map[i + 1] = chunk->map[i] - head; - chunk->map[i++] = head; - } - if (tail) { - chunk->map[i++] -= tail; - chunk->map[i] = tail; - } -} - -/** * pcpu_alloc_area - allocate area from a pcpu_chunk * @chunk: chunk of interest * @size: wanted size in bytes @@ -524,14 +482,25 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) /* split if warranted */ if (head || tail) { - pcpu_split_block(chunk, i, head, tail); + int nr_extra = !!head + !!tail; + + /* insert new subblocks */ + memmove(&chunk->map[i + nr_extra], &chunk->map[i], + sizeof(chunk->map[0]) * (chunk->map_used - i)); + chunk->map_used += nr_extra; + if (head) { - i++; + chunk->map[i + 1] = chunk->map[i] - head; + chunk->map[i] = head; off += head; - max_contig = max(chunk->map[i - 1], max_contig); + i++; + max_contig = max(head, max_contig); + } + if (tail) { + chunk->map[i] -= tail; + chunk->map[i + 1] = tail; + max_contig = max(tail, max_contig); } - if (tail) - max_contig = max(chunk->map[i + 1], max_contig); } /* update hint and mark allocated */ -- cgit v0.10.2 From 723ad1d90b5663ab623bb3bfba3e4ee7101795d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Mar 2014 21:13:18 -0500 Subject: percpu: store offsets instead of lengths in ->map[] Current code keeps +-length for each area in chunk->map[]. It has several unpleasant consequences: * even if we know that first 50 areas are all in use, allocation still needs to go through all those areas just to sum their sizes, just to get the offset of free one. * freeing needs to find the array entry refering to the area in question; again, the need to sum the sizes until we reach the offset we are interested in. Note that offsets are monotonous, so simple binary search would do here. New data representation: array of pairs. Each pair is represented by one int - we use offset|1 for and offset for (we make sure that all offsets are even). In the end we put a sentry entry - . The first entry is <0, flag>; it would be possible to store together the flag for Nth area and offset for N+1st, but that leads to much hairier code. In other words, where the old variant would have 4, -8, -4, 4, -12, 100 (4 bytes free, 8 in use, 4 in use, 4 free, 12 in use, 100 free) we store <0,0>, <4,1>, <12,1>, <16,0>, <20,1>, <32,0>, <132,1> i.e. 0, 5, 13, 16, 21, 32, 133 This commit switches to new data representation and takes care of a couple of low-hanging fruits in free_pcpu_area() - one is the switch to binary search, another is not doing two memmove() when one would do. Speeding the alloc side up (by keeping track of how many areas in the beginning are known to be all in use) also becomes possible - that'll be done in the next commit. Signed-off-by: Al Viro Signed-off-by: Tejun Heo diff --git a/mm/percpu.c b/mm/percpu.c index 592f289..49dfccf 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -102,7 +102,7 @@ struct pcpu_chunk { int free_size; /* free bytes in the chunk */ int contig_hint; /* max contiguous size hint */ void *base_addr; /* base address of this chunk */ - int map_used; /* # of map entries used */ + int map_used; /* # of map entries used before the sentry */ int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ void *data; /* chunk data */ @@ -356,11 +356,11 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk) { int new_alloc; - if (chunk->map_alloc >= chunk->map_used + 2) + if (chunk->map_alloc >= chunk->map_used + 3) return 0; new_alloc = PCPU_DFL_MAP_ALLOC; - while (new_alloc < chunk->map_used + 2) + while (new_alloc < chunk->map_used + 3) new_alloc *= 2; return new_alloc; @@ -441,19 +441,22 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) int oslot = pcpu_chunk_slot(chunk); int max_contig = 0; int i, off; + int *p; - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { - bool is_last = i + 1 == chunk->map_used; + for (i = 0, p = chunk->map; i < chunk->map_used; i++, p++) { int head, tail; + int this_size; + + off = *p; + if (off & 1) + continue; /* extra for alignment requirement */ head = ALIGN(off, align) - off; - BUG_ON(i == 0 && head != 0); - if (chunk->map[i] < 0) - continue; - if (chunk->map[i] < head + size) { - max_contig = max(chunk->map[i], max_contig); + this_size = (p[1] & ~1) - off; + if (this_size < head + size) { + max_contig = max(this_size, max_contig); continue; } @@ -463,55 +466,50 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) * than sizeof(int), which is very small but isn't too * uncommon for percpu allocations. */ - if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) { - if (chunk->map[i - 1] > 0) - chunk->map[i - 1] += head; - else { - chunk->map[i - 1] -= head; + if (head && (head < sizeof(int) || !(p[-1] & 1))) { + if (p[-1] & 1) chunk->free_size -= head; - } - chunk->map[i] -= head; - off += head; + *p = off += head; + this_size -= head; head = 0; } /* if tail is small, just keep it around */ - tail = chunk->map[i] - head - size; - if (tail < sizeof(int)) + tail = this_size - head - size; + if (tail < sizeof(int)) { tail = 0; + size = this_size - head; + } /* split if warranted */ if (head || tail) { int nr_extra = !!head + !!tail; /* insert new subblocks */ - memmove(&chunk->map[i + nr_extra], &chunk->map[i], + memmove(p + nr_extra + 1, p + 1, sizeof(chunk->map[0]) * (chunk->map_used - i)); chunk->map_used += nr_extra; if (head) { - chunk->map[i + 1] = chunk->map[i] - head; - chunk->map[i] = head; - off += head; - i++; + *++p = off += head; + ++i; max_contig = max(head, max_contig); } if (tail) { - chunk->map[i] -= tail; - chunk->map[i + 1] = tail; + p[1] = off + size; max_contig = max(tail, max_contig); } } /* update hint and mark allocated */ - if (is_last) + if (i + 1 == chunk->map_used) chunk->contig_hint = max_contig; /* fully scanned */ else chunk->contig_hint = max(chunk->contig_hint, max_contig); - chunk->free_size -= chunk->map[i]; - chunk->map[i] = -chunk->map[i]; + chunk->free_size -= size; + *p |= 1; pcpu_chunk_relocate(chunk, oslot); return off; @@ -539,34 +537,47 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) { int oslot = pcpu_chunk_slot(chunk); - int i, off; - - for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) - if (off == freeme) - break; + int off = 0; + unsigned i, j; + int to_free = 0; + int *p; + + freeme |= 1; /* we are searching for pair */ + + i = 0; + j = chunk->map_used; + while (i != j) { + unsigned k = (i + j) / 2; + off = chunk->map[k]; + if (off < freeme) + i = k + 1; + else if (off > freeme) + j = k; + else + i = j = k; + } BUG_ON(off != freeme); - BUG_ON(chunk->map[i] > 0); - chunk->map[i] = -chunk->map[i]; - chunk->free_size += chunk->map[i]; + p = chunk->map + i; + *p = off &= ~1; + chunk->free_size += (p[1] & ~1) - off; + /* merge with next? */ + if (!(p[1] & 1)) + to_free++; /* merge with previous? */ - if (i > 0 && chunk->map[i - 1] >= 0) { - chunk->map[i - 1] += chunk->map[i]; - chunk->map_used--; - memmove(&chunk->map[i], &chunk->map[i + 1], - (chunk->map_used - i) * sizeof(chunk->map[0])); + if (i > 0 && !(p[-1] & 1)) { + to_free++; i--; + p--; } - /* merge with next? */ - if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) { - chunk->map[i] += chunk->map[i + 1]; - chunk->map_used--; - memmove(&chunk->map[i + 1], &chunk->map[i + 2], - (chunk->map_used - (i + 1)) * sizeof(chunk->map[0])); + if (to_free) { + chunk->map_used -= to_free; + memmove(p + 1, p + 1 + to_free, + (chunk->map_used - i) * sizeof(chunk->map[0])); } - chunk->contig_hint = max(chunk->map[i], chunk->contig_hint); + chunk->contig_hint = max(chunk->map[i + 1] - chunk->map[i] - 1, chunk->contig_hint); pcpu_chunk_relocate(chunk, oslot); } @@ -586,7 +597,9 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) } chunk->map_alloc = PCPU_DFL_MAP_ALLOC; - chunk->map[chunk->map_used++] = pcpu_unit_size; + chunk->map[0] = 0; + chunk->map[1] = pcpu_unit_size | 1; + chunk->map_used = 1; INIT_LIST_HEAD(&chunk->list); chunk->free_size = pcpu_unit_size; @@ -682,6 +695,13 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) unsigned long flags; void __percpu *ptr; + /* + * We want the lowest bit of offset available for in-use/free + * indicator. + */ + if (unlikely(align < 2)) + align = 2; + if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { WARN(true, "illegal size (%zu) or align (%zu) for " "percpu allocation\n", size, align); @@ -1312,9 +1332,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, } schunk->contig_hint = schunk->free_size; - schunk->map[schunk->map_used++] = -ai->static_size; + schunk->map[0] = 1; + schunk->map[1] = ai->static_size; + schunk->map_used = 1; if (schunk->free_size) - schunk->map[schunk->map_used++] = schunk->free_size; + schunk->map[++schunk->map_used] = 1 | (ai->static_size + schunk->free_size); + else + schunk->map[1] |= 1; /* init dynamic chunk if necessary */ if (dyn_size) { @@ -1327,8 +1351,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, bitmap_fill(dchunk->populated, pcpu_unit_pages); dchunk->contig_hint = dchunk->free_size = dyn_size; - dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; - dchunk->map[dchunk->map_used++] = dchunk->free_size; + dchunk->map[0] = 1; + dchunk->map[1] = pcpu_reserved_chunk_limit; + dchunk->map[2] = (pcpu_reserved_chunk_limit + dchunk->free_size) | 1; + dchunk->map_used = 2; } /* link the first chunk in */ -- cgit v0.10.2 From 3d331ad74fa33f0b14a46cf0de8358012d3c1500 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 6 Mar 2014 20:52:32 -0500 Subject: percpu: speed alloc_pcpu_area() up If we know that first N areas are all in use, we can obviously skip them when searching for a free one. And that kind of hint is very easy to maintain. Signed-off-by: Al Viro Signed-off-by: Tejun Heo diff --git a/mm/percpu.c b/mm/percpu.c index 49dfccf..c7206d0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -106,6 +106,7 @@ struct pcpu_chunk { int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ void *data; /* chunk data */ + int first_free; /* no free below this */ bool immutable; /* no [de]population allowed */ unsigned long populated[]; /* populated bitmap */ }; @@ -441,9 +442,10 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) int oslot = pcpu_chunk_slot(chunk); int max_contig = 0; int i, off; + bool seen_free = false; int *p; - for (i = 0, p = chunk->map; i < chunk->map_used; i++, p++) { + for (i = chunk->first_free, p = chunk->map + i; i < chunk->map_used; i++, p++) { int head, tail; int this_size; @@ -456,6 +458,10 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) this_size = (p[1] & ~1) - off; if (this_size < head + size) { + if (!seen_free) { + chunk->first_free = i; + seen_free = true; + } max_contig = max(this_size, max_contig); continue; } @@ -491,6 +497,10 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) chunk->map_used += nr_extra; if (head) { + if (!seen_free) { + chunk->first_free = i; + seen_free = true; + } *++p = off += head; ++i; max_contig = max(head, max_contig); @@ -501,6 +511,9 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) } } + if (!seen_free) + chunk->first_free = i + 1; + /* update hint and mark allocated */ if (i + 1 == chunk->map_used) chunk->contig_hint = max_contig; /* fully scanned */ @@ -558,6 +571,9 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) } BUG_ON(off != freeme); + if (i < chunk->first_free) + chunk->first_free = i; + p = chunk->map + i; *p = off &= ~1; chunk->free_size += (p[1] & ~1) - off; -- cgit v0.10.2 From 6967037baf08860407d275fdd6475d2ee41bd9b7 Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 28 Feb 2014 17:35:52 +0100 Subject: s390/appldata_os: fix cpu array size calculation The cpu array size calculation uses the NR_CPUS config option, which was recently increased from 64 to 256. With a value of 256, the cpu array will no longer fit into one APPLDATA record and loading the appldata_os module fails with the following error: could not insert 'appldata_os': Cannot allocate memory Use num_possible_cpus() instead of NR_CPUS. For z/VM, this will still result in a value of 64. This is not true for LPAR, but the appldata feature is not available for LPAR. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index de8e2b3..69b23b2 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -171,7 +171,7 @@ static int __init appldata_os_init(void) int rc, max_size; max_size = sizeof(struct appldata_os_data) + - (NR_CPUS * sizeof(struct appldata_os_per_cpu)); + (num_possible_cpus() * sizeof(struct appldata_os_per_cpu)); if (max_size > APPLDATA_MAX_REC_SIZE) { pr_err("Maximum OS record size %i exceeds the maximum " "record size %i\n", max_size, APPLDATA_MAX_REC_SIZE); -- cgit v0.10.2 From 0563416b8ac2711f315b50aa6efc84b112e5420a Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Wed, 26 Feb 2014 18:13:06 -0800 Subject: s390: select CONFIG_TTY for use of tty in unconditional keyboard driver The unconditionally built keyboard driver, drivers/s390/char/keyboard.c, requires CONFIG_TTY, so select it from CONFIG_S390 to prevent a build error. Signed-off-by: Josh Triplett Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 86db5a8..cbe56154 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -140,6 +140,7 @@ config S390 select OLD_SIGACTION select OLD_SIGSUSPEND3 select SYSCTL_EXCEPTION_TRACE + select TTY select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS -- cgit v0.10.2 From fa2a0627ac70d007c313da150d9bf3705729e9c8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 6 Mar 2014 11:24:41 +0100 Subject: s390/compat: remove compat exec domain The whole compat exec domain code doesn't make any difference. From the registered s390_exec_domain: - exec domain name is only displayed in /proc/execdomains - handler is unused - pers_low and pers_high are only used internally to find this specific exec domain otherwise the default exec domain will be used - all other fields match the default exec domain So let's get rid of this. Reported-by: Thomas Gleixner Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 1b3ac09..a95c4ca 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -47,9 +47,8 @@ obj-$(CONFIG_SCHED_BOOK) += topology.o obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ - compat_wrapper.o compat_exec_domain.o \ - $(compat-obj-y) +obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o +obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y) obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/s390/kernel/compat_exec_domain.c b/arch/s390/kernel/compat_exec_domain.c deleted file mode 100644 index 765fabd..0000000 --- a/arch/s390/kernel/compat_exec_domain.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Support for 32-bit Linux for S390 personality. - * - * Copyright IBM Corp. 2000 - * Author(s): Gerhard Tonn (ton@de.ibm.com) - * - * - */ - -#include -#include -#include -#include - -static struct exec_domain s390_exec_domain; - -static int __init s390_init (void) -{ - s390_exec_domain.name = "Linux/s390"; - s390_exec_domain.handler = NULL; - s390_exec_domain.pers_low = PER_LINUX32; - s390_exec_domain.pers_high = PER_LINUX32; - s390_exec_domain.signal_map = default_exec_domain.signal_map; - s390_exec_domain.signal_invmap = default_exec_domain.signal_invmap; - register_exec_domain(&s390_exec_domain); - return 0; -} - -__initcall(s390_init); -- cgit v0.10.2 From 43eab381181c76a19734298b5c925c45fb1afe45 Mon Sep 17 00:00:00 2001 From: Philipp Hachtmann Date: Wed, 26 Feb 2014 16:19:22 +0100 Subject: s390/topology: Remove call to update_cpu_masks() The call to update_cpu_masks() from within topology_init() is completely redundant. This patch removes it. Signed-off-by: Philipp Hachtmann Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4b2e3e3..6298fed 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -451,7 +451,6 @@ static int __init topology_init(void) } set_topology_timer(); out: - update_cpu_masks(); return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching); } device_initcall(topology_init); -- cgit v0.10.2 From 739c3eea711a255df5ed1246face0a4dce5e589f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 19 Feb 2014 20:20:21 +0800 Subject: blk-mq: add REQ_SYNC early Add REQ_SYNC early, so rq_dispatched[] in blk_mq_rq_ctx_init is set correctly. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index 1b8b50d..883f720 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -860,6 +860,8 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); + if (is_sync) + rw |= REQ_SYNC; trace_block_getrq(q, bio, rw); rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false); if (likely(rq)) -- cgit v0.10.2 From 70044d71d31d6973665ced5be04ef39ac1c09a48 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:19:57 -0500 Subject: firewire: don't use PREPARE_DELAYED_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. firewire core-device and sbp2 have been been multiplexing work items with multiple work functions. Introduce fw_device_workfn() and sbp2_lu_workfn() which invoke fw_device->workfn and sbp2_logical_unit->workfn respectively and always use the two functions as the work functions and update the users to set the ->workfn fields instead of overriding work functions using PREPARE_DELAYED_WORK(). This fixes a variety of possible regressions since a2c1c57be8d9 "workqueue: consider work function when searching for busy work items" due to which fw_workqueue lost its required non-reentrancy property. Signed-off-by: Tejun Heo Acked-by: Stefan Richter Cc: linux1394-devel@lists.sourceforge.net Cc: stable@vger.kernel.org # v3.9+ Cc: stable@vger.kernel.org # v3.8.2+ Cc: stable@vger.kernel.org # v3.4.60+ Cc: stable@vger.kernel.org # v3.2.40+ diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index de4aa40..2c6d5e1 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -916,7 +916,7 @@ static int lookup_existing_device(struct device *dev, void *data) old->config_rom_retries = 0; fw_notice(card, "rediscovered device %s\n", dev_name(dev)); - PREPARE_DELAYED_WORK(&old->work, fw_device_update); + old->workfn = fw_device_update; fw_schedule_device_work(old, 0); if (current_node == card->root_node) @@ -1075,7 +1075,7 @@ static void fw_device_init(struct work_struct *work) if (atomic_cmpxchg(&device->state, FW_DEVICE_INITIALIZING, FW_DEVICE_RUNNING) == FW_DEVICE_GONE) { - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, SHUTDOWN_DELAY); } else { fw_notice(card, "created device %s: GUID %08x%08x, S%d00\n", @@ -1196,13 +1196,20 @@ static void fw_device_refresh(struct work_struct *work) dev_name(&device->device), fw_rcode_string(ret)); gone: atomic_set(&device->state, FW_DEVICE_GONE); - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, SHUTDOWN_DELAY); out: if (node_id == card->root_node->node_id) fw_schedule_bm_work(card, 0); } +static void fw_device_workfn(struct work_struct *work) +{ + struct fw_device *device = container_of(to_delayed_work(work), + struct fw_device, work); + device->workfn(work); +} + void fw_node_event(struct fw_card *card, struct fw_node *node, int event) { struct fw_device *device; @@ -1252,7 +1259,8 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) * power-up after getting plugged in. We schedule the * first config rom scan half a second after bus reset. */ - INIT_DELAYED_WORK(&device->work, fw_device_init); + device->workfn = fw_device_init; + INIT_DELAYED_WORK(&device->work, fw_device_workfn); fw_schedule_device_work(device, INITIAL_DELAY); break; @@ -1268,7 +1276,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) if (atomic_cmpxchg(&device->state, FW_DEVICE_RUNNING, FW_DEVICE_INITIALIZING) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_refresh); + device->workfn = fw_device_refresh; fw_schedule_device_work(device, device->is_local ? 0 : INITIAL_DELAY); } @@ -1283,7 +1291,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) smp_wmb(); /* update node_id before generation */ device->generation = card->generation; if (atomic_read(&device->state) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_update); + device->workfn = fw_device_update; fw_schedule_device_work(device, 0); } break; @@ -1308,7 +1316,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) device = node->data; if (atomic_xchg(&device->state, FW_DEVICE_GONE) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, list_empty(&card->link) ? 0 : SHUTDOWN_DELAY); } diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 281029d..7aef911 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -146,6 +146,7 @@ struct sbp2_logical_unit { */ int generation; int retries; + work_func_t workfn; struct delayed_work work; bool has_sdev; bool blocked; @@ -864,7 +865,7 @@ static void sbp2_login(struct work_struct *work) /* set appropriate retry limit(s) in BUSY_TIMEOUT register */ sbp2_set_busy_timeout(lu); - PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect); + lu->workfn = sbp2_reconnect; sbp2_agent_reset(lu); /* This was a re-login. */ @@ -918,7 +919,7 @@ static void sbp2_login(struct work_struct *work) * If a bus reset happened, sbp2_update will have requeued * lu->work already. Reset the work from reconnect to login. */ - PREPARE_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; } static void sbp2_reconnect(struct work_struct *work) @@ -952,7 +953,7 @@ static void sbp2_reconnect(struct work_struct *work) lu->retries++ >= 5) { dev_err(tgt_dev(tgt), "failed to reconnect\n"); lu->retries = 0; - PREPARE_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; } sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5)); @@ -972,6 +973,13 @@ static void sbp2_reconnect(struct work_struct *work) sbp2_conditionally_unblock(lu); } +static void sbp2_lu_workfn(struct work_struct *work) +{ + struct sbp2_logical_unit *lu = container_of(to_delayed_work(work), + struct sbp2_logical_unit, work); + lu->workfn(work); +} + static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) { struct sbp2_logical_unit *lu; @@ -998,7 +1006,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) lu->blocked = false; ++tgt->dont_block; INIT_LIST_HEAD(&lu->orb_list); - INIT_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; + INIT_DELAYED_WORK(&lu->work, sbp2_lu_workfn); list_add_tail(&lu->link, &tgt->lu_list); return 0; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 5d7782e..c3683bd 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -200,6 +200,7 @@ struct fw_device { unsigned irmc:1; unsigned bc_implemented:2; + work_func_t workfn; struct delayed_work work; struct fw_attribute_group attribute_group; }; -- cgit v0.10.2 From d82fead4639ab657d70f71b2bfefcd387a7c2a4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:47 -0500 Subject: wireless/rt2x00: don't use PREPARE_WORK in rt2800usb.c PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. Update rt2800usb.c to use INIT_WORK() instead of PREPARE_WORK(). As the work item isn't in active use during rt2800usb_probe_hw(), this doesn't cause any behavior difference. It would probably be best to route this with other related updates through the workqueue tree. Only compile tested. Signed-off-by: Tejun Heo Cc: Ivo van Doorn Cc: Gertjan van Wingerde Cc: Helmut Schaa Cc: linux-wireless@vger.kernel.org diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index caddc1b..42a2e06 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -764,7 +764,7 @@ static int rt2800usb_probe_hw(struct rt2x00_dev *rt2x00dev) /* * Overwrite TX done handler */ - PREPARE_WORK(&rt2x00dev->txdone_work, rt2800usb_work_txdone); + INIT_WORK(&rt2x00dev->txdone_work, rt2800usb_work_txdone); return 0; } -- cgit v0.10.2 From 4a8bb7f54860a3241b177bdeb06bb880151a23dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:48 -0500 Subject: ps3-vuart: don't use PREPARE_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. ps3_vuart wasn't overriding the work item with multiple work functions but was using NULL for INIT_WORK() and then single PREPARE_WORK() to set the work function. We can simply invoke INIT_WORK() with the work function and remove the PREPARE_WORK() usage. It would probably be best to route this with other related updates through the workqueue tree. Geoff: Tested on ps3 (DECR-1400). Signed-off-by: Tejun Heo Acked-by: Geoff Levand Cc: linuxppc-dev@lists.ozlabs.org Cc: cbe-oss-dev@lists.ozlabs.org diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index fb73008..bc1e513 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -699,8 +699,6 @@ int ps3_vuart_read_async(struct ps3_system_bus_device *dev, unsigned int bytes) BUG_ON(!bytes); - PREPARE_WORK(&priv->rx_list.work.work, ps3_vuart_work); - spin_lock_irqsave(&priv->rx_list.lock, flags); if (priv->rx_list.bytes_held >= bytes) { dev_dbg(&dev->core, "%s:%d: schedule_work %xh bytes\n", @@ -1052,7 +1050,7 @@ static int ps3_vuart_probe(struct ps3_system_bus_device *dev) INIT_LIST_HEAD(&priv->rx_list.head); spin_lock_init(&priv->rx_list.lock); - INIT_WORK(&priv->rx_list.work.work, NULL); + INIT_WORK(&priv->rx_list.work.work, ps3_vuart_work); priv->rx_list.work.trigger = 0; priv->rx_list.work.dev = dev; -- cgit v0.10.2 From 75ddb38f0901d12831264cd74224598e4d8f528b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:48 -0500 Subject: floppy: don't use PREPARE_[DELAYED_]WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. floppy has been multiplexing floppy_work and fd_timer with multiple work functions. Introduce floppy_work_workfn() and fd_timer_workfn() which invoke floppy_work_fn and fd_timer_fn respectively and always use the two functions as the work functions and update the users to set floppy_work_fn and fd_timer_fn instead of overriding work functions using PREPARE_[DELAYED_]WORK(). It would probably be best to route this with other related updates through the workqueue tree. Lightly tested using qemu. Signed-off-by: Tejun Heo Acked-by: Jiri Kosina diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 2023043..8f5565b 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -961,17 +961,31 @@ static void empty(void) { } -static DECLARE_WORK(floppy_work, NULL); +static void (*floppy_work_fn)(void); + +static void floppy_work_workfn(struct work_struct *work) +{ + floppy_work_fn(); +} + +static DECLARE_WORK(floppy_work, floppy_work_workfn); static void schedule_bh(void (*handler)(void)) { WARN_ON(work_pending(&floppy_work)); - PREPARE_WORK(&floppy_work, (work_func_t)handler); + floppy_work_fn = handler; queue_work(floppy_wq, &floppy_work); } -static DECLARE_DELAYED_WORK(fd_timer, NULL); +static void (*fd_timer_fn)(void) = NULL; + +static void fd_timer_workfn(struct work_struct *work) +{ + fd_timer_fn(); +} + +static DECLARE_DELAYED_WORK(fd_timer, fd_timer_workfn); static void cancel_activity(void) { @@ -982,7 +996,7 @@ static void cancel_activity(void) /* this function makes sure that the disk stays in the drive during the * transfer */ -static void fd_watchdog(struct work_struct *arg) +static void fd_watchdog(void) { debug_dcl(DP->flags, "calling disk change from watchdog\n"); @@ -993,7 +1007,7 @@ static void fd_watchdog(struct work_struct *arg) reset_fdc(); } else { cancel_delayed_work(&fd_timer); - PREPARE_DELAYED_WORK(&fd_timer, fd_watchdog); + fd_timer_fn = fd_watchdog; queue_delayed_work(floppy_wq, &fd_timer, HZ / 10); } } @@ -1005,7 +1019,8 @@ static void main_command_interrupt(void) } /* waits for a delay (spinup or select) to pass */ -static int fd_wait_for_completion(unsigned long expires, work_func_t function) +static int fd_wait_for_completion(unsigned long expires, + void (*function)(void)) { if (FDCS->reset) { reset_fdc(); /* do the reset during sleep to win time @@ -1016,7 +1031,7 @@ static int fd_wait_for_completion(unsigned long expires, work_func_t function) if (time_before(jiffies, expires)) { cancel_delayed_work(&fd_timer); - PREPARE_DELAYED_WORK(&fd_timer, function); + fd_timer_fn = function; queue_delayed_work(floppy_wq, &fd_timer, expires - jiffies); return 1; } @@ -1334,8 +1349,7 @@ static int fdc_dtr(void) * Pause 5 msec to avoid trouble. (Needs to be 2 jiffies) */ FDCS->dtr = raw_cmd->rate & 3; - return fd_wait_for_completion(jiffies + 2UL * HZ / 100, - (work_func_t)floppy_ready); + return fd_wait_for_completion(jiffies + 2UL * HZ / 100, floppy_ready); } /* fdc_dtr */ static void tell_sector(void) @@ -1440,7 +1454,7 @@ static void setup_rw_floppy(void) int flags; int dflags; unsigned long ready_date; - work_func_t function; + void (*function)(void); flags = raw_cmd->flags; if (flags & (FD_RAW_READ | FD_RAW_WRITE)) @@ -1454,9 +1468,9 @@ static void setup_rw_floppy(void) */ if (time_after(ready_date, jiffies + DP->select_delay)) { ready_date -= DP->select_delay; - function = (work_func_t)floppy_start; + function = floppy_start; } else - function = (work_func_t)setup_rw_floppy; + function = setup_rw_floppy; /* wait until the floppy is spinning fast enough */ if (fd_wait_for_completion(ready_date, function)) @@ -1486,7 +1500,7 @@ static void setup_rw_floppy(void) inr = result(); cont->interrupt(); } else if (flags & FD_RAW_NEED_DISK) - fd_watchdog(NULL); + fd_watchdog(); } static int blind_seek; @@ -1863,7 +1877,7 @@ static int start_motor(void (*function)(void)) /* wait_for_completion also schedules reset if needed. */ return fd_wait_for_completion(DRS->select_date + DP->select_delay, - (work_func_t)function); + function); } static void floppy_ready(void) -- cgit v0.10.2 From 77fa83cf7478202fac1520ca082ab8f9658d63b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:48 -0500 Subject: usb: don't use PREPARE_DELAYED_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. usb_hub->init_work is multiplexed with multiple work functions; however, the work item is never queued while in-flight, so we can simply use INIT_DELAYED_WORK() before each queueing. It would probably be best to route this with other related updates through the workqueue tree. Lightly tested. v2: Greg and Alan confirm that the work item is never queued while in-flight. Simply use INIT_DELAYED_WORK(). Signed-off-by: Tejun Heo Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: linux-usb@vger.kernel.org diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 64ea219..5cbf78d 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1040,7 +1040,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) */ if (type == HUB_INIT) { delay = hub_power_on(hub, false); - PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func2); + INIT_DELAYED_WORK(&hub->init_work, hub_init_func2); schedule_delayed_work(&hub->init_work, msecs_to_jiffies(delay)); @@ -1194,7 +1194,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Don't do a long sleep inside a workqueue routine */ if (type == HUB_INIT2) { - PREPARE_DELAYED_WORK(&hub->init_work, hub_init_func3); + INIT_DELAYED_WORK(&hub->init_work, hub_init_func3); schedule_delayed_work(&hub->init_work, msecs_to_jiffies(delay)); return; /* Continues at init3: below */ -- cgit v0.10.2 From 9ca9737444f1a8602f74b85018d881e7e54b5bd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:49 -0500 Subject: nvme: don't use PREPARE_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. nvme_dev->reset_work is multiplexed with multiple work functions. Introduce nvme_reset_workfn() which invokes nvme_dev->reset_workfn and always use it as the work function and update the users to set the ->reset_workfn field instead of overriding the work function using PREPARE_WORK(). It would probably be best to route this with other related updates through the workqueue tree. Compile tested. Signed-off-by: Tejun Heo Cc: Matthew Wilcox Cc: linux-nvme@lists.infradead.org diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 51824d1..8459e4e 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -993,7 +993,7 @@ static void nvme_abort_cmd(int cmdid, struct nvme_queue *nvmeq) dev_warn(&dev->pci_dev->dev, "I/O %d QID %d timeout, reset controller\n", cmdid, nvmeq->qid); - PREPARE_WORK(&dev->reset_work, nvme_reset_failed_dev); + dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); return; } @@ -1696,8 +1696,7 @@ static int nvme_kthread(void *data) list_del_init(&dev->node); dev_warn(&dev->pci_dev->dev, "Failed status, reset controller\n"); - PREPARE_WORK(&dev->reset_work, - nvme_reset_failed_dev); + dev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &dev->reset_work); continue; } @@ -2406,7 +2405,7 @@ static int nvme_dev_resume(struct nvme_dev *dev) return ret; if (ret == -EBUSY) { spin_lock(&dev_list_lock); - PREPARE_WORK(&dev->reset_work, nvme_remove_disks); + dev->reset_workfn = nvme_remove_disks; queue_work(nvme_workq, &dev->reset_work); spin_unlock(&dev_list_lock); } @@ -2435,6 +2434,12 @@ static void nvme_reset_failed_dev(struct work_struct *ws) nvme_dev_reset(dev); } +static void nvme_reset_workfn(struct work_struct *work) +{ + struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); + dev->reset_workfn(work); +} + static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) { int result = -ENOMEM; @@ -2453,7 +2458,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto free; INIT_LIST_HEAD(&dev->namespaces); - INIT_WORK(&dev->reset_work, nvme_reset_failed_dev); + dev->reset_workfn = nvme_reset_failed_dev; + INIT_WORK(&dev->reset_work, nvme_reset_workfn); dev->pci_dev = pdev; pci_set_drvdata(pdev, dev); result = nvme_set_instance(dev); @@ -2553,7 +2559,7 @@ static int nvme_resume(struct device *dev) struct nvme_dev *ndev = pci_get_drvdata(pdev); if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { - PREPARE_WORK(&ndev->reset_work, nvme_reset_failed_dev); + ndev->reset_workfn = nvme_reset_failed_dev; queue_work(nvme_workq, &ndev->reset_work); } return 0; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 69ae03f..6b9aafe 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,6 +87,7 @@ struct nvme_dev { struct list_head namespaces; struct kref kref; struct miscdevice miscdev; + work_func_t reset_workfn; struct work_struct reset_work; char name[12]; char serial[20]; -- cgit v0.10.2 From 059499453a9abd1857d442b44da8b4c126dc72a8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:50 -0500 Subject: afs: don't use PREPARE_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. afs_call->async_work is multiplexed with multiple work functions. Introduce afs_async_workfn() which invokes afs_call->async_workfn and always use it as the work function and update the users to set the ->async_workfn field instead of overriding the work function using PREPARE_WORK(). It would probably be best to route this with other related updates through the workqueue tree. Compile tested. Signed-off-by: Tejun Heo Cc: David Howells Cc: linux-afs@lists.infradead.org diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6621f80..be75b50 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -75,6 +75,7 @@ struct afs_call { const struct afs_call_type *type; /* type of call */ const struct afs_wait_mode *wait_mode; /* completion wait mode */ wait_queue_head_t waitq; /* processes awaiting completion */ + work_func_t async_workfn; struct work_struct async_work; /* asynchronous work processor */ struct work_struct work; /* actual work processor */ struct sk_buff_head rx_queue; /* received packets */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 8ad8c2a..ef943df 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -644,7 +644,7 @@ static void afs_process_async_call(struct work_struct *work) /* we can't just delete the call because the work item may be * queued */ - PREPARE_WORK(&call->async_work, afs_delete_async_call); + call->async_workfn = afs_delete_async_call; queue_work(afs_async_calls, &call->async_work); } @@ -663,6 +663,13 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) call->reply_size += len; } +static void afs_async_workfn(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, async_work); + + call->async_workfn(work); +} + /* * accept the backlog of incoming calls */ @@ -685,7 +692,8 @@ static void afs_collect_incoming_call(struct work_struct *work) return; } - INIT_WORK(&call->async_work, afs_process_async_call); + call->async_workfn = afs_process_async_call; + INIT_WORK(&call->async_work, afs_async_workfn); call->wait_mode = &afs_async_incoming_call; call->type = &afs_RXCMxxxx; init_waitqueue_head(&call->waitq); -- cgit v0.10.2 From 6c256cb6467e60b54f41170076c7f625e231c282 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:50 -0500 Subject: staging/fwserial: don't use PREPARE_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. fwtty_peer->work is multiplexed with multiple work functions. Introduce fwserial_peer_workfn() which invokes fwtty_peer->workfn and always use it as the work function and update the users to set the ->workfn field instead of overriding the work function using PREPARE_WORK(). It would probably be best to route this with other related updates through the workqueue tree. Compile tested. Signed-off-by: Tejun Heo Acked-by: Peter Hurley diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c index 8af136e..b22142e 100644 --- a/drivers/staging/fwserial/fwserial.c +++ b/drivers/staging/fwserial/fwserial.c @@ -2036,6 +2036,13 @@ static void fwserial_auto_connect(struct work_struct *work) schedule_delayed_work(&peer->connect, CONNECT_RETRY_DELAY); } +static void fwserial_peer_workfn(struct work_struct *work) +{ + struct fwtty_peer *peer = to_peer(work, work); + + peer->workfn(work); +} + /** * fwserial_add_peer - add a newly probed 'serial' unit device as a 'peer' * @serial: aggregate representing the specific fw_card to add the peer to @@ -2100,7 +2107,7 @@ static int fwserial_add_peer(struct fw_serial *serial, struct fw_unit *unit) peer->port = NULL; init_timer(&peer->timer); - INIT_WORK(&peer->work, NULL); + INIT_WORK(&peer->work, fwserial_peer_workfn); INIT_DELAYED_WORK(&peer->connect, fwserial_auto_connect); /* associate peer with specific fw_card */ @@ -2702,7 +2709,7 @@ static int fwserial_parse_mgmt_write(struct fwtty_peer *peer, } else { peer->work_params.plug_req = pkt->plug_req; - PREPARE_WORK(&peer->work, fwserial_handle_plug_req); + peer->workfn = fwserial_handle_plug_req; queue_work(system_unbound_wq, &peer->work); } break; @@ -2731,7 +2738,7 @@ static int fwserial_parse_mgmt_write(struct fwtty_peer *peer, fwtty_err(&peer->unit, "unplug req: busy\n"); rcode = RCODE_CONFLICT_ERROR; } else { - PREPARE_WORK(&peer->work, fwserial_handle_unplug_req); + peer->workfn = fwserial_handle_unplug_req; queue_work(system_unbound_wq, &peer->work); } break; diff --git a/drivers/staging/fwserial/fwserial.h b/drivers/staging/fwserial/fwserial.h index 54f7f9b..98b853d 100644 --- a/drivers/staging/fwserial/fwserial.h +++ b/drivers/staging/fwserial/fwserial.h @@ -91,6 +91,7 @@ struct fwtty_peer { struct rcu_head rcu; spinlock_t lock; + work_func_t workfn; struct work_struct work; struct peer_work_params work_params; struct timer_list timer; -- cgit v0.10.2 From f073f9229ff1137d3be20558bec3bfb77e3af2a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:24:50 -0500 Subject: workqueue: remove PREPARE_[DELAYED_]WORK() Peter Hurley noticed that since a2c1c57be8d9 ("workqueue: consider work function when searching for busy work items"), a work item which gets assigned a different work function would break out of the non-reentrancy guarantee as workqueue would consider it a different work item. This is fragile and extremely subtle. PREPARE_[DELAYED_]WORK() have never been used widely and its semantics has always been somewhat iffy. If the work item is known not to be on queue when PREPARE_WORK() is called, there's no difference from using INIT_WORK(). If the work item may be queued at the time of PREPARE_WORK(), we can't really tell whether the old or new function will be executed the next time. We really don't want this level of subtlety in workqueue interface for such marginal use cases. The previous patches converted all existing users away from PREPARE_[DELAYED_]WORK(). Let's remove them. Signed-off-by: Tejun Heo Cc: Peter Hurley Link: http://lkml.kernel.org/g/1392493119-9277-1-git-send-email-peter@hurleysoftware.com diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8059334..29da9e7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -177,17 +177,6 @@ struct execute_work { #define DECLARE_DEFERRABLE_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, TIMER_DEFERRABLE) -/* - * initialize a work item's function pointer - */ -#define PREPARE_WORK(_work, _func) \ - do { \ - (_work)->func = (_func); \ - } while (0) - -#define PREPARE_DELAYED_WORK(_work, _func) \ - PREPARE_WORK(&(_work)->work, (_func)) - #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); @@ -217,7 +206,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \ INIT_LIST_HEAD(&(_work)->entry); \ - PREPARE_WORK((_work), (_func)); \ + (_work)->func = (_func); \ } while (0) #else #define __INIT_WORK(_work, _func, _onstack) \ @@ -225,7 +214,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ - PREPARE_WORK((_work), (_func)); \ + (_work)->func = (_func); \ } while (0) #endif -- cgit v0.10.2 From cebc2de44d3bce53e46476e774126c298ca2c8a9 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 7 Mar 2014 14:57:19 +0000 Subject: dm space map metadata: fix refcount decrement below 0 which caused corruption This has been a relatively long-standing issue that wasn't nailed down until Teng-Feng Yang's meticulous bug report to dm-devel on 3/7/2014, see: http://www.redhat.com/archives/dm-devel/2014-March/msg00021.html From that report: "When decreasing the reference count of a metadata block with its reference count equals 3, we will call dm_btree_remove() to remove this enrty from the B+tree which keeps the reference count info in metadata device. The B+tree will try to rebalance the entry of the child nodes in each node it traversed, and the rebalance process contains the following steps. (1) Finding the corresponding children in current node (shadow_current(s)) (2) Shadow the children block (issue BOP_INC) (3) redistribute keys among children, and free children if necessary (issue BOP_DEC) Since the update of a metadata block's reference count could be recursive, we will stash these reference count update operations in smm->uncommitted and then process them in a FILO fashion. The problem is that step(3) could free the children which is created in step(2), so the BOP_DEC issued in step(3) will be carried out before the BOP_INC issued in step(2) since these BOPs will be processed in FILO fashion. Once the BOP_DEC from step(3) tries to decrease the reference count of newly shadow block, it will report failure for its reference equals 0 before decreasing. It looks like we can solve this issue by processing these BOPs in a FIFO fashion instead of FILO." Commit 5b564d80 ("dm space map: disallow decrementing a reference count below zero") changed the code to report an error for this temporary refcount decrement below zero. So what was previously a harmless invalid refcount became a hard failure due to the new error path: device-mapper: space map common: unable to decrement a reference count below 0 device-mapper: thin: 253:6: dm_thin_insert_block() failed: error = -22 device-mapper: thin: 253:6: switching pool to read-only mode This bug is in dm persistent-data code that is common to the DM thin and cache targets. So any users of those targets should apply this fix. Fix this by applying recursive space map operations in FIFO order rather than FILO. Resolves: https://bugzilla.kernel.org/show_bug.cgi?id=68801 Reported-by: Apollon Oikonomopoulos Reported-by: edwillam1007@gmail.com Reported-by: Teng-Feng Yang Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.13+ diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e9bdd46..786b689 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -91,6 +91,69 @@ struct block_op { dm_block_t block; }; +struct bop_ring_buffer { + unsigned begin; + unsigned end; + struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1]; +}; + +static void brb_init(struct bop_ring_buffer *brb) +{ + brb->begin = 0; + brb->end = 0; +} + +static bool brb_empty(struct bop_ring_buffer *brb) +{ + return brb->begin == brb->end; +} + +static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) +{ + unsigned r = old + 1; + return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r; +} + +static int brb_push(struct bop_ring_buffer *brb, + enum block_op_type type, dm_block_t b) +{ + struct block_op *bop; + unsigned next = brb_next(brb, brb->end); + + /* + * We don't allow the last bop to be filled, this way we can + * differentiate between full and empty. + */ + if (next == brb->begin) + return -ENOMEM; + + bop = brb->bops + brb->end; + bop->type = type; + bop->block = b; + + brb->end = next; + + return 0; +} + +static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result) +{ + struct block_op *bop; + + if (brb_empty(brb)) + return -ENODATA; + + bop = brb->bops + brb->begin; + result->type = bop->type; + result->block = bop->block; + + brb->begin = brb_next(brb, brb->begin); + + return 0; +} + +/*----------------------------------------------------------------*/ + struct sm_metadata { struct dm_space_map sm; @@ -101,25 +164,20 @@ struct sm_metadata { unsigned recursion_count; unsigned allocated_this_transaction; - unsigned nr_uncommitted; - struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; + struct bop_ring_buffer uncommitted; struct threshold threshold; }; static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) { - struct block_op *op; + int r = brb_push(&smm->uncommitted, type, b); - if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) { + if (r) { DMERR("too many recursive allocations"); return -ENOMEM; } - op = smm->uncommitted + smm->nr_uncommitted++; - op->type = type; - op->block = b; - return 0; } @@ -158,11 +216,17 @@ static int out(struct sm_metadata *smm) return -ENOMEM; } - if (smm->recursion_count == 1 && smm->nr_uncommitted) { - while (smm->nr_uncommitted && !r) { - smm->nr_uncommitted--; - r = commit_bop(smm, smm->uncommitted + - smm->nr_uncommitted); + if (smm->recursion_count == 1) { + while (!brb_empty(&smm->uncommitted)) { + struct block_op bop; + + r = brb_pop(&smm->uncommitted, &bop); + if (r) { + DMERR("bug in bop ring buffer"); + break; + } + + r = commit_bop(smm, &bop); if (r) break; } @@ -217,7 +281,8 @@ static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) { - int r, i; + int r; + unsigned i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); unsigned adjustment = 0; @@ -225,8 +290,10 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, * We may have some uncommitted adjustments to add. This list * should always be really short. */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; + for (i = smm->uncommitted.begin; + i != smm->uncommitted.end; + i = brb_next(&smm->uncommitted, i)) { + struct block_op *op = smm->uncommitted.bops + i; if (op->block != b) continue; @@ -254,7 +321,8 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) { - int r, i, adjustment = 0; + int r, adjustment = 0; + unsigned i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); uint32_t rc; @@ -262,8 +330,11 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, * We may have some uncommitted adjustments to add. This list * should always be really short. */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; + for (i = smm->uncommitted.begin; + i != smm->uncommitted.end; + i = brb_next(&smm->uncommitted, i)) { + + struct block_op *op = smm->uncommitted.bops + i; if (op->block != b) continue; @@ -671,7 +742,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm, smm->begin = superblock + 1; smm->recursion_count = 0; smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; + brb_init(&smm->uncommitted); threshold_init(&smm->threshold); memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); @@ -715,7 +786,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm, smm->begin = 0; smm->recursion_count = 0; smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; + brb_init(&smm->uncommitted); threshold_init(&smm->threshold); memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); -- cgit v0.10.2 From b053940df41808f0f27568eb36820d10a8a987f8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 7 Mar 2014 13:22:22 +0530 Subject: ARC: Use correct PTAG register for icache flush This fixes a subtle issue with cache flush which could potentially cause random userspace crashes because of stale icache lines. This error crept in when consolidating the cache flush code Fixes: bd12976c3664 (ARC: cacheflush refactor #3: Unify the {d,i}cache) Signed-off-by: Vineet Gupta Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 3.13 Cc: arc-linux-dev@synopsys.com Signed-off-by: Linus Torvalds diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 6b58c1d..400c663 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -282,7 +282,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, #else /* if V-P const for loop, PTAG can be written once outside loop */ if (full_page_op) - write_aux_reg(ARC_REG_DC_PTAG, paddr); + write_aux_reg(aux_tag, paddr); #endif while (num_lines-- > 0) { @@ -296,7 +296,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, write_aux_reg(aux_cmd, vaddr); vaddr += L1_CACHE_BYTES; #else - write_aux_reg(aux, paddr); + write_aux_reg(aux_cmd, paddr); paddr += L1_CACHE_BYTES; #endif } -- cgit v0.10.2 From b28a613e9138e4b3a64649bd60b13436f4b4b49b Mon Sep 17 00:00:00 2001 From: Michele Baldessari Date: Fri, 7 Mar 2014 16:34:29 +0000 Subject: libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for Seagate Momentus SpinPoint M8 (2BA30001) Via commit 87809942d3fa "libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for Seagate Momentus SpinPoint M8" we added a quirk for disks named "ST1000LM024 HN-M101MBB" with firmware revision "2AR10001". As reported on https://bugzilla.redhat.com/show_bug.cgi?id=1073901, we need to also add firmware revision 2BA30001 as it is broken as well. Reported-by: Nicholas Signed-off-by: Michele Baldessari Tested-by: Guilherme Amadio Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 191cdfb..65d3f1b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4175,6 +4175,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Seagate Momentus SpinPoint M8 seem to have FPMDA_AA issues */ { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA }, + { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA }, /* Blacklist entries taken from Silicon Image 3124/3132 Windows driver .inf file - also several Linux problem reports */ -- cgit v0.10.2 From e0429362ab15c46ea4d64c3f8c9e0933e48a143a Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 4 Mar 2014 10:52:39 -0800 Subject: usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e We've encountered a rare issue when enumerating two Logitech webcams after a reboot that doesn't power cycle the USB ports. They are spewing random data (possibly some leftover UVC buffers) on the second (full-sized) Get Configuration request of the enumeration phase. Since the data is random this can potentially cause all kinds of odd behavior, and since it occasionally happens multiple times (after the kernel issues another reset due to the garbled configuration descriptor), it is not always recoverable. Set the USB_DELAY_INIT quirk that seems to work around the issue. Signed-off-by: Julius Werner Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8f37063..739ee8e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -47,6 +47,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Logitech HD Pro Webcams C920 and C930e */ + { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, -- cgit v0.10.2 From d86db25e53fa69e3e97f3b55dd82a70689787c5d Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 4 Mar 2014 11:27:38 -0800 Subject: usb: Make DELAY_INIT quirk wait 100ms between Get Configuration requests The DELAY_INIT quirk only reduces the frequency of enumeration failures with the Logitech HD Pro C920 and C930e webcams, but does not quite eliminate them. We have found that adding a delay of 100ms between the first and second Get Configuration request makes the device enumerate perfectly reliable even after several weeks of extensive testing. The reasons for that are anyone's guess, but since the DELAY_INIT quirk already delays enumeration by a whole second, wating for another 10th of that isn't really a big deal for the one other device that uses it, and it will resolve the problems with these webcams. Signed-off-by: Julius Werner Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 8d72f0c..062967c 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -717,6 +717,10 @@ int usb_get_configuration(struct usb_device *dev) result = -ENOMEM; goto err; } + + if (dev->quirks & USB_QUIRK_DELAY_INIT) + msleep(100); + result = usb_get_descriptor(dev, USB_DT_CONFIG, cfgno, bigbuffer, length); if (result < 0) { -- cgit v0.10.2 From e2ed511400d41e0d136089d5a55ceab57c6a2426 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 7 Mar 2014 17:06:57 +0200 Subject: Revert "xhci 1.0: Limit arbitrarily-aligned scatter gather." This reverts commit 247bf557273dd775505fb9240d2d152f4f20d304. This commit, together with commit 3804fad45411b48233b48003e33a78f290d227c8 "USBNET: ax88179_178a: enable tso if usb host supports sg dma" were origially added to get xHCI 1.0 hosts and usb ethernet ax88179_178a devices working together with scatter gather. xHCI 1.0 hosts pose some requirement on how transfer buffers are aligned, setting this requirement for 1.0 hosts caused USB 3.0 mass storage devices to fail more frequently. USB 3.0 mass storage devices used to work before 3.14-rc1. Theoretically, the TD fragment rules could have caused an occasional disk glitch. Now the devices *will* fail, instead of theoretically failing. >From a user perspective, this looks like a regression; the USB device obviously fails on 3.14-rc1, and may sometimes silently fail on prior kernels. The proper soluition is to implement the TD fragment rules required, but for now this patch needs to be reverted to get USB 3.0 mass storage devices working at the level they used to. Signed-off-by: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6fe577d..924a6cc 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4733,6 +4733,9 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) /* Accept arbitrarily long scatter-gather lists */ hcd->self.sg_tablesize = ~0; + /* support to build packet from discontinuous buffers */ + hcd->self.no_sg_constraint = 1; + /* XHCI controllers don't stop the ep queue on short packets :| */ hcd->self.no_stop_on_short = 1; @@ -4757,14 +4760,6 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) /* xHCI private pointer was set in xhci_pci_probe for the second * registered roothub. */ - xhci = hcd_to_xhci(hcd); - /* - * Support arbitrarily aligned sg-list entries on hosts without - * TD fragment rules (which are currently unsupported). - */ - if (xhci->hci_version < 0x100) - hcd->self.no_sg_constraint = 1; - return 0; } @@ -4793,9 +4788,6 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) if (xhci->hci_version > 0x96) xhci->quirks |= XHCI_SPURIOUS_SUCCESS; - if (xhci->hci_version < 0x100) - hcd->self.no_sg_constraint = 1; - /* Make sure the HC is halted. */ retval = xhci_halt(xhci); if (retval) -- cgit v0.10.2 From 469d417b68958a064c09e7875646c97c6e783dfc Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 7 Mar 2014 17:06:58 +0200 Subject: Revert "USBNET: ax88179_178a: enable tso if usb host supports sg dma" This reverts commit 3804fad45411b48233b48003e33a78f290d227c8. This commit, together with commit 247bf557273dd775505fb9240d2d152f4f20d304 "xhci 1.0: Limit arbitrarily-aligned scatter gather." were origially added to get xHCI 1.0 hosts and usb ethernet ax88179_178a devices working together with scatter gather. xHCI 1.0 hosts pose some requirement on how transfer buffers are aligned, setting this requirement for 1.0 hosts caused USB 3.0 mass storage devices to fail more frequently. USB 3.0 mass storage devices used to work before 3.14-rc1. Theoretically, the TD fragment rules could have caused an occasional disk glitch. Now the devices *will* fail, instead of theoretically failing. >From a user perspective, this looks like a regression; the USB device obviously fails on 3.14-rc1, and may sometimes silently fail on prior kernels. The proper soluition is to implement the TD fragment rules for xHCI 1.0 hosts, but for now, revert this patch until scatter gather can be properly supported. Signed-off-by: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 955df81..42085e6 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1029,20 +1029,12 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) dev->mii.phy_id = 0x03; dev->mii.supports_gmii = 1; - if (usb_device_no_sg_constraint(dev->udev)) - dev->can_dma_sg = 1; - dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; - if (dev->can_dma_sg) { - dev->net->features |= NETIF_F_SG | NETIF_F_TSO; - dev->net->hw_features |= NETIF_F_SG | NETIF_F_TSO; - } - /* Enable checksum offload */ *tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | AX_RXCOE_TCPV6 | AX_RXCOE_UDPV6; -- cgit v0.10.2 From 006fa2599bf0daf107cbb7a8a99fcfb9a998a169 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Feb 2014 19:40:46 +0000 Subject: ARM: fix noMMU kallsyms symbol filtering With noMMU, CONFIG_PAGE_OFFSET was not being set correctly. As there's no MMU, PAGE_OFFSET should be equal to PHYS_OFFSET in all cases. This commit makes that explicit. Since we do this, we don't need to mess around in asm/memory.h with ifdefs to sort this out, so let's get rid of that, and there's no point offering the "Memory split" option for noMMU as that's meaningless there. Fixes: b9b32bf70f2f ("ARM: use linker magic for vectors and vector stubs") Cc: Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e254198..2a232ce 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1578,6 +1578,7 @@ config BL_SWITCHER_DUMMY_IF choice prompt "Memory split" + depends on MMU default VMSPLIT_3G help Select the desired split between kernel and user memory. @@ -1595,6 +1596,7 @@ endchoice config PAGE_OFFSET hex + default PHYS_OFFSET if !MMU default 0x40000000 if VMSPLIT_1G default 0x80000000 if VMSPLIT_2G default 0xC0000000 diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 8756e4b..4afb376 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -30,14 +30,15 @@ */ #define UL(x) _AC(x, UL) +/* PAGE_OFFSET - the virtual address of the start of the kernel image */ +#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) + #ifdef CONFIG_MMU /* - * PAGE_OFFSET - the virtual address of the start of the kernel image * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area */ -#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) #define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M)) #define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M) @@ -104,10 +105,6 @@ #define END_MEM (UL(CONFIG_DRAM_BASE) + CONFIG_DRAM_SIZE) #endif -#ifndef PAGE_OFFSET -#define PAGE_OFFSET PLAT_PHYS_OFFSET -#endif - /* * The module can be at any place in ram in nommu mode. */ -- cgit v0.10.2 From 052450fdc55894a39fbae93d9bbe43947956f663 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Feb 2014 22:41:41 +0100 Subject: ARM: 7991/1: sa1100: fix compile problem on Collie Due to a problem in the MFD Kconfig it was not possible to compile the UCB battery driver for the Collie SA1100 system, in turn making it impossible to compile in the battery driver. (See patch "mfd: include all drivers in subsystem menu".) After fixing the MFD Kconfig (separate patch) a compile error appears in the Collie battery driver due to the implicitly requiring through via prior to commit 40ca061b "ARM: 7841/1: sa1100: remove complex GPIO interface". Fix this up by including the required header into . Cc: stable@vger.kernel.org Cc: Andrea Adami Cc: Dmitry Eremin-Solenikov Signed-off-by: Linus Walleij Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h index f33679d..50e1d85 100644 --- a/arch/arm/mach-sa1100/include/mach/collie.h +++ b/arch/arm/mach-sa1100/include/mach/collie.h @@ -13,6 +13,8 @@ #ifndef __ASM_ARCH_COLLIE_H #define __ASM_ARCH_COLLIE_H +#include "hardware.h" /* Gives GPIO_MAX */ + extern void locomolcd_power(int on); #define COLLIE_SCOOP_GPIO_BASE (GPIO_MAX + 1) -- cgit v0.10.2 From 38e0b088d322e5762ac21fb4df433e83faf128eb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Feb 2014 17:21:26 +0100 Subject: ARM: 7992/1: boot: compressed: ignore bswapsdi2.S Commit 017f161a55b4 (ARM: 7877/1: use built-in byte swap function) added bswapsdi2.{o,S} to arch/arm/boot/compressed/Makefile, but didn't update the .gitignore. Thus after a a build git status shows bswapsdi2.S as a new file, which is a little annoying. This patch updates arch/arm/boot/compressed/.gitignore to ignore bswapsdi2.S, as we already do for ashldi3.S and others. Signed-off-by: Mark Rutland Acked-by: Nicolas Pitre Acked-by: Kim Phillips Cc: David Woodhouse Signed-off-by: Russell King diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index 47279aa..0714e03 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -1,4 +1,5 @@ ashldi3.S +bswapsdi2.S font.c lib1funcs.S hyp-stub.S -- cgit v0.10.2 From 35bf88212b5e5d1f9145481bc15e8c1801da58dc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Mar 2014 14:22:48 -0800 Subject: libata: end the r-word Prompted by the social effort in the US to discourage usage of the adjective "retarded". In this case we needlessly anthropomorphize hard drives. The implication is that due to design deficiencies in the device reset recovery time is negatively impacted. We can simply clearly state that fact. "Exceptional devices cause outliers in reset recovery time." This steers clear of any unintended comparison of such devices to humans with cognitive disabilities. Signed-off-by: Dan Williams Signed-off-by: Tejun Heo diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 6d87570..c1d0170 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -95,12 +95,13 @@ enum { * represents timeout for that try. The first try can be soft or * hardreset. All others are hardreset if available. In most cases * the first reset w/ 10sec timeout should succeed. Following entries - * are mostly for error handling, hotplug and retarded devices. + * are mostly for error handling, hotplug and those outlier devices that + * take an exceptionally long time to recover from reset. */ static const unsigned long ata_eh_reset_timeouts[] = { 10000, /* most drives spin up by 10sec */ 10000, /* > 99% working drives spin up before 20sec */ - 35000, /* give > 30 secs of idleness for retarded devices */ + 35000, /* give > 30 secs of idleness for outlier devices */ 5000, /* and sweet one last chance */ ULONG_MAX, /* > 1 min has elapsed, give up */ }; -- cgit v0.10.2 From 5fa10196bdb5f190f595ebd048490ee52dddea0f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 7 Mar 2014 15:05:20 -0800 Subject: x86: Ignore NMIs that come in during early boot Don Zickus reports: A customer generated an external NMI using their iLO to test kdump worked. Unfortunately, the machine hung. Disabling the nmi_watchdog made things work. I speculated the external NMI fired, caused the machine to panic (as expected) and the perf NMI from the watchdog came in and was latched. My guess was this somehow caused the hang. ---- It appears that the latched NMI stays latched until the early page table generation on 64 bits, which causes exceptions to happen which end in IRET, which re-enable NMI. Therefore, ignore NMIs that come in during early execution, until we have proper exception handling. Reported-and-tested-by: Don Zickus Link: http://lkml.kernel.org/r/1394221143-29713-1-git-send-email-dzickus@redhat.com Signed-off-by: H. Peter Anvin Cc: # v3.5+, older with some backport effort diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 81ba276..d2a2159 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -544,6 +544,10 @@ ENDPROC(early_idt_handlers) /* This is global to keep gas from relaxing the jumps */ ENTRY(early_idt_handler) cld + + cmpl $X86_TRAP_NMI,(%esp) + je is_nmi # Ignore NMI + cmpl $2,%ss:early_recursion_flag je hlt_loop incl %ss:early_recursion_flag @@ -594,8 +598,9 @@ ex_entry: pop %edx pop %ecx pop %eax - addl $8,%esp /* drop vector number and error code */ decl %ss:early_recursion_flag +is_nmi: + addl $8,%esp /* drop vector number and error code */ iret ENDPROC(early_idt_handler) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e1aabdb..33f36c7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -343,6 +343,9 @@ early_idt_handlers: ENTRY(early_idt_handler) cld + cmpl $X86_TRAP_NMI,(%rsp) + je is_nmi # Ignore NMI + cmpl $2,early_recursion_flag(%rip) jz 1f incl early_recursion_flag(%rip) @@ -405,8 +408,9 @@ ENTRY(early_idt_handler) popq %rdx popq %rcx popq %rax - addq $16,%rsp # drop vector number and error code decl early_recursion_flag(%rip) +is_nmi: + addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN ENDPROC(early_idt_handler) -- cgit v0.10.2 From 2ca310fc4160ed0420da65534a21ae77b24326a8 Mon Sep 17 00:00:00 2001 From: Ditang Chen Date: Fri, 7 Mar 2014 13:27:57 +0800 Subject: SUNRPC: Fix oops when trace sunrpc_task events in nfs client When tracking sunrpc_task events in nfs client, the clnt pointer may be NULL. [ 139.269266] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 139.269915] IP: [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] PGD 1d293067 PUD 1d294067 PMD 0 [ 139.269915] Oops: 0000 [#1] SMP [ 139.269915] Modules linked in: nfsv4 dns_resolver nfs lockd sunrpc fscache sg ppdev e1000 serio_raw pcspkr parport_pc parport i2c_piix4 i2c_core microcode xfs libcrc32c sd_mod sr_mod cdrom ata_generic crc_t10dif crct10dif_common pata_acpi ahci libahci ata_piix libata dm_mirror dm_region_hash dm_log dm_mod [ 139.269915] CPU: 0 PID: 59 Comm: kworker/0:2 Not tainted 3.10.0-84.el7.x86_64 #1 [ 139.269915] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 139.269915] Workqueue: rpciod rpc_async_schedule [sunrpc] [ 139.269915] task: ffff88001b598000 ti: ffff88001b632000 task.ti: ffff88001b632000 [ 139.269915] RIP: 0010:[] [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP: 0018:ffff88001b633d70 EFLAGS: 00010206 [ 139.269915] RAX: ffff88001dfc5338 RBX: ffff88001cc37a00 RCX: ffff88001dfc5334 [ 139.269915] RDX: ffff88001dfc5338 RSI: 0000000000000000 RDI: ffff88001dfc533c [ 139.269915] RBP: ffff88001b633db0 R08: 000000000000002c R09: 000000000000000a [ 139.269915] R10: 0000000000062180 R11: 00000020759fb9dc R12: ffffffffa0292c20 [ 139.269915] R13: ffff88001dfc5334 R14: 0000000000000000 R15: 0000000000000000 [ 139.269915] FS: 0000000000000000(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 [ 139.269915] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 139.269915] CR2: 0000000000000004 CR3: 000000001d290000 CR4: 00000000000006f0 [ 139.269915] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 139.269915] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 139.269915] Stack: [ 139.269915] 000000001b633d98 0000000000000246 ffff88001df1dc00 ffff88001cc37a00 [ 139.269915] ffff88001bc35e60 0000000000000000 ffff88001ffa0a48 ffff88001bc35ee0 [ 139.269915] ffff88001b633e08 ffffffffa02704b5 0000000000010000 ffff88001cc37a70 [ 139.269915] Call Trace: [ 139.269915] [] __rpc_execute+0x1d5/0x400 [sunrpc] [ 139.269915] [] rpc_async_schedule+0x26/0x30 [sunrpc] [ 139.269915] [] process_one_work+0x17b/0x460 [ 139.269915] [] worker_thread+0x11b/0x400 [ 139.269915] [] ? rescuer_thread+0x3e0/0x3e0 [ 139.269915] [] kthread+0xc0/0xd0 [ 139.269915] [] ? kthread_create_on_node+0x110/0x110 [ 139.269915] [] ret_from_fork+0x7c/0xb0 [ 139.269915] [] ? kthread_create_on_node+0x110/0x110 [ 139.269915] Code: 4c 8b 45 c8 48 8d 7d d0 89 4d c4 41 89 c9 b9 28 00 00 00 e8 9d b4 e9 e0 48 85 c0 49 89 c5 74 a2 48 89 c7 e8 9d 3f e9 e0 48 89 c2 <41> 8b 46 04 48 8b 7d d0 4c 89 e9 4c 89 e6 89 42 0c 0f b7 83 d4 [ 139.269915] RIP [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP [ 139.269915] CR2: 0000000000000004 [ 140.946406] ---[ end trace ba486328b98d7622 ]--- Signed-off-by: Ditang Chen Signed-off-by: Trond Myklebust diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ddc179b..1fef3e6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -83,7 +83,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, ), TP_fast_assign( - __entry->client_id = clnt->cl_clid; + __entry->client_id = clnt ? clnt->cl_clid : -1; __entry->task_id = task->tk_pid; __entry->action = action; __entry->runstate = task->tk_runstate; @@ -91,7 +91,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, __entry->flags = task->tk_flags; ), - TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d action=%pf", + TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf", __entry->task_id, __entry->client_id, __entry->flags, __entry->runstate, -- cgit v0.10.2 From b01d4e68933ec23e43b1046fa35d593cefcf37d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 7 Mar 2014 18:58:40 -0800 Subject: x86: fix compile error due to X86_TRAP_NMI use in asm files It's an enum, not a #define, you can't use it in asm files. Introduced in commit 5fa10196bdb5 ("x86: Ignore NMIs that come in during early boot"), and sadly I didn't compile-test things like I should have before pushing out. My weak excuse is that the x86 tree generally doesn't introduce stupid things like this (and the ARM pull afterwards doesn't cause me to do a compile-test either, since I don't cross-compile). Cc: Don Zickus Cc: H. Peter Anvin Signed-off-by: Linus Torvalds diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d2a2159..f36bd42 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -545,7 +545,7 @@ ENDPROC(early_idt_handlers) ENTRY(early_idt_handler) cld - cmpl $X86_TRAP_NMI,(%esp) + cmpl $2,(%esp) # X86_TRAP_NMI je is_nmi # Ignore NMI cmpl $2,%ss:early_recursion_flag diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 33f36c7..a468c0a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -343,7 +343,7 @@ early_idt_handlers: ENTRY(early_idt_handler) cld - cmpl $X86_TRAP_NMI,(%rsp) + cmpl $2,(%rsp) # X86_TRAP_NMI je is_nmi # Ignore NMI cmpl $2,early_recursion_flag(%rip) -- cgit v0.10.2 From d211f177b28ec070c25b3d0b960aa55f352f731f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 8 Mar 2014 15:31:54 -0800 Subject: audit: Update kdoc for audit_send_reply and audit_list_rules_send The kbuild test robot reported: > tree: git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-next > head: 6f285b19d09f72e801525f5eea1bdad22e559bf0 > commit: 6f285b19d09f72e801525f5eea1bdad22e559bf0 [2/2] audit: Send replies in the proper network namespace. > reproduce: make htmldocs > > >> Warning(kernel/audit.c:575): No description found for parameter 'request_skb' > >> Warning(kernel/audit.c:575): Excess function parameter 'portid' description in 'audit_send_reply' > >> Warning(kernel/auditfilter.c:1074): No description found for parameter 'request_skb' > >> Warning(kernel/auditfilter.c:1074): Excess function parameter 'portid' description in 'audit_list_rules_s Which was caused by my failure to update the kdoc annotations when I updated the functions. Fix that small oversight now. Signed-off-by: "Eric W. Biederman" diff --git a/kernel/audit.c b/kernel/audit.c index 32086bf..3392d3e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -559,7 +559,7 @@ static int audit_send_reply_thread(void *arg) } /** * audit_send_reply - send an audit reply message via netlink - * @portid: netlink port to which to send reply + * @request_skb: skb of request we are replying to (used to target the reply) * @seq: sequence number * @type: audit message type * @done: done (last) flag diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index e8d1c7c..92062fd 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1067,7 +1067,7 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data, /** * audit_list_rules_send - list the audit rules - * @portid: target portid for netlink audit messages + * @request_skb: skb of request we are replying to (used to target the reply) * @seq: netlink audit message sequence (serial) number */ int audit_list_rules_send(struct sk_buff *request_skb, int seq) -- cgit v0.10.2 From 7982e90c3a574c90b51aa1c77404e7e6189d58d5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 8 Mar 2014 17:20:01 -0700 Subject: block: fix q->flush_rq NULL pointer crash on dm-mpath flush Commit 1874198 ("blk-mq: rework flush sequencing logic") switched ->flush_rq from being an embedded member of the request_queue structure to being dynamically allocated in blk_init_queue_node(). Request-based DM multipath doesn't use blk_init_queue_node(), instead it uses blk_alloc_queue_node() + blk_init_allocated_queue(). Because commit 1874198 placed the dynamic allocation of ->flush_rq in blk_init_queue_node() any flush issued to a dm-mpath device would crash with a NULL pointer, e.g.: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] blk_rq_init+0x1e/0xb0 PGD bb3c7067 PUD bb01d067 PMD 0 Oops: 0002 [#1] SMP ... CPU: 5 PID: 5028 Comm: dt Tainted: G W O 3.14.0-rc3.snitm+ #10 ... task: ffff88032fb270e0 ti: ffff880079564000 task.ti: ffff880079564000 RIP: 0010:[] [] blk_rq_init+0x1e/0xb0 RSP: 0018:ffff880079565c98 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000030 RDX: ffff880260c74048 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff880079565ca8 R08: ffff880260aa1e98 R09: 0000000000000001 R10: ffff88032fa78500 R11: 0000000000000246 R12: 0000000000000000 R13: ffff880260aa1de8 R14: 0000000000000650 R15: 0000000000000000 FS: 00007f8d36a2a700(0000) GS:ffff88033fca0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000079b36000 CR4: 00000000000007e0 Stack: 0000000000000000 ffff880260c74048 ffff880079565cd8 ffffffff81257a47 ffff880260aa1de8 ffff880260c74048 0000000000000001 0000000000000000 ffff880079565d08 ffffffff81257c2d 0000000000000000 ffff880260aa1de8 Call Trace: [] blk_flush_complete_seq+0x2d7/0x2e0 [] blk_insert_flush+0x1dd/0x210 [] __elv_add_request+0x1f9/0x320 [] ? blk_account_io_start+0x111/0x190 [] blk_queue_bio+0x25b/0x330 [] dm_request+0x35/0x40 [dm_mod] [] generic_make_request+0xc0/0x100 [] submit_bio+0x73/0x140 [] submit_bio_wait+0x5d/0x80 [] blkdev_issue_flush+0x78/0xa0 [] blkdev_fsync+0x3f/0x60 [] vfs_fsync_range+0x1e/0x20 [] vfs_fsync+0x1c/0x20 [] do_fsync+0x41/0x80 [] ? SyS_lseek+0x7e/0x80 [] SyS_fsync+0x10/0x20 [] system_call_fastpath+0x16/0x1b Fix this by moving the ->flush_rq allocation from blk_init_queue_node() to blk_init_allocated_queue(). blk_init_queue_node() also calls blk_init_allocated_queue() so this change is functionality equivalent for all blk_init_queue_node() callers. Reported-by: Hannes Reinecke Reported-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index 853f927..4cd5ffc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -693,20 +693,11 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) if (!uninit_q) return NULL; - uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); - if (!uninit_q->flush_rq) - goto out_cleanup_queue; - q = blk_init_allocated_queue(uninit_q, rfn, lock); if (!q) - goto out_free_flush_rq; - return q; + blk_cleanup_queue(uninit_q); -out_free_flush_rq: - kfree(uninit_q->flush_rq); -out_cleanup_queue: - blk_cleanup_queue(uninit_q); - return NULL; + return q; } EXPORT_SYMBOL(blk_init_queue_node); @@ -717,6 +708,10 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, if (!q) return NULL; + q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); + if (!q->flush_rq) + return NULL; + if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) return NULL; -- cgit v0.10.2 From 10beafc190abcc4ad64024a053441520ba116127 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 8 Mar 2014 20:19:20 -0700 Subject: block: change flush sequence list addition back to front add Commit 18741986 inadvertently changed the rq flush insertion from a head to a tail insertion. Fix that back up. Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe diff --git a/block/blk-flush.c b/block/blk-flush.c index f598f79..43e6b47 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -140,14 +140,17 @@ static void mq_flush_run(struct work_struct *work) blk_mq_insert_request(rq, false, true, false); } -static bool blk_flush_queue_rq(struct request *rq) +static bool blk_flush_queue_rq(struct request *rq, bool add_front) { if (rq->q->mq_ops) { INIT_WORK(&rq->mq_flush_work, mq_flush_run); kblockd_schedule_work(rq->q, &rq->mq_flush_work); return false; } else { - list_add_tail(&rq->queuelist, &rq->q->queue_head); + if (add_front) + list_add(&rq->queuelist, &rq->q->queue_head); + else + list_add_tail(&rq->queuelist, &rq->q->queue_head); return true; } } @@ -193,7 +196,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, case REQ_FSEQ_DATA: list_move_tail(&rq->flush.list, &q->flush_data_in_flight); - queued = blk_flush_queue_rq(rq); + queued = blk_flush_queue_rq(rq, true); break; case REQ_FSEQ_DONE: @@ -326,7 +329,7 @@ static bool blk_kick_flush(struct request_queue *q) q->flush_rq->rq_disk = first_rq->rq_disk; q->flush_rq->end_io = flush_end_io; - return blk_flush_queue_rq(q->flush_rq); + return blk_flush_queue_rq(q->flush_rq, false); } static void flush_data_end_io(struct request *rq, int error) -- cgit v0.10.2 From 4c7b70406e5aadc1c78db4d375d5744df65c5b58 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 9 Mar 2014 18:30:50 +0100 Subject: Revert "ACPI / sleep: pm_power_off needs more sanity checks to be installed" Revert commit 3130497f5bab ("ACPI / sleep: pm_power_off needs more sanity checks to be installed") that breaks power ACPI power off on a lot of systems, because it checks wrong registers. Fixes: 3130497f5bab ("ACPI / sleep: pm_power_off needs more sanity checks to be installed") Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index b0f6c4a..b718806 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -807,12 +807,7 @@ int __init acpi_sleep_init(void) acpi_sleep_hibernate_setup(); status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - /* - * Check both ACPI S5 object and ACPI sleep registers to - * install pm_power_off_prepare/pm_power_off hook - */ - if (ACPI_SUCCESS(status) && acpi_gbl_FADT.sleep_control.address - && acpi_gbl_FADT.sleep_status.address) { + if (ACPI_SUCCESS(status)) { sleep_states[ACPI_STATE_S5] = 1; pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; -- cgit v0.10.2 From a8d9bc2e9f5d1c5a25e33cec096d2a1652d3fd52 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 9 Mar 2014 15:45:32 -0800 Subject: bnx2: Fix shutdown sequence The pci shutdown handler added in: bnx2: Add pci shutdown handler commit 25bfb1dd4ba3b2d9a49ce9d9b0cd7be1840e15ed created a shutdown down sequence without chip reset if the device was never brought up. This can cause the firmware to shutdown the PHY prematurely and cause MMIO read cycles to be unresponsive. On some systems, it may generate NMI in the bnx2's pci shutdown handler. The fix is to tell the firmware not to shutdown the PHY if there was no prior chip reset. Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index cda25ac..6c9e1c9 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2507,6 +2507,7 @@ bnx2_fw_sync(struct bnx2 *bp, u32 msg_data, int ack, int silent) bp->fw_wr_seq++; msg_data |= bp->fw_wr_seq; + bp->fw_last_msg = msg_data; bnx2_shmem_wr(bp, BNX2_DRV_MB, msg_data); @@ -4000,8 +4001,23 @@ bnx2_setup_wol(struct bnx2 *bp) wol_msg = BNX2_DRV_MSG_CODE_SUSPEND_NO_WOL; } - if (!(bp->flags & BNX2_FLAG_NO_WOL)) - bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT3 | wol_msg, 1, 0); + if (!(bp->flags & BNX2_FLAG_NO_WOL)) { + u32 val; + + wol_msg |= BNX2_DRV_MSG_DATA_WAIT3; + if (bp->fw_last_msg || BNX2_CHIP(bp) != BNX2_CHIP_5709) { + bnx2_fw_sync(bp, wol_msg, 1, 0); + return; + } + /* Tell firmware not to power down the PHY yet, otherwise + * the chip will take a long time to respond to MMIO reads. + */ + val = bnx2_shmem_rd(bp, BNX2_PORT_FEATURE); + bnx2_shmem_wr(bp, BNX2_PORT_FEATURE, + val | BNX2_PORT_FEATURE_ASF_ENABLED); + bnx2_fw_sync(bp, wol_msg, 1, 0); + bnx2_shmem_wr(bp, BNX2_PORT_FEATURE, val); + } } @@ -4033,9 +4049,22 @@ bnx2_set_power_state(struct bnx2 *bp, pci_power_t state) if (bp->wol) pci_set_power_state(bp->pdev, PCI_D3hot); - } else { - pci_set_power_state(bp->pdev, PCI_D3hot); + break; + + } + if (!bp->fw_last_msg && BNX2_CHIP(bp) == BNX2_CHIP_5709) { + u32 val; + + /* Tell firmware not to power down the PHY yet, + * otherwise the other port may not respond to + * MMIO reads. + */ + val = bnx2_shmem_rd(bp, BNX2_BC_STATE_CONDITION); + val &= ~BNX2_CONDITION_PM_STATE_MASK; + val |= BNX2_CONDITION_PM_STATE_UNPREP; + bnx2_shmem_wr(bp, BNX2_BC_STATE_CONDITION, val); } + pci_set_power_state(bp->pdev, PCI_D3hot); /* No more memory access after this point until * device is brought back to D0. diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h index f1cf2c4..e341bc3 100644 --- a/drivers/net/ethernet/broadcom/bnx2.h +++ b/drivers/net/ethernet/broadcom/bnx2.h @@ -6900,6 +6900,7 @@ struct bnx2 { u16 fw_wr_seq; u16 fw_drv_pulse_wr_seq; + u32 fw_last_msg; int rx_max_ring; int rx_ring_size; @@ -7406,6 +7407,10 @@ struct bnx2_rv2p_fw_file { #define BNX2_CONDITION_MFW_RUN_NCSI 0x00006000 #define BNX2_CONDITION_MFW_RUN_NONE 0x0000e000 #define BNX2_CONDITION_MFW_RUN_MASK 0x0000e000 +#define BNX2_CONDITION_PM_STATE_MASK 0x00030000 +#define BNX2_CONDITION_PM_STATE_FULL 0x00030000 +#define BNX2_CONDITION_PM_STATE_PREP 0x00020000 +#define BNX2_CONDITION_PM_STATE_UNPREP 0x00010000 #define BNX2_BC_STATE_DEBUG_CMD 0x1dc #define BNX2_BC_STATE_BC_DBG_CMD_SIGNATURE 0x42440000 -- cgit v0.10.2 From 979e0d74651ba5aa533277f2a6423d0f982fb6f6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 9 Mar 2014 08:21:58 +0000 Subject: KEYS: Make the keyring cycle detector ignore other keyrings of the same name This fixes CVE-2014-0102. The following command sequence produces an oops: keyctl new_session i=`keyctl newring _ses @s` keyctl link @s $i The problem is that search_nested_keyrings() sees two keyrings that have matching type and description, so keyring_compare_object() returns true. s_n_k() then passes the key to the iterator function - keyring_detect_cycle_iterator() - which *should* check to see whether this is the keyring of interest, not just one with the same name. Because assoc_array_find() will return one and only one match, I assumed that the iterator function would only see an exact match or never be called - but the iterator isn't only called from assoc_array_find()... The oops looks something like this: kernel BUG at /data/fs/linux-2.6-fscache/security/keys/keyring.c:1003! invalid opcode: 0000 [#1] SMP ... RIP: keyring_detect_cycle_iterator+0xe/0x1f ... Call Trace: search_nested_keyrings+0x76/0x2aa __key_link_check_live_key+0x50/0x5f key_link+0x4e/0x85 keyctl_keyring_link+0x60/0x81 SyS_keyctl+0x65/0xe4 tracesys+0xdd/0xe2 The fix is to make keyring_detect_cycle_iterator() check that the key it has is the key it was actually looking for rather than calling BUG_ON(). A testcase has been included in the keyutils testsuite for this: http://git.kernel.org/cgit/linux/kernel/git/dhowells/keyutils.git/commit/?id=891f3365d07f1996778ade0e3428f01878a1790b Reported-by: Tommi Rantala Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: Linus Torvalds diff --git a/security/keys/keyring.c b/security/keys/keyring.c index d46cbc5..2fb2576 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1000,7 +1000,11 @@ static int keyring_detect_cycle_iterator(const void *object, kenter("{%d}", key->serial); - BUG_ON(key != ctx->match_data); + /* We might get a keyring with matching index-key that is nonetheless a + * different keyring. */ + if (key != ctx->match_data) + return 0; + ctx->result = ERR_PTR(-EDEADLK); return 1; } -- cgit v0.10.2 From fa389e220254c69ffae0d403eac4146171062d08 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Mar 2014 19:41:57 -0700 Subject: Linux 3.14-rc6 diff --git a/Makefile b/Makefile index 78209ee..1a2628e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From 87536a81e1f52409b45333ce8cac415a1218163c Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 7 Mar 2014 12:44:18 +0100 Subject: net: af_key: fix sleeping under rcu There's a kmalloc with GFP_KERNEL in a helper (pfkey_sadb2xfrm_user_sec_ctx) used in pfkey_compile_policy which is called under rcu_read_lock. Adjust pfkey_sadb2xfrm_user_sec_ctx to have a gfp argument and adjust the users. CC: Dave Jones CC: Steffen Klassert CC: Fan Du CC: David S. Miller Signed-off-by: Nikolay Aleksandrov Signed-off-by: Steffen Klassert diff --git a/net/key/af_key.c b/net/key/af_key.c index 1a04c13..1526023 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -433,12 +433,13 @@ static inline int verify_sec_ctx_len(const void *p) return 0; } -static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx) +static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx, + gfp_t gfp) { struct xfrm_user_sec_ctx *uctx = NULL; int ctx_size = sec_ctx->sadb_x_ctx_len; - uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL); + uctx = kmalloc((sizeof(*uctx)+ctx_size), gfp); if (!uctx) return NULL; @@ -1124,7 +1125,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) goto out; @@ -2231,7 +2232,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) { err = -ENOBUFS; @@ -2335,7 +2336,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { - struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) return -ENOMEM; @@ -3239,7 +3240,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, } if ((*dir = verify_sec_ctx_len(p))) goto out; - uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); + uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); *dir = security_xfrm_policy_alloc(&xp->security, uctx); kfree(uctx); -- cgit v0.10.2 From 52a4c6404f91f2d2c5592ee6365a8418c4565f53 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 7 Mar 2014 12:44:19 +0100 Subject: selinux: add gfp argument to security_xfrm_policy_alloc and fix callers security_xfrm_policy_alloc can be called in atomic context so the allocation should be done with GFP_ATOMIC. Add an argument to let the callers choose the appropriate way. In order to do so a gfp argument needs to be added to the method xfrm_policy_alloc_security in struct security_operations and to the internal function selinux_xfrm_alloc_user. After that switch to GFP_ATOMIC in the atomic callers and leave GFP_KERNEL as before for the rest. The path that needed the gfp argument addition is: security_xfrm_policy_alloc -> security_ops.xfrm_policy_alloc_security -> all users of xfrm_policy_alloc_security (e.g. selinux_xfrm_policy_alloc) -> selinux_xfrm_alloc_user (here the allocation used to be GFP_KERNEL only) Now adding a gfp argument to selinux_xfrm_alloc_user requires us to also add it to security_context_to_sid which is used inside and prior to this patch did only GFP_KERNEL allocation. So add gfp argument to security_context_to_sid and adjust all of its callers as well. CC: Paul Moore CC: Dave Jones CC: Steffen Klassert CC: Fan Du CC: David S. Miller CC: LSM list CC: SELinux list Signed-off-by: Nikolay Aleksandrov Acked-by: Paul Moore Signed-off-by: Steffen Klassert diff --git a/include/linux/security.h b/include/linux/security.h index 5623a7f..2fc42d1 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1040,6 +1040,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Allocate a security structure to the xp->security field; the security * field is initialized to NULL when the xfrm_policy is allocated. * Return 0 if operation was successful (memory to allocate, legal context) + * @gfp is to specify the context for the allocation * @xfrm_policy_clone_security: * @old_ctx contains an existing xfrm_sec_ctx. * @new_ctxp contains a new xfrm_sec_ctx being cloned from old. @@ -1683,7 +1684,7 @@ struct security_operations { #ifdef CONFIG_SECURITY_NETWORK_XFRM int (*xfrm_policy_alloc_security) (struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx); + struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); int (*xfrm_policy_clone_security) (struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctx); void (*xfrm_policy_free_security) (struct xfrm_sec_ctx *ctx); int (*xfrm_policy_delete_security) (struct xfrm_sec_ctx *ctx); @@ -2859,7 +2860,8 @@ static inline void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) #ifdef CONFIG_SECURITY_NETWORK_XFRM -int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx); +int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, gfp_t gfp); int security_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp); void security_xfrm_policy_free(struct xfrm_sec_ctx *ctx); int security_xfrm_policy_delete(struct xfrm_sec_ctx *ctx); @@ -2877,7 +2879,9 @@ void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl); #else /* CONFIG_SECURITY_NETWORK_XFRM */ -static inline int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp) { return 0; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 1526023..7932697 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2239,7 +2239,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ goto out; } - err = security_xfrm_policy_alloc(&xp->security, uctx); + err = security_xfrm_policy_alloc(&xp->security, uctx, GFP_KERNEL); kfree(uctx); if (err) @@ -2341,7 +2341,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa if (!uctx) return -ENOMEM; - err = security_xfrm_policy_alloc(&pol_ctx, uctx); + err = security_xfrm_policy_alloc(&pol_ctx, uctx, GFP_KERNEL); kfree(uctx); if (err) return err; @@ -3241,7 +3241,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); - *dir = security_xfrm_policy_alloc(&xp->security, uctx); + *dir = security_xfrm_policy_alloc(&xp->security, uctx, GFP_ATOMIC); kfree(uctx); if (*dir) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index c274179..2f7ddc3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1221,7 +1221,7 @@ static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs return 0; uctx = nla_data(rt); - return security_xfrm_policy_alloc(&pol->security, uctx); + return security_xfrm_policy_alloc(&pol->security, uctx, GFP_KERNEL); } static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, @@ -1626,7 +1626,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - err = security_xfrm_policy_alloc(&ctx, uctx); + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); if (err) return err; } @@ -1928,7 +1928,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); - err = security_xfrm_policy_alloc(&ctx, uctx); + err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); if (err) return err; } diff --git a/security/capability.c b/security/capability.c index 8b4f24a..21e2b9c 100644 --- a/security/capability.c +++ b/security/capability.c @@ -757,7 +757,8 @@ static void cap_skb_owned_by(struct sk_buff *skb, struct sock *sk) #ifdef CONFIG_SECURITY_NETWORK_XFRM static int cap_xfrm_policy_alloc_security(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *sec_ctx) + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp) { return 0; } diff --git a/security/security.c b/security/security.c index 15b6928..919cad9 100644 --- a/security/security.c +++ b/security/security.c @@ -1317,9 +1317,11 @@ void security_skb_owned_by(struct sk_buff *skb, struct sock *sk) #ifdef CONFIG_SECURITY_NETWORK_XFRM -int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *sec_ctx) +int security_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *sec_ctx, + gfp_t gfp) { - return security_ops->xfrm_policy_alloc_security(ctxp, sec_ctx); + return security_ops->xfrm_policy_alloc_security(ctxp, sec_ctx, gfp); } EXPORT_SYMBOL(security_xfrm_policy_alloc); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4b34847..b332e2c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -668,7 +668,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, if (flags[i] == SBLABEL_MNT) continue; rc = security_context_to_sid(mount_options[i], - strlen(mount_options[i]), &sid); + strlen(mount_options[i]), &sid, GFP_KERNEL); if (rc) { printk(KERN_WARNING "SELinux: security_context_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -2489,7 +2489,8 @@ static int selinux_sb_remount(struct super_block *sb, void *data) if (flags[i] == SBLABEL_MNT) continue; len = strlen(mount_options[i]); - rc = security_context_to_sid(mount_options[i], len, &sid); + rc = security_context_to_sid(mount_options[i], len, &sid, + GFP_KERNEL); if (rc) { printk(KERN_WARNING "SELinux: security_context_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -2893,7 +2894,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (rc) return rc; - rc = security_context_to_sid(value, size, &newsid); + rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); if (rc == -EINVAL) { if (!capable(CAP_MAC_ADMIN)) { struct audit_buffer *ab; @@ -3050,7 +3051,7 @@ static int selinux_inode_setsecurity(struct inode *inode, const char *name, if (!value || !size) return -EACCES; - rc = security_context_to_sid((void *)value, size, &newsid); + rc = security_context_to_sid((void *)value, size, &newsid, GFP_KERNEL); if (rc) return rc; @@ -5529,7 +5530,7 @@ static int selinux_setprocattr(struct task_struct *p, str[size-1] = 0; size--; } - error = security_context_to_sid(value, size, &sid); + error = security_context_to_sid(value, size, &sid, GFP_KERNEL); if (error == -EINVAL && !strcmp(name, "fscreate")) { if (!capable(CAP_MAC_ADMIN)) { struct audit_buffer *ab; @@ -5638,7 +5639,7 @@ static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { - return security_context_to_sid(secdata, seclen, secid); + return security_context_to_sid(secdata, seclen, secid, GFP_KERNEL); } static void selinux_release_secctx(char *secdata, u32 seclen) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 8ed8daf..ce7852c 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -134,7 +134,7 @@ int security_sid_to_context(u32 sid, char **scontext, int security_sid_to_context_force(u32 sid, char **scontext, u32 *scontext_len); int security_context_to_sid(const char *scontext, u32 scontext_len, - u32 *out_sid); + u32 *out_sid, gfp_t gfp); int security_context_to_sid_default(const char *scontext, u32 scontext_len, u32 *out_sid, u32 def_sid, gfp_t gfp_flags); diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 48c3cc9..9f05847 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -10,7 +10,8 @@ #include int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx); + struct xfrm_user_sec_ctx *uctx, + gfp_t gfp); int selinux_xfrm_policy_clone(struct xfrm_sec_ctx *old_ctx, struct xfrm_sec_ctx **new_ctxp); void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 5122aff..d60c0ee 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -576,7 +576,7 @@ static ssize_t sel_write_context(struct file *file, char *buf, size_t size) if (length) goto out; - length = security_context_to_sid(buf, size, &sid); + length = security_context_to_sid(buf, size, &sid, GFP_KERNEL); if (length) goto out; @@ -731,11 +731,13 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid); + length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid, + GFP_KERNEL); if (length) goto out; @@ -817,11 +819,13 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) objname = namebuf; } - length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid); + length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid, + GFP_KERNEL); if (length) goto out; @@ -878,11 +882,13 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid); + length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid, + GFP_KERNEL); if (length) goto out; @@ -934,7 +940,7 @@ static ssize_t sel_write_user(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s", con, user) != 2) goto out; - length = security_context_to_sid(con, strlen(con) + 1, &sid); + length = security_context_to_sid(con, strlen(con) + 1, &sid, GFP_KERNEL); if (length) goto out; @@ -994,11 +1000,13 @@ static ssize_t sel_write_member(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid, + GFP_KERNEL); if (length) goto out; - length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid); + length = security_context_to_sid(tcon, strlen(tcon) + 1, &tsid, + GFP_KERNEL); if (length) goto out; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 5d0144e..4bca494 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1289,16 +1289,18 @@ out: * @scontext: security context * @scontext_len: length in bytes * @sid: security identifier, SID + * @gfp: context for the allocation * * Obtains a SID associated with the security context that * has the string representation specified by @scontext. * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient * memory is available, or 0 on success. */ -int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *sid) +int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *sid, + gfp_t gfp) { return security_context_to_sid_core(scontext, scontext_len, - sid, SECSID_NULL, GFP_KERNEL, 0); + sid, SECSID_NULL, gfp, 0); } /** diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 0462cb3..98b0426 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -78,7 +78,8 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) * xfrm_user_sec_ctx context. */ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx) + struct xfrm_user_sec_ctx *uctx, + gfp_t gfp) { int rc; const struct task_security_struct *tsec = current_security(); @@ -94,7 +95,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, if (str_len >= PAGE_SIZE) return -ENOMEM; - ctx = kmalloc(sizeof(*ctx) + str_len + 1, GFP_KERNEL); + ctx = kmalloc(sizeof(*ctx) + str_len + 1, gfp); if (!ctx) return -ENOMEM; @@ -103,7 +104,7 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, ctx->ctx_len = str_len; memcpy(ctx->ctx_str, &uctx[1], str_len); ctx->ctx_str[str_len] = '\0'; - rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid); + rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid, gfp); if (rc) goto err; @@ -282,9 +283,10 @@ int selinux_xfrm_skb_sid(struct sk_buff *skb, u32 *sid) * LSM hook implementation that allocs and transfers uctx spec to xfrm_policy. */ int selinux_xfrm_policy_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx) + struct xfrm_user_sec_ctx *uctx, + gfp_t gfp) { - return selinux_xfrm_alloc_user(ctxp, uctx); + return selinux_xfrm_alloc_user(ctxp, uctx, gfp); } /* @@ -332,7 +334,7 @@ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx) { - return selinux_xfrm_alloc_user(&x->security, uctx); + return selinux_xfrm_alloc_user(&x->security, uctx, GFP_KERNEL); } /* -- cgit v0.10.2 From f324777ea88bab2522602671e46fc0851d7d5e35 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 26 Feb 2014 04:15:14 -0500 Subject: [SCSI] qla2xxx: Fix multiqueue MSI-X registration. This fixes requesting of the MSI-X vectors for the base response queue. The iteration in the for loop in qla24xx_enable_msix() was incorrect. We should only iterate of the first two MSI-X vectors and not the total number of MSI-X vectors that have given to the driver for this device from pci_enable_msix() in this function. Cc: Signed-off-by: Chad Dupuis Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 9bc86b9..0a1dcb4 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2880,6 +2880,7 @@ static int qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) { #define MIN_MSIX_COUNT 2 +#define ATIO_VECTOR 2 int i, ret; struct msix_entry *entries; struct qla_msix_entry *qentry; @@ -2936,34 +2937,47 @@ msix_failed: } /* Enable MSI-X vectors for the base queue */ - for (i = 0; i < ha->msix_count; i++) { + for (i = 0; i < 2; i++) { qentry = &ha->msix_entries[i]; - if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) { - ret = request_irq(qentry->vector, - qla83xx_msix_entries[i].handler, - 0, qla83xx_msix_entries[i].name, rsp); - } else if (IS_P3P_TYPE(ha)) { + if (IS_P3P_TYPE(ha)) ret = request_irq(qentry->vector, qla82xx_msix_entries[i].handler, 0, qla82xx_msix_entries[i].name, rsp); - } else { + else ret = request_irq(qentry->vector, msix_entries[i].handler, 0, msix_entries[i].name, rsp); - } - if (ret) { - ql_log(ql_log_fatal, vha, 0x00cb, - "MSI-X: unable to register handler -- %x/%d.\n", - qentry->vector, ret); - qla24xx_disable_msix(ha); - ha->mqenable = 0; - goto msix_out; - } + if (ret) + goto msix_register_fail; qentry->have_irq = 1; qentry->rsp = rsp; rsp->msix = qentry; } + /* + * If target mode is enable, also request the vector for the ATIO + * queue. + */ + if (QLA_TGT_MODE_ENABLED() && IS_ATIO_MSIX_CAPABLE(ha)) { + qentry = &ha->msix_entries[ATIO_VECTOR]; + ret = request_irq(qentry->vector, + qla83xx_msix_entries[ATIO_VECTOR].handler, + 0, qla83xx_msix_entries[ATIO_VECTOR].name, rsp); + qentry->have_irq = 1; + qentry->rsp = rsp; + rsp->msix = qentry; + } + +msix_register_fail: + if (ret) { + ql_log(ql_log_fatal, vha, 0x00cb, + "MSI-X: unable to register handler -- %x/%d.\n", + qentry->vector, ret); + qla24xx_disable_msix(ha); + ha->mqenable = 0; + goto msix_out; + } + /* Enable MSI-X vector for response queue update for queue 0 */ if (IS_QLA83XX(ha)) { if (ha->msixbase && ha->mqiobase && -- cgit v0.10.2 From 126e964a444f125bd428757fb88c24c730f6fcf9 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 19 Dec 2013 01:16:21 -0600 Subject: [SCSI] be2iscsi: fix bad if expression https://bugzilla.kernel.org/show_bug.cgi?id=67091 Cc: Jayamohan Kallickal Signed-off-by: James Bottomley diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 1f37505..5642a9b 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -325,7 +325,7 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc) if (!abrt_task->sc || abrt_task->state == ISCSI_TASK_FREE) continue; - if (abrt_task->sc->device->lun != abrt_task->sc->device->lun) + if (sc->device->lun != abrt_task->sc->device->lun) continue; /* Invalidate WRB Posted for this Task */ -- cgit v0.10.2 From ddfadd7736b677de2d4ca2cd5b4b655368c85a7a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 6 Feb 2014 12:23:01 -0800 Subject: [SCSI] isci: fix reset timeout handling Remove an erroneous BUG_ON() in the case of a hard reset timeout. The reset timeout handler puts the port into the "awaiting link-up" state. The timeout causes the device to be disconnected and we need to be in the awaiting link-up state to re-connect the port. The BUG_ON() made the incorrect assumption that resets never timeout and we always complete the reset in the "resetting" state. Testing this patch also uncovered that libata continues to attempt to reset the port long after the driver has torn down the context. Once the driver has committed to abandoning the link it must indicate to libata that recovery ends by returning -ENODEV from ->lldd_I_T_nexus_reset(). Cc: Acked-by: Lukasz Dorau Reported-by: David Milburn Reported-by: Xun Ni Tested-by: Xun Ni Signed-off-by: Dan Williams Signed-off-by: James Bottomley diff --git a/drivers/scsi/isci/port_config.c b/drivers/scsi/isci/port_config.c index 85c77f6..ac87974 100644 --- a/drivers/scsi/isci/port_config.c +++ b/drivers/scsi/isci/port_config.c @@ -615,13 +615,6 @@ static void sci_apc_agent_link_up(struct isci_host *ihost, SCIC_SDS_APC_WAIT_LINK_UP_NOTIFICATION); } else { /* the phy is already the part of the port */ - u32 port_state = iport->sm.current_state_id; - - /* if the PORT'S state is resetting then the link up is from - * port hard reset in this case, we need to tell the port - * that link up is recieved - */ - BUG_ON(port_state != SCI_PORT_RESETTING); port_agent->phy_ready_mask |= 1 << phy_index; sci_port_link_up(iport, iphy); } diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 0d30ca8..5d6fda7 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -801,7 +801,7 @@ int isci_task_I_T_nexus_reset(struct domain_device *dev) /* XXX: need to cleanup any ireqs targeting this * domain_device */ - ret = TMF_RESP_FUNC_COMPLETE; + ret = -ENODEV; goto out; } -- cgit v0.10.2 From c59053a23d586675c25d789a7494adfdc02fba57 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 6 Feb 2014 12:23:20 -0800 Subject: [SCSI] isci: correct erroneous for_each_isci_host macro In the first place, the loop 'for' in the macro 'for_each_isci_host' (drivers/scsi/isci/host.h:314) is incorrect, because it accesses the 3rd element of 2 element array. After the 2nd iteration it executes the instruction: ihost = to_pci_info(pdev)->hosts[2] (while the size of the 'hosts' array equals 2) and reads an out of range element. In the second place, this loop is incorrectly optimized by GCC v4.8 (see http://marc.info/?l=linux-kernel&m=138998871911336&w=2). As a result, on platforms with two SCU controllers, the loop is executed more times than it can be (for i=0,1 and 2). It causes kernel panic during entering the S3 state and the following oops after 'rmmod isci': BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __list_add+0x1b/0xc0 Oops: 0000 [#1] SMP RIP: 0010:[] [] __list_add+0x1b/0xc0 Call Trace: [] __mutex_lock_slowpath+0x114/0x1b0 [] mutex_lock+0x1f/0x30 [] sas_disable_events+0x1b/0x50 [libsas] [] sas_unregister_ha+0x18/0x60 [libsas] [] isci_unregister+0x1e/0x40 [isci] [] isci_pci_remove+0x5d/0x100 [isci] [] pci_device_remove+0x3b/0xb0 [] __device_release_driver+0x7f/0xf0 [] driver_detach+0xa8/0xb0 [] bus_remove_driver+0x9b/0x120 [] driver_unregister+0x2c/0x50 [] pci_unregister_driver+0x23/0x80 [] isci_exit+0x10/0x1e [isci] [] SyS_delete_module+0x16b/0x2d0 [] ? do_notify_resume+0x61/0xa0 [] system_call_fastpath+0x16/0x1b The loop has been corrected. This patch fixes kernel panic during entering the S3 state and the above oops. Signed-off-by: Lukasz Dorau Reviewed-by: Maciej Patelczyk Tested-by: Lukasz Dorau Cc: Signed-off-by: Dan Williams Signed-off-by: James Bottomley diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h index 4911310..22a9bb1 100644 --- a/drivers/scsi/isci/host.h +++ b/drivers/scsi/isci/host.h @@ -311,9 +311,8 @@ static inline struct Scsi_Host *to_shost(struct isci_host *ihost) } #define for_each_isci_host(id, ihost, pdev) \ - for (id = 0, ihost = to_pci_info(pdev)->hosts[id]; \ - id < ARRAY_SIZE(to_pci_info(pdev)->hosts) && ihost; \ - ihost = to_pci_info(pdev)->hosts[++id]) + for (id = 0; id < SCI_MAX_CONTROLLERS && \ + (ihost = to_pci_info(pdev)->hosts[id]); id++) static inline void wait_for_start(struct isci_host *ihost) { -- cgit v0.10.2 From b77ed25c9f8402e8b3e49e220edb4ef09ecfbb53 Mon Sep 17 00:00:00 2001 From: Giridhar Malavali Date: Wed, 26 Feb 2014 04:15:12 -0500 Subject: [SCSI] qla2xxx: Poll during initialization for ISP25xx and ISP83xx Cc: stable@vger.kernel.org Signed-off-by: Giridhar Malavali Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index e1fe95e..266724b 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2996,8 +2996,7 @@ struct qla_hw_data { IS_QLA82XX(ha) || IS_QLA83XX(ha) || \ IS_QLA8044(ha)) #define IS_MSIX_NACK_CAPABLE(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha)) -#define IS_NOPOLLING_TYPE(ha) ((IS_QLA25XX(ha) || IS_QLA81XX(ha) || \ - IS_QLA83XX(ha)) && (ha)->flags.msix_enabled) +#define IS_NOPOLLING_TYPE(ha) (IS_QLA81XX(ha) && (ha)->flags.msix_enabled) #define IS_FAC_REQUIRED(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha)) #define IS_NOCACHE_VPD_TYPE(ha) (IS_QLA81XX(ha) || IS_QLA83XX(ha)) #define IS_ALOGIO_CAPABLE(ha) (IS_QLA23XX(ha) || IS_FWI2_CAPABLE(ha)) -- cgit v0.10.2 From 58d4d3c976b33784a1443c446a3d7203bf2153f0 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 4 Mar 2014 09:41:53 +0100 Subject: ASoC: si476x: Fix IO setup The si476x is a MFD device and the CODEC driver is using the regmap struct of the parent device, hence automatic IO setup will not work and we need to manually call snd_soc_codec_set_cache_io(). The issue was introduced commit d6173df35f ("ASoC: si476x: Remove custom register I/O implementation") Fixes: d6173df35f ("ASoC: si476x: Remove custom register I/O implementation") Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/si476x.c b/sound/soc/codecs/si476x.c index 52e7cb0..fa2b8e0 100644 --- a/sound/soc/codecs/si476x.c +++ b/sound/soc/codecs/si476x.c @@ -210,7 +210,7 @@ out: static int si476x_codec_probe(struct snd_soc_codec *codec) { codec->control_data = dev_get_regmap(codec->dev->parent, NULL); - return 0; + return snd_soc_codec_set_cache_io(codec, 0, 0, SND_SOC_REGMAP); } static struct snd_soc_dai_ops si476x_dai_ops = { -- cgit v0.10.2 From 8eeb5c15131d7b5061c10423eda3ae4c68db4eaf Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 4 Mar 2014 09:39:24 +0100 Subject: ASoC: 88pm860: Fix IO setup The 88pm860 is a MFD device and the CODEC driver is using the regmap struct of the parent device, hence automatic IO setup will not work and we need to manually call snd_soc_codec_set_cache_io(). The issue was introduced in commit f9ded3b2e7 ("ASoC: 88pm860x: Use regmap for I/O"). Fixes: f9ded3b2e7 ("ASoC: 88pm860x: Use regmap for I/O"). Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/88pm860x-codec.c b/sound/soc/codecs/88pm860x-codec.c index 75d0ad5..647a72c 100644 --- a/sound/soc/codecs/88pm860x-codec.c +++ b/sound/soc/codecs/88pm860x-codec.c @@ -1328,6 +1328,9 @@ static int pm860x_probe(struct snd_soc_codec *codec) pm860x->codec = codec; codec->control_data = pm860x->regmap; + ret = snd_soc_codec_set_cache_io(codec, 0, 0, SND_SOC_REGMAP); + if (ret) + return ret; for (i = 0; i < 4; i++) { ret = request_threaded_irq(pm860x->irq[i], NULL, -- cgit v0.10.2 From d152d22a18c240286c19997a6249ee76ea055926 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 10 Mar 2014 10:36:52 +0000 Subject: arm64: barriers: add dmb barrier Commit 8adbf57fc429 ("irqchip: gic: use dmb ishst instead of dsb when raising a softirq") added an explicit dmb(...) call to the GIC driver. This patch adds a simple dmb() macro to arm64, which expands to a DMB SY instruction. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 409ca37..66eb764 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,6 +25,7 @@ #define wfi() asm volatile("wfi" : : : "memory") #define isb() asm volatile("isb" : : : "memory") +#define dmb(opt) asm volatile("dmb sy" : : : "memory") #define dsb(opt) asm volatile("dsb sy" : : : "memory") #define mb() dsb() -- cgit v0.10.2 From 02c5bb4a352a4cca56e9b5d3a2a57d61062eb2e1 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 6 Feb 2014 01:00:41 +0000 Subject: perf trace: Decode architecture-specific signal numbers SIGSTKFLT is not defined on alpha, mips or sparc. SIGEMT and SIGSWI are defined on some architectures and should be decoded here if so. Signed-off-by: Ben Hutchings Fixes: 8bad5b0abfdb ('perf trace: Beautify signal number arg in several syscalls') Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1391648441.3003.101.camel@deadeye.wl.decadent.org.uk Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6aa6fb6..f954c26 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -825,7 +825,6 @@ static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscal P_SIGNUM(PIPE); P_SIGNUM(ALRM); P_SIGNUM(TERM); - P_SIGNUM(STKFLT); P_SIGNUM(CHLD); P_SIGNUM(CONT); P_SIGNUM(STOP); @@ -841,6 +840,15 @@ static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscal P_SIGNUM(IO); P_SIGNUM(PWR); P_SIGNUM(SYS); +#ifdef SIGEMT + P_SIGNUM(EMT); +#endif +#ifdef SIGSTKFLT + P_SIGNUM(STKFLT); +#endif +#ifdef SIGSWI + P_SIGNUM(SWI); +#endif default: break; } -- cgit v0.10.2 From 155b3a13a65d4217316dbe094843f2a7df0711fa Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 2 Mar 2014 14:32:07 +0100 Subject: perf symbols: Fix crash in elf_section_by_name Fixing crash in elf_section_by_name function caused by missing section name in elf binary. Reported-by: Albert Strasheim Signed-off-by: Jiri Olsa Cc: Albert Strasheim Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1393767127-599-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 3e9f336..516d19f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -151,15 +151,15 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, gelf_getshdr(sec, shp); str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); - if (!strcmp(name, str)) { + if (str && !strcmp(name, str)) { if (idx) *idx = cnt; - break; + return sec; } ++cnt; } - return sec; + return NULL; } #define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ -- cgit v0.10.2 From fdf57dd052d5cbd415533ae98f4d423286a85220 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 25 Feb 2014 22:43:45 -0500 Subject: perf machine: Use map as success in ip__resolve_ams When trying to map a bunch of instruction addresses to their respective threads, I kept getting a lot of bogus entries [I forget the exact reason as I patched my code months ago]. Looking through ip__resolve_ams, I noticed the check for if (al.sym) and realized, most times I have an al.map definition but sometimes an al.sym is undefined. In the cases where al.sym is undefined, the loop keeps going even though a valid al.map exists. Modify this check to use the more reliable al.map. This fixed my bogus entries. Signed-off-by: Don Zickus Cc: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1393386227-149412-2-git-send-email-dzickus@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c872991..620a198 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1213,7 +1213,7 @@ static void ip__resolve_ams(struct machine *machine, struct thread *thread, */ thread__find_addr_location(thread, machine, m, MAP__FUNCTION, ip, &al); - if (al.sym) + if (al.map) goto found; } found: -- cgit v0.10.2 From 83493d7e782d2630f1a55def14a79f0e7c4faac3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Mar 2014 11:13:43 -0400 Subject: libata: use wider match for blacklisting Crucial M500 We're now blacklisting "Crucial_CT???M500SSD1" and "Crucial_CT???M500SSD3". Also, "Micron_M500*" is blacklisted which is about the same devices as the crucial branded ones. Let's merge the two Crucial M500 entries and widen the match to "Crucial_CT???M500SSD*" so that we don't have to fiddle with new entries for similar devices. Signed-off-by: Tejun Heo Suggested-by: Linus Torvalds Cc: stable@vger.kernel.org diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 65d3f1b..8cb2522 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4225,8 +4225,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* devices that don't properly handle queued TRIM commands */ { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, - { "Crucial_CT???M500SSD1", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, - { "Crucial_CT???M500SSD3", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + { "Crucial_CT???M500SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, /* * Some WD SATA-I drives spin up and down erratically when the link -- cgit v0.10.2 From 1b56e98990bcdbb20b9fab163654b9315bf158e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Feb 2014 15:18:55 -0500 Subject: ocfs2 syncs the wrong range... Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8450262bc..51632c4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2393,8 +2393,8 @@ out_dio: if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, *ppos, + *ppos + count - 1); if (ret < 0) written = ret; @@ -2407,8 +2407,8 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, *ppos, + *ppos + count - 1); } /* -- cgit v0.10.2 From 9c225f2655e36a470c4f58dbbc99244c5fc7f2d4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Mar 2014 09:36:58 -0800 Subject: vfs: atomic f_pos accesses as per POSIX Our write() system call has always been atomic in the sense that you get the expected thread-safe contiguous write, but we haven't actually guaranteed that concurrent writes are serialized wrt f_pos accesses, so threads (or processes) that share a file descriptor and use "write()" concurrently would quite likely overwrite each others data. This violates POSIX.1-2008/SUSv4 Section XSI 2.9.7 that says: "2.9.7 Thread Interactions with Regular File Operations All of the following functions shall be atomic with respect to each other in the effects specified in POSIX.1-2008 when they operate on regular files or symbolic links: [...]" and one of the effects is the file position update. This unprotected file position behavior is not new behavior, and nobody has ever cared. Until now. Yongzhi Pan reported unexpected behavior to Michael Kerrisk that was due to this. This resolves the issue with a f_pos-specific lock that is taken by read/write/lseek on file descriptors that may be shared across threads or processes. Reported-by: Yongzhi Pan Reported-by: Michael Kerrisk Cc: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Al Viro diff --git a/fs/file_table.c b/fs/file_table.c index 5fff903..5b24008 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -135,6 +135,7 @@ struct file *get_empty_filp(void) atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); + mutex_init(&f->f_pos_lock); eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/namei.c b/fs/namei.c index 385f781..2f730ef 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1884,7 +1884,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (f.need_put) + if (f.flags & FDPUT_FPUT) *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); rcu_read_lock(); diff --git a/fs/open.c b/fs/open.c index 4b3e1ed..b9ed8b2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -705,6 +705,10 @@ static int do_dentry_open(struct file *f, return 0; } + /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ + if (S_ISREG(inode->i_mode)) + f->f_mode |= FMODE_ATOMIC_POS; + f->f_op = fops_get(inode->i_fop); if (unlikely(WARN_ON(!f->f_op))) { error = -ENODEV; diff --git a/fs/read_write.c b/fs/read_write.c index edc5746..932bb34 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -264,10 +264,36 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); +/* + * We only lock f_pos if we have threads or if the file might be + * shared with another process. In both cases we'll have an elevated + * file count (done either by fdget() or by fork()). + */ +static inline struct fd fdget_pos(int fd) +{ + struct fd f = fdget(fd); + struct file *file = f.file; + + if (file && (file->f_mode & FMODE_ATOMIC_POS)) { + if (file_count(file) > 1) { + f.flags |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); + } + } + return f; +} + +static inline void fdput_pos(struct fd f) +{ + if (f.flags & FDPUT_POS_UNLOCK) + mutex_unlock(&f.file->f_pos_lock); + fdput(f); +} + SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) { off_t retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); if (!f.file) return -EBADF; @@ -278,7 +304,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } - fdput(f); + fdput_pos(f); return retval; } @@ -498,7 +524,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -506,7 +532,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) ret = vfs_read(f.file, buf, count, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } return ret; } @@ -514,7 +540,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -522,7 +548,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, ret = vfs_write(f.file, buf, count, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } return ret; @@ -797,7 +823,7 @@ EXPORT_SYMBOL(vfs_writev); SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -805,7 +831,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, ret = vfs_readv(f.file, vec, vlen, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } if (ret > 0) @@ -817,7 +843,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -825,7 +851,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, ret = vfs_writev(f.file, vec, vlen, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } if (ret > 0) @@ -968,7 +994,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret; loff_t pos; @@ -978,7 +1004,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, ret = compat_readv(f.file, vec, vlen, &pos); if (ret >= 0) f.file->f_pos = pos; - fdput(f); + fdput_pos(f); return ret; } @@ -1035,7 +1061,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, const struct compat_iovec __user *, vec, compat_ulong_t, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret; loff_t pos; @@ -1045,7 +1071,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, ret = compat_writev(f.file, vec, vlen, &pos); if (ret >= 0) f.file->f_pos = pos; - fdput(f); + fdput_pos(f); return ret; } diff --git a/include/linux/file.h b/include/linux/file.h index cbacf4f..f2517fa 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -28,12 +28,14 @@ static inline void fput_light(struct file *file, int fput_needed) struct fd { struct file *file; - int need_put; + unsigned int flags; }; +#define FDPUT_FPUT 1 +#define FDPUT_POS_UNLOCK 2 static inline void fdput(struct fd fd) { - if (fd.need_put) + if (fd.flags & FDPUT_FPUT) fput(fd.file); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 6082956..ebfde04 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -123,6 +123,9 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) +/* File needs atomic accesses to f_pos */ +#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) @@ -780,13 +783,14 @@ struct file { const struct file_operations *f_op; /* - * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR. + * Protects f_ep_links, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; + struct mutex f_pos_lock; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; -- cgit v0.10.2 From 00e188ef6a7e7bf2883bcffc30d89e98a0bb76b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Mar 2014 23:48:18 -0500 Subject: sockfd_lookup_light(): switch to fdget^W^Waway from fget_light Signed-off-by: Al Viro diff --git a/net/socket.c b/net/socket.c index 879933a..fd8d86e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -450,16 +450,17 @@ EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { - struct file *file; + struct fd f = fdget(fd); struct socket *sock; *err = -EBADF; - file = fget_light(fd, fput_needed); - if (file) { - sock = sock_from_file(file, err); - if (sock) + if (f.file) { + sock = sock_from_file(f.file, err); + if (likely(sock)) { + *fput_needed = f.flags; return sock; - fput_light(file, *fput_needed); + } + fdput(f); } return NULL; } -- cgit v0.10.2 From bd2a31d522344b3ac2fb680bd2366e77a9bd8209 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 14:54:22 -0500 Subject: get rid of fget_light() instead of returning the flags by reference, we can just have the low-level primitive return those in lower bits of unsigned long, with struct file * derived from the rest. Signed-off-by: Al Viro diff --git a/fs/file.c b/fs/file.c index db25c2b..60a45e9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -683,35 +683,65 @@ EXPORT_SYMBOL(fget_raw); * The fput_needed flag returned by fget_light should be passed to the * corresponding fput_light. */ -struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed) +static unsigned long __fget_light(unsigned int fd, fmode_t mask) { struct files_struct *files = current->files; struct file *file; - *fput_needed = 0; if (atomic_read(&files->count) == 1) { file = __fcheck_files(files, fd); - if (file && (file->f_mode & mask)) - file = NULL; + if (!file || unlikely(file->f_mode & mask)) + return 0; + return (unsigned long)file; } else { file = __fget(fd, mask); - if (file) - *fput_needed = 1; + if (!file) + return 0; + return FDPUT_FPUT | (unsigned long)file; } - - return file; } -struct file *fget_light(unsigned int fd, int *fput_needed) +unsigned long __fdget(unsigned int fd) { - return __fget_light(fd, FMODE_PATH, fput_needed); + return __fget_light(fd, FMODE_PATH); } -EXPORT_SYMBOL(fget_light); +EXPORT_SYMBOL(__fdget); -struct file *fget_raw_light(unsigned int fd, int *fput_needed) +unsigned long __fdget_raw(unsigned int fd) { - return __fget_light(fd, 0, fput_needed); + return __fget_light(fd, 0); +} + +unsigned long __fdget_pos(unsigned int fd) +{ + struct files_struct *files = current->files; + struct file *file; + unsigned long v; + + if (atomic_read(&files->count) == 1) { + file = __fcheck_files(files, fd); + v = 0; + } else { + file = __fget(fd, 0); + v = FDPUT_FPUT; + } + if (!file) + return 0; + + if (file->f_mode & FMODE_ATOMIC_POS) { + if (file_count(file) > 1) { + v |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); + } + } + return v | (unsigned long)file; } +/* + * We only lock f_pos if we have threads or if the file might be + * shared with another process. In both cases we'll have an elevated + * file count (done either by fdget() or by fork()). + */ + void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; diff --git a/fs/read_write.c b/fs/read_write.c index 932bb34..54e19b9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -264,23 +264,9 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); -/* - * We only lock f_pos if we have threads or if the file might be - * shared with another process. In both cases we'll have an elevated - * file count (done either by fdget() or by fork()). - */ static inline struct fd fdget_pos(int fd) { - struct fd f = fdget(fd); - struct file *file = f.file; - - if (file && (file->f_mode & FMODE_ATOMIC_POS)) { - if (file_count(file) > 1) { - f.flags |= FDPUT_POS_UNLOCK; - mutex_lock(&file->f_pos_lock); - } - } - return f; + return __to_fd(__fdget_pos(fd)); } static inline void fdput_pos(struct fd f) diff --git a/include/linux/file.h b/include/linux/file.h index f2517fa..4d69123 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -40,23 +40,24 @@ static inline void fdput(struct fd fd) } extern struct file *fget(unsigned int fd); -extern struct file *fget_light(unsigned int fd, int *fput_needed); +extern struct file *fget_raw(unsigned int fd); +extern unsigned long __fdget(unsigned int fd); +extern unsigned long __fdget_raw(unsigned int fd); +extern unsigned long __fdget_pos(unsigned int fd); -static inline struct fd fdget(unsigned int fd) +static inline struct fd __to_fd(unsigned long v) { - int b; - struct file *f = fget_light(fd, &b); - return (struct fd){f,b}; + return (struct fd){(struct file *)(v & ~3),v & 3}; } -extern struct file *fget_raw(unsigned int fd); -extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); +static inline struct fd fdget(unsigned int fd) +{ + return __to_fd(__fdget(fd)); +} static inline struct fd fdget_raw(unsigned int fd) { - int b; - struct file *f = fget_raw_light(fd, &b); - return (struct fd){f,b}; + return __to_fd(__fdget_raw(fd)); } extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index ebfde04..23b2a35 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -812,7 +812,7 @@ struct file { #ifdef CONFIG_DEBUG_WRITECOUNT unsigned long f_mnt_write_state; #endif -}; +} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; -- cgit v0.10.2 From 37314363cd65d19c71bea5f222e5108c93dc3c78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 8 Mar 2014 08:01:19 -0800 Subject: pkt_sched: move the sanity test in qdisc_list_add() The WARN_ON(root == &noop_qdisc)) added in qdisc_list_add() can trigger in normal conditions when devices are not up. It should be done only right before the list_add_tail() call. Fixes: e57a784d8cae4 ("pkt_sched: set root qdisc before change() in attach_default_qdiscs()") Reported-by: Valdis Kletnieks Tested-by: Mirco Tischler Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1313145..a07d55e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -273,11 +273,12 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_list_add(struct Qdisc *q) { - struct Qdisc *root = qdisc_dev(q)->qdisc; + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + struct Qdisc *root = qdisc_dev(q)->qdisc; - WARN_ON_ONCE(root == &noop_qdisc); - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) + WARN_ON_ONCE(root == &noop_qdisc); list_add_tail(&q->list, &root->list); + } } EXPORT_SYMBOL(qdisc_list_add); -- cgit v0.10.2 From bc48bc806442114ea44f61d6b18e02c2bd6236fd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 9 Mar 2014 04:03:22 +0000 Subject: bna: Replace large udelay() with mdelay() udelay() does not work on some architectures for values above 2000, in particular on ARM: ERROR: "__bad_udelay" [drivers/net/ethernet/brocade/bna/bna.ko] undefined! Reported-by: Vagrant Cascadian Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 1803c39..354ae97 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1704,7 +1704,7 @@ bfa_flash_sem_get(void __iomem *bar) while (!bfa_raw_sem_get(bar)) { if (--n <= 0) return BFA_STATUS_BADFLASH; - udelay(10000); + mdelay(10); } return BFA_STATUS_OK; } -- cgit v0.10.2 From a91c406c742b3fc28969ec3316c9588acde66e5a Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 15 Feb 2014 15:45:29 +1100 Subject: m68k: Remove CONSOLE_PENGUIN macro, adopt CONFIG_LOGO Allow CONFIG_LOGO to enable/disable the head.S penguin logo as well as the framebuffer console logo. This should save a few bytes. It also gets rid of the obscure CONSOLE_PENGUIN macro. Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S index 4c99bab..cea4af9 100644 --- a/arch/m68k/kernel/head.S +++ b/arch/m68k/kernel/head.S @@ -275,7 +275,6 @@ #ifdef CONFIG_FRAMEBUFFER_CONSOLE #define CONSOLE -#define CONSOLE_PENGUIN #endif #ifdef CONFIG_EARLY_PRINTK @@ -907,15 +906,15 @@ L(nothp): */ #ifdef CONFIG_MAC is_not_mac(L(nocon)) -#ifdef CONSOLE +# ifdef CONSOLE console_init -#ifdef CONSOLE_PENGUIN +# ifdef CONFIG_LOGO console_put_penguin -#endif /* CONSOLE_PENGUIN */ +# endif /* CONFIG_LOGO */ console_put_stats -#endif /* CONSOLE */ +# endif /* CONSOLE */ L(nocon): -#endif /* CONFIG_MAC */ +#endif /* CONFIG_MAC */ putc '\n' @@ -3456,7 +3455,7 @@ func_start console_put_stats,%a0/%d7 func_return console_put_stats -#ifdef CONSOLE_PENGUIN +#ifdef CONFIG_LOGO func_start console_put_penguin,%a0-%a1/%d0-%d7 /* * Get 'that_penguin' onto the screen in the upper right corner -- cgit v0.10.2 From 7ce0526a6c2681f8edd1f9940c3d668e4cde4cf2 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 15 Feb 2014 15:46:24 +1100 Subject: m68k: Remove dead code Signed-off-by: Finn Thain Signed-off-by: Geert Uytterhoeven diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S index cea4af9..cb9c40e 100644 --- a/arch/m68k/kernel/head.S +++ b/arch/m68k/kernel/head.S @@ -657,27 +657,6 @@ ENTRY(__start) movel %a0@,%a1@ #endif -#if 0 - /* - * Clear the screen - */ - lea %pc@(L(mac_videobase)),%a0 - movel %a0@,%a1 - lea %pc@(L(mac_dimensions)),%a0 - movel %a0@,%d1 - swap %d1 /* #rows is high bytes */ - andl #0xFFFF,%d1 /* rows */ - subl #10,%d1 - lea %pc@(L(mac_rowbytes)),%a0 -loopy2: - movel %a0@,%d0 - subql #1,%d0 -loopx2: - moveb #0x55, %a1@+ - dbra %d0,loopx2 - dbra %d1,loopy2 -#endif - L(test_notmac): #endif /* CONFIG_MAC */ @@ -3323,7 +3302,6 @@ func_return set_leds #define Lconsole_struct_num_columns 8 #define Lconsole_struct_num_rows 12 #define Lconsole_struct_left_edge 16 -#define Lconsole_struct_penguin_putc 20 func_start console_init,%a0-%a4/%d0-%d7 /* @@ -3798,38 +3776,6 @@ L(console_plot_pixel_exit): func_return console_plot_pixel #endif /* CONSOLE */ -#if 0 -/* - * This is some old code lying around. I don't believe - * it's used or important anymore. My guess is it contributed - * to getting to this point, but it's done for now. - * It was still in the 2.1.77 head.S, so it's still here. - * (And still not used!) - */ -L(showtest): - moveml %a0/%d7,%sp@- - puts "A=" - putn %a1 - - .long 0xf0119f15 | ptestr #5,%a1@,#7,%a0 - - puts "DA=" - putn %a0 - - puts "D=" - putn %a0@ - - puts "S=" - lea %pc@(L(mmu)),%a0 - .long 0xf0106200 | pmove %psr,%a0@ - clrl %d7 - movew %a0@,%d7 - putn %d7 - - putc '\n' - moveml %sp@+,%a0/%d7 - rts -#endif /* 0 */ __INITDATA .align 4 @@ -3848,7 +3794,6 @@ L(console_globals): .long 0 /* max num columns */ .long 0 /* max num rows */ .long 0 /* left edge */ - .long 0 /* mac putc */ L(console_font): .long 0 /* pointer to console font (struct font_desc) */ L(console_font_data): -- cgit v0.10.2 From ecc79d4964c4154b8bc2de2a8ffed108f009c405 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sun, 16 Feb 2014 11:49:15 +0100 Subject: m68k: head.S - Remove bogus L prefix in comment Signed-off-by: Geert Uytterhoeven diff --git a/arch/m68k/kernel/head.S b/arch/m68k/kernel/head.S index cb9c40e..3ab329b 100644 --- a/arch/m68k/kernel/head.S +++ b/arch/m68k/kernel/head.S @@ -3308,7 +3308,7 @@ func_start console_init,%a0-%a4/%d0-%d7 * Some of the register usage that follows * a0 = pointer to boot_info * a1 = pointer to screen - * a2 = pointer to Lconsole_globals + * a2 = pointer to console_globals * d3 = pixel width of screen * d4 = pixel height of screen * (d3,d4) ~= (x,y) of a point just below -- cgit v0.10.2 From eff9cf8d6e8b048f2f744a1cc382e213a00f3d2c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 1 Mar 2014 20:51:03 +1300 Subject: [SCSI] atari_scsi: Fix sleep_on race sleep_on is known broken and going away. The atari_scsi driver is one of two remaining users in the falcon_get_lock() function, which is a rather crazy piece of code. This does not attempt to fix the driver's locking scheme in general, but at least prevents falcon_get_lock from going to sleep when no other thread holds the same lock or tries to get it, and we no longer schedule with irqs disabled. Signed-off-by: Arnd Bergmann [MSch: fixed completion conditions missed in Arnds' original RFC patch] Signed-off-by: Michael Schmitz Cc: Geert Uytterhoeven Cc: James E.J. Bottomley Cc: linux-scsi@vger.kernel.org Signed-off-by: Geert Uytterhoeven diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c index a3e6c8a..296c936 100644 --- a/drivers/scsi/atari_scsi.c +++ b/drivers/scsi/atari_scsi.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -549,8 +550,10 @@ static void falcon_get_lock(void) local_irq_save(flags); - while (!in_irq() && falcon_got_lock && stdma_others_waiting()) - sleep_on(&falcon_fairness_wait); + wait_event_cmd(falcon_fairness_wait, + in_interrupt() || !falcon_got_lock || !stdma_others_waiting(), + local_irq_restore(flags), + local_irq_save(flags)); while (!falcon_got_lock) { if (in_irq()) @@ -562,7 +565,10 @@ static void falcon_get_lock(void) falcon_trying_lock = 0; wake_up(&falcon_try_wait); } else { - sleep_on(&falcon_try_wait); + wait_event_cmd(falcon_try_wait, + falcon_got_lock && !falcon_trying_lock, + local_irq_restore(flags), + local_irq_save(flags)); } } -- cgit v0.10.2 From 2818fa0fa068bcbc87d6bd9064e3c1f72d6fcc2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Mar 2014 22:57:52 -0800 Subject: pkt_sched: fq: do not hold qdisc lock while allocating memory Resizing fq hash table allocates memory while holding qdisc spinlock, with BH disabled. This is definitely not good, as allocation might sleep. We can drop the lock and get it when needed, we hold RTNL so no other changes can happen at the same time. Signed-off-by: Eric Dumazet Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Signed-off-by: David S. Miller diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 08ef7a4..21e2517 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -601,6 +601,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *array; + void *old_fq_root; u32 idx; if (q->fq_root && log == q->fq_trees_log) @@ -615,13 +616,19 @@ static int fq_resize(struct Qdisc *sch, u32 log) for (idx = 0; idx < (1U << log); idx++) array[idx] = RB_ROOT; - if (q->fq_root) { - fq_rehash(q, q->fq_root, q->fq_trees_log, array, log); - fq_free(q->fq_root); - } + sch_tree_lock(sch); + + old_fq_root = q->fq_root; + if (old_fq_root) + fq_rehash(q, old_fq_root, q->fq_trees_log, array, log); + q->fq_root = array; q->fq_trees_log = log; + sch_tree_unlock(sch); + + fq_free(old_fq_root); + return 0; } @@ -697,9 +704,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } - if (!err) + if (!err) { + sch_tree_unlock(sch); err = fq_resize(sch, fq_log); - + sch_tree_lock(sch); + } while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); -- cgit v0.10.2 From 7f328908f9cb69a72ce1c9279508c786cf85f1f5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Mar 2014 14:29:37 -0600 Subject: mtip32xx: fix bad use of smp_processor_id() mtip_pci_probe() dumps the current CPU when loaded, but it does so in a preemptible context. Hence smp_processor_id() correctly warns: BUG: using smp_processor_id() in preemptible [00000000] code: systemd-udevd/155 caller is mtip_pci_probe+0x53/0x880 [mtip32xx] Switch to raw_smp_processor_id(), since it's just informational and persistent accuracy isn't important. Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 5160269..d777bb7 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -4498,7 +4498,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, } dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), - cpu_to_node(smp_processor_id()), smp_processor_id()); + cpu_to_node(raw_smp_processor_id()), raw_smp_processor_id()); dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { -- cgit v0.10.2 From e97ca8e5b864f88b028c1759ba8536fa827d6d96 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 10 Mar 2014 15:49:43 -0700 Subject: mm: fix GFP_THISNODE callers and clarify GFP_THISNODE is for callers that implement their own clever fallback to remote nodes. It restricts the allocation to the specified node and does not invoke reclaim, assuming that the caller will take care of it when the fallback fails, e.g. through a subsequent allocation request without GFP_THISNODE set. However, many current GFP_THISNODE users only want the node exclusive aspect of the flag, without actually implementing their own fallback or triggering reclaim if necessary. This results in things like page migration failing prematurely even when there is easily reclaimable memory available, unless kswapd happens to be running already or a concurrent allocation attempt triggers the necessary reclaim. Convert all callsites that don't implement their own fallback strategy to __GFP_THISNODE. This restricts the allocation a single node too, but at the same time allows the allocator to enter the slowpath, wake kswapd, and invoke direct reclaim if necessary, to make the allocation happen when memory is full. Signed-off-by: Johannes Weiner Acked-by: Rik van Riel Cc: Jan Stancek Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index a96bcf8..20e8a9b 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) /* attempt to allocate a granule's worth of cached memory pages */ page = alloc_pages_exact_node(nid, - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { mutex_unlock(&uc_pool->add_chunk_mutex); diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 5ec1e47..e865d74 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -123,7 +123,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order) area->nid = nid; area->order = order; - area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE, + area->pages = alloc_pages_exact_node(area->nid, + GFP_KERNEL|__GFP_THISNODE, area->order); if (!area->pages) { diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index b9e2000..95c8944 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -240,7 +240,7 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, nid = cpu_to_node(cpu); page = alloc_pages_exact_node(nid, - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, pg_order); if (page == NULL) { dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0437439..39b81dc 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -123,6 +123,10 @@ struct vm_area_struct; __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ __GFP_NO_KSWAPD) +/* + * GFP_THISNODE does not perform any reclaim, you most likely want to + * use __GFP_THISNODE to allocate from a given node without fallback! + */ #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) #else diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5f2052c..9b61b9b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -590,10 +590,10 @@ static inline bool zone_is_empty(struct zone *zone) /* * The NUMA zonelists are doubled because we need zonelists that restrict the - * allocations to a single node for GFP_THISNODE. + * allocations to a single node for __GFP_THISNODE. * * [0] : Zonelist with fallback - * [1] : No fallback (GFP_THISNODE) + * [1] : No fallback (__GFP_THISNODE) */ #define MAX_ZONELISTS 2 diff --git a/include/linux/slab.h b/include/linux/slab.h index 9260abd..b5b2df6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -410,7 +410,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * * %GFP_NOWAIT - Allocation will not sleep. * - * %GFP_THISNODE - Allocate node-local memory only. + * %__GFP_THISNODE - Allocate node-local memory only. * * %GFP_DMA - Allocation suitable for DMA. * Should only be used for kmalloc() caches. Otherwise, use a diff --git a/kernel/profile.c b/kernel/profile.c index 6631e1e..ebdd9c1 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -549,14 +549,14 @@ static int create_hash_tables(void) struct page *page; page = alloc_pages_exact_node(node, - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 0); if (!page) goto out_cleanup; per_cpu(cpu_profile_hits, cpu)[1] = (struct profile_hit *)page_address(page); page = alloc_pages_exact_node(node, - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 0); if (!page) goto out_cleanup; diff --git a/mm/migrate.c b/mm/migrate.c index 482a33d..b494fdb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1158,7 +1158,7 @@ static struct page *new_page_node(struct page *p, unsigned long private, pm->node); else return alloc_pages_exact_node(pm->node, - GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); + GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); } /* @@ -1544,9 +1544,9 @@ static struct page *alloc_misplaced_dst_page(struct page *page, struct page *newpage; newpage = alloc_pages_exact_node(nid, - (GFP_HIGHUSER_MOVABLE | GFP_THISNODE | - __GFP_NOMEMALLOC | __GFP_NORETRY | - __GFP_NOWARN) & + (GFP_HIGHUSER_MOVABLE | + __GFP_THISNODE | __GFP_NOMEMALLOC | + __GFP_NORETRY | __GFP_NOWARN) & ~GFP_IOFS, 0); return newpage; @@ -1747,7 +1747,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, goto out_dropref; new_page = alloc_pages_node(node, - (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER); + (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, + HPAGE_PMD_ORDER); if (!new_page) goto out_fail; -- cgit v0.10.2 From 2af120bc040c5ebcda156df6be6a66610ab6957f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 10 Mar 2014 15:49:44 -0700 Subject: mm/compaction: break out of loop on !PageBuddy in isolate_freepages_block We received several reports of bad page state when freeing CMA pages previously allocated with alloc_contig_range: BUG: Bad page state in process Binder_A pfn:63202 page:d21130b0 count:0 mapcount:1 mapping: (null) index:0x7dfbf page flags: 0x40080068(uptodate|lru|active|swapbacked) Based on the page state, it looks like the page was still in use. The page flags do not make sense for the use case though. Further debugging showed that despite alloc_contig_range returning success, at least one page in the range still remained in the buddy allocator. There is an issue with isolate_freepages_block. In strict mode (which CMA uses), if any pages in the range cannot be isolated, isolate_freepages_block should return failure 0. The current check keeps track of the total number of isolated pages and compares against the size of the range: if (strict && nr_strict_required > total_isolated) total_isolated = 0; After taking the zone lock, if one of the pages in the range is not in the buddy allocator, we continue through the loop and do not increment total_isolated. If in the last iteration of the loop we isolate more than one page (e.g. last page needed is a higher order page), the check for total_isolated may pass and we fail to detect that a page was skipped. The fix is to bail out if the loop immediately if we are in strict mode. There's no benfit to continuing anyway since we need all pages to be isolated. Additionally, drop the error checking based on nr_strict_required and just check the pfn ranges. This matches with what isolate_freepages_range does. Signed-off-by: Laura Abbott Acked-by: Minchan Kim Cc: Mel Gorman Acked-by: Vlastimil Babka Cc: Joonsoo Kim Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Michal Nazarewicz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/compaction.c b/mm/compaction.c index b48c525..9185775 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -251,7 +251,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, { int nr_scanned = 0, total_isolated = 0; struct page *cursor, *valid_page = NULL; - unsigned long nr_strict_required = end_pfn - blockpfn; unsigned long flags; bool locked = false; @@ -264,11 +263,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, nr_scanned++; if (!pfn_valid_within(blockpfn)) - continue; + goto isolate_fail; + if (!valid_page) valid_page = page; if (!PageBuddy(page)) - continue; + goto isolate_fail; /* * The zone lock must be held to isolate freepages. @@ -289,12 +289,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, /* Recheck this is a buddy page under lock */ if (!PageBuddy(page)) - continue; + goto isolate_fail; /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); - if (!isolated && strict) - break; total_isolated += isolated; for (i = 0; i < isolated; i++) { list_add(&page->lru, freelist); @@ -305,7 +303,15 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, if (isolated) { blockpfn += isolated - 1; cursor += isolated - 1; + continue; } + +isolate_fail: + if (strict) + break; + else + continue; + } trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated); @@ -315,7 +321,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, * pages requested were isolated. If there were any failures, 0 is * returned and CMA will fail. */ - if (strict && nr_strict_required > total_isolated) + if (strict && blockpfn < end_pfn) total_isolated = 0; if (locked) -- cgit v0.10.2 From 70335abb2689c8cd5df91bf2d95a65649addf50b Mon Sep 17 00:00:00 2001 From: Artem Fetishev Date: Mon, 10 Mar 2014 15:49:45 -0700 Subject: fs/proc/base.c: fix GPF in /proc/$PID/map_files The expected logic of proc_map_files_get_link() is either to return 0 and initialize 'path' or return an error and leave 'path' uninitialized. By the time dname_to_vma_addr() returns 0 the corresponding vma may have already be gone. In this case the path is not initialized but the return value is still 0. This results in 'general protection fault' inside d_path(). Steps to reproduce: CONFIG_CHECKPOINT_RESTORE=y fd = open(...); while (1) { mmap(fd, ...); munmap(fd, ...); } ls -la /proc/$PID/map_files Addresses https://bugzilla.kernel.org/show_bug.cgi?id=68991 Signed-off-by: Artem Fetishev Signed-off-by: Aleksandr Terekhov Reported-by: Acked-by: Pavel Emelyanov Acked-by: Cyrill Gorcunov Reviewed-by: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/base.c b/fs/proc/base.c index 5150706..b976062 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1824,6 +1824,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = -ENOENT; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { -- cgit v0.10.2 From 2216ee853017f9c9371106c5c02d4fe42f61cbfa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 10 Mar 2014 15:49:46 -0700 Subject: mm/Kconfig: fix URL for zsmalloc benchmark The help text for CONFIG_PGTABLE_MAPPING has an incorrect URL. While we're at it, remove the unnecessary footnote notation. Signed-off-by: Ben Hutchings Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/Kconfig b/mm/Kconfig index 2d9f150..2888024 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -575,5 +575,5 @@ config PGTABLE_MAPPING then you should select this. This causes zsmalloc to use page table mapping rather than copying for object mapping. - You can check speed with zsmalloc benchmark[1]. - [1] https://github.com/spartacus06/zsmalloc + You can check speed with zsmalloc benchmark: + https://github.com/spartacus06/zsmapbench -- cgit v0.10.2 From 2930ffc7593b64fe00fd7c5a0a7f543078d73ed9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 10 Mar 2014 15:49:48 -0700 Subject: revert "kallsyms: fix absolute addresses for kASLR" Revert the recently applied 0f55159d091c ("kallsyms: fix absolute addresses for kASLR"). Kees said : This got NAKed, please don't apply -- this patch works for x86 and : ARM, but may cause problems for others: : : https://lkml.org/lkml/2014/2/24/718 It appears that Kees will be fixing all this up for 3.15. Cc: Andy Honig Cc: Kees Cook Cc: Michal Marek Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 276e84b..10085de 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -330,7 +330,8 @@ static void write_src(void) printf("\tPTR\t_text + %#llx\n", table[i].addr - _text); else - printf("\tPTR\t%#llx\n", table[i].addr); + printf("\tPTR\t_text - %#llx\n", + _text - table[i].addr); } else { printf("\tPTR\t%#llx\n", table[i].addr); } -- cgit v0.10.2 From 1443176fd6857744d6772e6a607d7d08f0f6afd9 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Mon, 10 Mar 2014 15:49:49 -0700 Subject: MAINTAINERS: blackfin: add git repository Add the git repository currently in use for blackfin architecture development. This information was obtained from Steven Miao. Signed-off-by: Michael Opdenacker Cc: Steven Miao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index b7befe7..1ecfde1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1738,6 +1738,7 @@ F: include/uapi/linux/bfs_fs.h BLACKFIN ARCHITECTURE M: Steven Miao L: adi-buildroot-devel@lists.sourceforge.net +T: git git://git.code.sf.net/p/adi-linux/code W: http://blackfin.uclinux.org S: Supported F: arch/blackfin/ -- cgit v0.10.2 From 53942232369ec317f77dc8c2d5a0637bb85a6e84 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Mar 2014 15:49:50 -0700 Subject: tools/testing/selftests/ipc/msgque.c: handle msgget failure return correctly A failed msgget causes the test to return an uninitialised value in ret. Assign ret to -errno on error exit. Signed-off-by: Colin Ian King Acked-by: Davidlohr Bueso Cc: Stanislav Kinsbursky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index d664182..aa290c0 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -201,6 +201,7 @@ int main(int argc, char **argv) msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666); if (msgque.msq_id == -1) { + err = -errno; printf("Can't create queue\n"); goto err_out; } -- cgit v0.10.2 From d7d673a591701f131e53d4fd4e2b9352f1316642 Mon Sep 17 00:00:00 2001 From: Sergei Antonov Date: Mon, 10 Mar 2014 15:49:51 -0700 Subject: hfsplus: add HFSX subfolder count support Adds support for HFSX 'HasFolderCount' flag and a corresponding 'folderCount' field in folder records. (For reference see HFS_FOLDERCOUNT and kHFSHasFolderCountBit/kHFSHasFolderCountMask in Apple's source code.) Ignoring subfolder count leads to fs errors found by Mac: ... Checking catalog hierarchy. HasFolderCount flag needs to be set (id = 105) (It should be 0x10 instead of 0) Incorrect folder count in a directory (id = 2) (It should be 7 instead of 6) ... Steps to reproduce: Format with "newfs_hfs -s /dev/diskXXX". Mount in Linux. Create a new directory in root. Unmount. Run "fsck_hfs /dev/diskXXX". The patch handles directory creation, deletion, and rename. Signed-off-by: Sergei Antonov Reviewed-by: Vyacheslav Dubeyko Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 968ce41..32602c6 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -103,6 +103,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, folder = &entry->folder; memset(folder, 0, sizeof(*folder)); folder->type = cpu_to_be16(HFSPLUS_FOLDER); + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) + folder->flags |= cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT); folder->id = cpu_to_be32(inode->i_ino); HFSPLUS_I(inode)->create_date = folder->create_date = @@ -203,6 +205,36 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, return hfs_brec_find(fd, hfs_find_rec_by_key); } +static void hfsplus_subfolders_inc(struct inode *dir) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { + /* + * Increment subfolder count. Note, the value is only meaningful + * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. + */ + HFSPLUS_I(dir)->subfolders++; + } +} + +static void hfsplus_subfolders_dec(struct inode *dir) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { + /* + * Decrement subfolder count. Note, the value is only meaningful + * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. + * + * Check for zero. Some subfolders may have been created + * by an implementation ignorant of this counter. + */ + if (HFSPLUS_I(dir)->subfolders) + HFSPLUS_I(dir)->subfolders--; + } +} + int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) { @@ -247,6 +279,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, goto err1; dir->i_size++; + if (S_ISDIR(inode->i_mode)) + hfsplus_subfolders_inc(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); @@ -336,6 +370,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) goto out; dir->i_size--; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_dec(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); @@ -380,6 +416,7 @@ int hfsplus_rename_cat(u32 cnid, hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, src_fd.entrylength); + type = be16_to_cpu(entry.type); /* create new dir entry with the data from the old entry */ hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); @@ -394,6 +431,8 @@ int hfsplus_rename_cat(u32 cnid, if (err) goto out; dst_dir->i_size++; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_inc(dst_dir); dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; /* finally remove the old entry */ @@ -405,6 +444,8 @@ int hfsplus_rename_cat(u32 cnid, if (err) goto out; src_dir->i_size--; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_dec(src_dir); src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; /* remove old thread entry */ diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 08846425b..62d571e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -242,6 +242,7 @@ struct hfsplus_inode_info { */ sector_t fs_blocks; u8 userflags; /* BSD user file flags */ + u32 subfolders; /* Subfolder count (HFSX only) */ struct list_head open_dir_list; loff_t phys_size; diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 8ffb3a8..5a12682 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -261,7 +261,7 @@ struct hfsplus_cat_folder { struct DInfo user_info; struct DXInfo finder_info; __be32 text_encoding; - u32 reserved; + __be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */ } __packed; /* HFS file info (stolen from hfs.h) */ @@ -301,11 +301,13 @@ struct hfsplus_cat_file { struct hfsplus_fork_raw rsrc_fork; } __packed; -/* File attribute bits */ +/* File and folder flag bits */ #define HFSPLUS_FILE_LOCKED 0x0001 #define HFSPLUS_FILE_THREAD_EXISTS 0x0002 #define HFSPLUS_XATTR_EXISTS 0x0004 #define HFSPLUS_ACL_EXISTS 0x0008 +#define HFSPLUS_HAS_FOLDER_COUNT 0x0010 /* Folder has subfolder count + * (HFSX only) */ /* HFS+ catalog thread (part of a cat_entry) */ struct hfsplus_cat_thread { diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index fa929f3..a4f45bd 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -375,6 +375,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) hip->extent_state = 0; hip->flags = 0; hip->userflags = 0; + hip->subfolders = 0; memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->alloc_blocks = 0; @@ -494,6 +495,10 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); HFSPLUS_I(inode)->create_date = folder->create_date; HFSPLUS_I(inode)->fs_blocks = 0; + if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { + HFSPLUS_I(inode)->subfolders = + be32_to_cpu(folder->subfolders); + } inode->i_op = &hfsplus_dir_inode_operations; inode->i_fop = &hfsplus_dir_operations; } else if (type == HFSPLUS_FILE) { @@ -566,6 +571,10 @@ int hfsplus_cat_write_inode(struct inode *inode) folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); folder->valence = cpu_to_be32(inode->i_size - 2); + if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { + folder->subfolders = + cpu_to_be32(HFSPLUS_I(inode)->subfolders); + } hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); } else if (HFSPLUS_IS_RSRC(inode)) { -- cgit v0.10.2 From 0eb808eb753cd44e740da8e64f5c97a2a8e07578 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 10 Mar 2014 15:49:52 -0700 Subject: cris: convert ffs from an object-like macro to a function-like macro This avoids bad interactions with code using identifiers called "ffs": drivers/usb/gadget/f_fs.c: In function 'ffsmod_init': drivers/usb/gadget/f_fs.c:2693:494: error: 'ffsusb_func' undeclared (first use in this function) drivers/usb/gadget/f_fs.c:2693:494: note: each undeclared identifier is reported only once for each function it appears in drivers/usb/gadget/f_fs.c: In function 'ffsmod_exit': drivers/usb/gadget/f_fs.c:2693:677: error: 'ffsusb_func' undeclared (first use in this function) drivers/usb/gadget/f_fs.c: At top level: drivers/usb/gadget/f_fs.c:2693:35: warning: 'kernel_ffsusb_func' defined but not used [-Wunused-variable] drivers/usb/gadget/f_fs.c: In function 'ffsmod_init': drivers/usb/gadget/f_fs.c:2693:15: warning: control reaches end of non-void function [-Wreturn-type] See http://kisskb.ellerman.id.au/kisskb/buildresult/10715817/ Signed-off-by: Geert Uytterhoeven Cc: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h index 184066c..053c17b 100644 --- a/arch/cris/include/asm/bitops.h +++ b/arch/cris/include/asm/bitops.h @@ -144,7 +144,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) * definition, which doesn't have the same semantics. We don't want to * use -fno-builtin, so just hide the name ffs. */ -#define ffs kernel_ffs +#define ffs(x) kernel_ffs(x) #include #include -- cgit v0.10.2 From 6cce16f99d7be23cec7cabdf32a8166eec6e5393 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 7 Mar 2014 08:52:32 +0100 Subject: x86, threadinfo: Redo "x86: Use inline assembler to get sp" This patch restores the changes of commit dff38e3e93 "x86: Use inline assembler instead of global register variable to get sp". They got lost in commit 198d208df4 "x86: Keep thread_info on thread stack in x86_32" while moving the code to arch/x86/kernel/irq_32.c. Quoting Andi from commit dff38e3e93: """ LTO in gcc 4.6/47. has trouble with global register variables. They were used to read the stack pointer. Use a simple inline assembler statement with a mov instead. This also helps LLVM/clang, which does not support global register variables. """ Cc: Steven Rostedt Cc: Andrew Morton Cc: Peter Zijlstra Cc: Brian Gerst Cc: Andi Kleen Signed-off-by: Mathias Krause Link: http://lkml.kernel.org/r/1394178752-18047-1-git-send-email-minipli@googlemail.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 988dc8b..63ce838 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -70,7 +70,11 @@ static void call_on_stack(void *func, void *stack) } /* how to get the current stack pointer from C */ -register unsigned long current_stack_pointer asm("esp") __used; +#define current_stack_pointer ({ \ + unsigned long sp; \ + asm("mov %%esp,%0" : "=g" (sp)); \ + sp; \ +}) static inline void *current_stack(void) { -- cgit v0.10.2 From 5bd076708664313f2bdbbc1cf71093313b7774a1 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 10 Mar 2014 22:58:34 +0800 Subject: Xen-netback: Fix issue caused by using gso_type wrongly Current netback uses gso_type to check whether the skb contains gso offload, and this is wrong. Gso_size is the right one to check gso existence, and gso_type is only used to check gso type. Some skbs contains nonzero gso_type and zero gso_size, current netback would treat these skbs as gso and create wrong response for this. This also causes ssh failure to domu from other server. V2: use skb_is_gso function as Paul Durrant suggested Signed-off-by: Annie Li Acked-by: Wei Liu Reviewed-by: Paul Durrant Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index e5284bc..438d0c0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -240,7 +240,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, struct gnttab_copy *copy_gop; struct xenvif_rx_meta *meta; unsigned long bytes; - int gso_type; + int gso_type = XEN_NETIF_GSO_TYPE_NONE; /* Data must not cross a page boundary. */ BUG_ON(size + offset > PAGE_SIZE<gso_type & SKB_GSO_TCPV4) - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - else - gso_type = XEN_NETIF_GSO_TYPE_NONE; + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + } if (*head && ((1 << gso_type) & vif->gso_mask)) vif->rx.req_cons++; @@ -338,19 +338,15 @@ static int xenvif_gop_skb(struct sk_buff *skb, int head = 1; int old_meta_prod; int gso_type; - int gso_size; old_meta_prod = npo->meta_prod; - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - gso_size = skb_shinfo(skb)->gso_size; - } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - gso_size = skb_shinfo(skb)->gso_size; - } else { - gso_type = XEN_NETIF_GSO_TYPE_NONE; - gso_size = 0; + gso_type = XEN_NETIF_GSO_TYPE_NONE; + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; } /* Set up a GSO prefix descriptor, if necessary */ @@ -358,7 +354,7 @@ static int xenvif_gop_skb(struct sk_buff *skb, req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; meta->gso_type = gso_type; - meta->gso_size = gso_size; + meta->gso_size = skb_shinfo(skb)->gso_size; meta->size = 0; meta->id = req->id; } @@ -368,7 +364,7 @@ static int xenvif_gop_skb(struct sk_buff *skb, if ((1 << gso_type) & vif->gso_mask) { meta->gso_type = gso_type; - meta->gso_size = gso_size; + meta->gso_size = skb_shinfo(skb)->gso_size; } else { meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; @@ -500,8 +496,9 @@ static void xenvif_rx_action(struct xenvif *vif) size = skb_frag_size(&skb_shinfo(skb)->frags[i]); max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE); } - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + if (skb_is_gso(skb) && + (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || + skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) max_slots_needed++; /* If the skb may not fit then bail out now */ -- cgit v0.10.2 From dd38743b4cc2f86be250eaf156cf113ba3dd531a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Bostr=C3=B6m?= Date: Mon, 10 Mar 2014 16:17:15 +0100 Subject: vlan: Set correct source MAC address with TX VLAN offload enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With TX VLAN offload enabled the source MAC address for frames sent using the VLAN interface is currently set to the address of the real interface. This is wrong since the VLAN interface may be configured with a different address. The bug was introduced in commit 2205369a314e12fcec4781cc73ac9c08fc2b47de ("vlan: Fix header ops passthru when doing TX VLAN offload."). This patch sets the source address before calling the create function of the real interface. Signed-off-by: Peter Boström Signed-off-by: David S. Miller diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index de51c48..4b65aa4 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -538,6 +538,9 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; + if (saddr == NULL) + saddr = dev->dev_addr; + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); } -- cgit v0.10.2 From d44753b843e093f9e1f2f14806fbe106fff74898 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 3 Mar 2014 12:09:21 +0100 Subject: sched/deadline: Deny unprivileged users to set/change SCHED_DEADLINE policy Deny the use of SCHED_DEADLINE policy to unprivileged users. Even if root users can set the policy for normal users, we don't want the latter to be able to change their parameters (safest behavior). Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1393844961-18097-1-git-send-email-juri.lelli@gmail.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6edbef2..f5c6635 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3338,6 +3338,15 @@ recheck: return -EPERM; } + /* + * Can't set/change SCHED_DEADLINE policy at all for now + * (safest behavior); in the future we would like to allow + * unprivileged DL tasks to increase their relative deadline + * or reduce their runtime (both ways reducing utilization) + */ + if (dl_policy(policy)) + return -EPERM; + /* * Treat SCHED_IDLE as nice 20. Only allow a switch to * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. -- cgit v0.10.2 From 177c53d943368fc97644ebc0a250dc8e2d124250 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Feb 2014 13:39:05 +0100 Subject: stop_machine: Fix^2 race between stop_two_cpus() and stop_cpus() We must use smp_call_function_single(.wait=1) for the irq_cpu_stop_queue_work() to ensure the queueing is actually done under stop_cpus_lock. Without this we could have dropped the lock by the time we do the queueing and get the race we tried to fix. Fixes: 7053ea1a34fa ("stop_machine: Fix race between stop_two_cpus() and stop_cpus()") Signed-off-by: Peter Zijlstra Cc: Prarit Bhargava Cc: Rik van Riel Cc: Mel Gorman Cc: Christoph Hellwig Cc: Andrew Morton Link: http://lkml.kernel.org/r/20140228123905.GK3104@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 84571e0..01fbae5 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -293,7 +293,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * */ smp_call_function_single(min(cpu1, cpu2), &irq_cpu_stop_queue_work, - &call_args, 0); + &call_args, 1); lg_local_unlock(&stop_cpus_lock); preempt_enable(); -- cgit v0.10.2 From 96b3d28bf4b00f62fc8386ff5d487d1830793a3d Mon Sep 17 00:00:00 2001 From: Fernando Luis Vazquez Cao Date: Thu, 6 Mar 2014 14:25:28 +0900 Subject: sched/clock: Prevent tracing recursion in sched_clock_cpu() Prevent tracing of preempt_disable/enable() in sched_clock_cpu(). When CONFIG_DEBUG_PREEMPT is enabled, preempt_disable/enable() are traced and this causes trace_clock() users (and probably others) to go into an infinite recursion. Systems with a stable sched_clock() are not affected. This problem is similar to that fixed by upstream commit 95ef1e52922 ("KVM guest: prevent tracing recursion with kvmclock"). Signed-off-by: Fernando Luis Vazquez Cao Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Cc: Andrew Morton Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1394083528.4524.3.camel@nexus Signed-off-by: Ingo Molnar diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 43c2bcc..b30a292 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -301,14 +301,14 @@ u64 sched_clock_cpu(int cpu) if (unlikely(!sched_clock_running)) return 0ull; - preempt_disable(); + preempt_disable_notrace(); scd = cpu_sdc(cpu); if (cpu != smp_processor_id()) clock = sched_clock_remote(scd); else clock = sched_clock_local(scd); - preempt_enable(); + preempt_enable_notrace(); return clock; } -- cgit v0.10.2 From ef11dadb8373fcbe53bdd09ceb262b2f18da10c5 Mon Sep 17 00:00:00 2001 From: Dongsheng Yang Date: Wed, 5 Mar 2014 17:58:36 +0800 Subject: perf/x86/uncore: Add __init for uncore_cpumask_init() Commit: 411cf180fa00 perf/x86/uncore: fix initialization of cpumask introduced the function uncore_cpumask_init(), which is only called in __init intel_uncore_init(). But it is not marked with __init, which produces the following warning: WARNING: vmlinux.o(.text+0x2464a): Section mismatch in reference from the function uncore_cpumask_init() to the function .init.text:uncore_cpu_setup() The function uncore_cpumask_init() references the function __init uncore_cpu_setup(). This is often because uncore_cpumask_init lacks a __init annotation or the annotation of uncore_cpu_setup is wrong. This patch marks uncore_cpumask_init() with __init. Signed-off-by: Dongsheng Yang Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "H. Peter Anvin" Link: http://lkml.kernel.org/r/1394013516-4964-1-git-send-email-yangds.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index b262c61..5c2537a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -4229,7 +4229,7 @@ static int __init uncore_pmus_register(void) return 0; } -static void uncore_cpumask_init(void) +static void __init uncore_cpumask_init(void) { int cpu; -- cgit v0.10.2 From cfa77bc4af2c75c0781ee76cde2dd104c6c8e2b7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 2 Mar 2014 16:56:38 +0100 Subject: perf: Disallow user-space callchains for function trace events Recent issues with user space callchains processing within page fault handler tracing showed as Peter said 'there's just too much fail surface'. Related list discussions: http://marc.info/?t=139302086500001&r=1&w=2 http://marc.info/?t=139301437300003&r=1&w=2 Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "H. Peter Anvin" Cc: Vince Weaver Cc: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1393775800-13524-2-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index e854f42..d5e01c3 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -31,9 +31,18 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, } /* The ftrace function trace is allowed only for root. */ - if (ftrace_event_is_function(tp_event) && - perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + if (ftrace_event_is_function(tp_event)) { + if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* + * We don't allow user space callchains for function trace + * event, due to issues with page faults while tracing page + * fault handler and its overall trickiness nature. + */ + if (!p_event->attr.exclude_callchain_user) + return -EINVAL; + } /* No tracing, just counting, so no obvious leak */ if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW)) -- cgit v0.10.2 From 63c45f4ba533e9749da16298db53e491c25d805b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 2 Mar 2014 16:56:39 +0100 Subject: perf: Disallow user-space stack dumps for function trace events Recent issues with user space callchains processing within page fault handler tracing showed as Peter said 'there's just too much fail surface'. The user space stack dump is just another source of the this issue. Related list discussions: http://marc.info/?t=139302086500001&r=1&w=2 http://marc.info/?t=139301437300003&r=1&w=2 Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Vince Weaver Cc: Steven Rostedt Cc: Paul Mackerras Cc: H. Peter Anvin Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1393775800-13524-3-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index d5e01c3..c894614 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -42,6 +42,13 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event, */ if (!p_event->attr.exclude_callchain_user) return -EINVAL; + + /* + * Same reason to disable user stack dump as for user space + * callchains above. + */ + if (p_event->attr.sample_type & PERF_SAMPLE_STACK_USER) + return -EINVAL; } /* No tracing, just counting, so no obvious leak */ -- cgit v0.10.2 From 6bedfab68666afac1b03f8d62ee037c6ab82fbc5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 2 Mar 2014 16:56:40 +0100 Subject: perf tools: Disable user-space callchain/stack dumps for function trace events User space callchains and user space stack dump were disabled for function trace event. Mailing list discussions: http://marc.info/?t=139302086500001&r=1&w=2 http://marc.info/?t=139301437300003&r=1&w=2 Catching up with perf and disabling user space callchains and DWARF unwind (uses user stack dump) for function trace event. Adding following warnings when callchains are used for function trace event: # perf record -g -e ftrace:function ... Disabling user space callchains for function trace event. ... # ./perf record --call-graph=dwarf -e ftrace:function ... Cannot use DWARF unwind for function trace event, falling back to framepointers. Disabling user space callchains for function trace event. ... Signed-off-by: Jiri Olsa Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "H. Peter Anvin" Cc: Vince Weaver Cc: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1393775800-13524-4-git-send-email-jolsa@redhat.com Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index adc94dd..26b67b1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -500,6 +500,34 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) return ret; } +static void +perf_evsel__config_callgraph(struct perf_evsel *evsel, + struct record_opts *opts) +{ + bool function = perf_evsel__is_function_event(evsel); + struct perf_event_attr *attr = &evsel->attr; + + perf_evsel__set_sample_bit(evsel, CALLCHAIN); + + if (opts->call_graph == CALLCHAIN_DWARF) { + if (!function) { + perf_evsel__set_sample_bit(evsel, REGS_USER); + perf_evsel__set_sample_bit(evsel, STACK_USER); + attr->sample_regs_user = PERF_REGS_MASK; + attr->sample_stack_user = opts->stack_dump_size; + attr->exclude_callchain_user = 1; + } else { + pr_info("Cannot use DWARF unwind for function trace event," + " falling back to framepointers.\n"); + } + } + + if (function) { + pr_info("Disabling user space callchains for function trace event.\n"); + attr->exclude_callchain_user = 1; + } +} + /* * The enable_on_exec/disabled value strategy: * @@ -595,17 +623,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->mmap_data = track; } - if (opts->call_graph_enabled) { - perf_evsel__set_sample_bit(evsel, CALLCHAIN); - - if (opts->call_graph == CALLCHAIN_DWARF) { - perf_evsel__set_sample_bit(evsel, REGS_USER); - perf_evsel__set_sample_bit(evsel, STACK_USER); - attr->sample_regs_user = PERF_REGS_MASK; - attr->sample_stack_user = opts->stack_dump_size; - attr->exclude_callchain_user = 1; - } - } + if (opts->call_graph_enabled) + perf_evsel__config_callgraph(evsel, opts); if (target__has_cpu(&opts->target)) perf_evsel__set_sample_bit(evsel, CPU); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f1b3256..0c9926c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -315,6 +315,24 @@ static inline bool perf_evsel__is_group_event(struct perf_evsel *evsel) return perf_evsel__is_group_leader(evsel) && evsel->nr_members > 1; } +/** + * perf_evsel__is_function_event - Return whether given evsel is a function + * trace event + * + * @evsel - evsel selector to be tested + * + * Return %true if event is function trace event + */ +static inline bool perf_evsel__is_function_event(struct perf_evsel *evsel) +{ +#define FUNCTION_EVENT "ftrace:function" + + return evsel->name && + !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT)); + +#undef FUNCTION_EVENT +} + struct perf_attr_details { bool freq; bool verbose; -- cgit v0.10.2 From b7b4839d93e50adccef29eccb694807cdcb8bee3 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 6 Mar 2014 12:20:28 -0500 Subject: perf/x86: Fix leak in uncore_type_init failure paths The error path of uncore_type_init() frees up any allocations that were made along the way, but it relies upon type->pmus being set, which only happens if the function succeeds. As type->pmus remains null in this case, the call to uncore_type_exit will do nothing. Moving the assignment earlier will allow us to actually free those allocations should something go awry. Signed-off-by: Dave Jones Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140306172028.GA552@redhat.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index c88f7f4..047f540 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -3334,6 +3334,8 @@ static int __init uncore_type_init(struct intel_uncore_type *type) if (!pmus) return -ENOMEM; + type->pmus = pmus; + type->unconstrainted = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, 0, type->num_counters, 0, 0); @@ -3369,7 +3371,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type) } type->pmu_group = &uncore_pmu_attr_group; - type->pmus = pmus; return 0; fail: uncore_type_exit(type); -- cgit v0.10.2 From 7743a536beda6225eef4a0758c5370f761fb0616 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Mar 2014 10:52:42 -0500 Subject: i386: Remove unneeded test of 'task' in dump_trace() (again) Commit 028a690a1ebc8b "i386: Remove unneeded test of 'task' in dump_trace()" correctly removed the unneeded 'task != NULL' check because it would be set to current if it was NULL. Commit 2bc5f927d489 "i386: split out dumpstack code from traps_32.c" moved the code from traps_32.c to its own file dump_stack.c for preparation of the i386 / x86_64 merge. Commit 8a541665b906 "dumpstack: x86: various small unification steps" worked to make i386 and x86_64 dump_stack logic similar. But this actually reverted the correct change from 028a690a1ebc8b. Commit d0caf292505d "x86/dumpstack: Remove unneeded check in dump_trace()" removed the unneeded "task != NULL" check for x86_64 but left that same unneeded check for i386, that was added because x86_64 had it! This chain of events ironically had i386 add back the unneeded task != NULL check because x86_64 did it, and then the fix for x86_64 was fixed by Dan. And even more ironically, it was Dan's smatch bot that told me that a change to dump_stack_32 I made may be wrong if current can be NULL (it can't), as there was a check for it by assigning task to current, and then checking if task is NULL. Reported-by: Dan Carpenter Signed-off-by: Steven Rostedt Acked-by: Alexander van Heukelum Cc: Jesper Juhl Link: http://lkml.kernel.org/r/20140307105242.79a0befd@gandalf.local.home Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f2a1770..a21d49c 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -30,7 +30,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long dummy; stack = &dummy; - if (task && task != current) + if (task != current) stack = (unsigned long *)task->thread.sp; } -- cgit v0.10.2 From ea7bdc65bca8cf837a63e0ff7b75daed83222511 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 27 Jan 2014 20:14:06 +0100 Subject: x86/apic: Plug racy xAPIC access of CPU hotplug code apic_icr_write() and its users in smpboot.c were apparently written under the assumption that this code would only run during early boot. But nowadays we also execute it when onlining a CPU later on while the system is fully running. That will make wakeup_cpu_via_init_nmi and, thus, also native_apic_icr_write run in plain process context. If we migrate the caller to a different CPU at the wrong time or interrupt it and write to ICR/ICR2 to send unrelated IPIs, we can end up sending INIT, SIPI or NMIs to wrong CPUs. Fix this by disabling interrupts during the write to the ICR halves and disable preemption around waiting for ICR availability and using it. Signed-off-by: Jan Kiszka Tested-By: Igor Mammedov Link: http://lkml.kernel.org/r/52E6AFFE.3030004@siemens.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f824d69..53e2053 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -286,8 +286,12 @@ u32 native_safe_apic_wait_icr_idle(void) void native_apic_icr_write(u32 low, u32 id) { + unsigned long flags; + + local_irq_save(flags); apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); apic_write(APIC_ICR, low); + local_irq_restore(flags); } u64 native_apic_icr_read(void) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c77acc6..60179ec 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -702,11 +702,15 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, int id; int boot_error; + preempt_disable(); + /* * Wake up AP by INIT, INIT, STARTUP sequence. */ - if (cpu) - return wakeup_secondary_cpu_via_init(apicid, start_ip); + if (cpu) { + boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); + goto out; + } /* * Wake up BSP by nmi. @@ -726,6 +730,9 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); } +out: + preempt_enable(); + return boot_error; } -- cgit v0.10.2 From 734ff2a71f9e6aa6fedfa5a9a34818b8586516d5 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Tue, 4 Mar 2014 19:25:46 +0400 Subject: sched/rt: Fix picking RT and DL tasks from empty queue The problems: 1) We check for rt_nr_running before call of put_prev_task(). If previous task is RT, its rt_rq may become throttled and dequeued after this call. In case of p is from rt->rq this just causes picking a task from throttled queue, but in case of its rt_rq is child we are guaranteed catch BUG_ON. 2) The same with deadline class. The only difference we operate on only dl_rq. This patch fixes all the above problems and it adds a small skip in the DL update like we've already done for RT class: if (unlikely((s64)delta_exec <= 0)) return; This will optimize sequential update_curr_dl() calls a little. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra Cc: Juri Lelli Link: http://lkml.kernel.org/r/1393946746.3643.3.camel@tkhai Signed-off-by: Ingo Molnar diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e4f3ac3..27ef409 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -609,8 +609,8 @@ static void update_curr_dl(struct rq *rq) * approach need further study. */ delta_exec = rq_clock_task(rq) - curr->se.exec_start; - if (unlikely((s64)delta_exec < 0)) - delta_exec = 0; + if (unlikely((s64)delta_exec <= 0)) + return; schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); @@ -1023,6 +1023,12 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) if (need_pull_dl_task(rq, prev)) pull_dl_task(rq); + /* + * When prev is DL, we may throttle it in put_prev_task(). + * So, we update time before we check for dl_nr_running. + */ + if (prev->sched_class == &dl_sched_class) + update_curr_dl(rq); if (unlikely(!dl_rq->dl_nr_running)) return NULL; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index facc824..f3cee0a 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1379,6 +1379,13 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) return RETRY_TASK; } + /* + * We may dequeue prev's rt_rq in put_prev_task(). + * So, we update time before rt_nr_running check. + */ + if (prev->sched_class == &rt_sched_class) + update_curr_rt(rq); + if (!rt_rq->rt_nr_running) return NULL; -- cgit v0.10.2 From e4aa358b6c23f98b2715594f6b1e9a4996a55f04 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 6 Mar 2014 13:31:55 +0400 Subject: sched/fair: Push down check for high priority class task into idle_balance() We close idle_exit_fair() bracket in case of we've pulled something or we've received task of high priority class. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra Cc: Vincent Guittot Link: http://lkml.kernel.org/r/1394098315.19290.10.camel@tkhai Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d8482e1..b956e70 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4787,17 +4787,16 @@ simple: return p; idle: + new_tasks = idle_balance(rq); /* * Because idle_balance() releases (and re-acquires) rq->lock, it is * possible for any higher priority task to appear. In that case we * must re-start the pick_next_entity() loop. */ - new_tasks = idle_balance(rq); - - if (rq->nr_running != rq->cfs.h_nr_running) + if (new_tasks < 0) return RETRY_TASK; - if (new_tasks) + if (new_tasks > 0) goto again; return NULL; @@ -6728,8 +6727,14 @@ static int idle_balance(struct rq *this_rq) this_rq->max_idle_balance_cost = curr_cost; out: - if (pulled_task) + /* Is there a task of a high priority class? */ + if (this_rq->nr_running != this_rq->cfs.h_nr_running) + pulled_task = -1; + + if (pulled_task) { + idle_exit_fair(this_rq); this_rq->idle_stamp = 0; + } return pulled_task; } diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 1f37258..879f2b7 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -29,7 +29,6 @@ pick_next_task_idle(struct rq *rq, struct task_struct *prev) put_prev_task(rq, prev); schedstat_inc(rq, sched_goidle); - idle_enter_fair(rq); return rq->idle; } -- cgit v0.10.2 From 4c6c4e38c4e9a454889298dcc498174968d14a09 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 6 Mar 2014 13:32:01 +0400 Subject: sched/core: Fix endless loop in pick_next_task() 1) Single cpu machine case. When rq has only RT tasks, but no one of them can be picked because of throttling, we enter in endless loop. pick_next_task_{dl,rt} return NULL. In pick_next_task_fair() we permanently go to retry if (rq->nr_running != rq->cfs.h_nr_running) return RETRY_TASK; (rq->nr_running is not being decremented when rt_rq becomes throttled). No chances to unthrottle any rt_rq or to wake fair here, because of rq is locked permanently and interrupts are disabled. 2) In case of SMP this can cause a hang too. Although we unlock rq in idle_balance(), interrupts are still disabled. The solution is to check for available tasks in DL and RT classes instead of checking for sum. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1394098321.19290.11.camel@tkhai Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b956e70..10db4a8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6728,7 +6728,9 @@ static int idle_balance(struct rq *this_rq) out: /* Is there a task of a high priority class? */ - if (this_rq->nr_running != this_rq->cfs.h_nr_running) + if (this_rq->nr_running != this_rq->cfs.h_nr_running && + (this_rq->dl.dl_nr_running || + (this_rq->rt.rt_nr_running && !rt_rq_throttled(&this_rq->rt)))) pulled_task = -1; if (pulled_task) { diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index f3cee0a..d8cdf16 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -470,11 +470,6 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) dequeue_rt_entity(rt_se); } -static inline int rt_rq_throttled(struct rt_rq *rt_rq) -{ - return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; -} - static int rt_se_boosted(struct sched_rt_entity *rt_se) { struct rt_rq *rt_rq = group_rt_rq(rt_se); @@ -545,11 +540,6 @@ static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) { } -static inline int rt_rq_throttled(struct rt_rq *rt_rq) -{ - return rt_rq->rt_throttled; -} - static inline const struct cpumask *sched_rt_period_mask(void) { return cpu_online_mask; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 378bff7..f2de7a1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -423,6 +423,18 @@ struct rt_rq { #endif }; +#ifdef CONFIG_RT_GROUP_SCHED +static inline int rt_rq_throttled(struct rt_rq *rt_rq) +{ + return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; +} +#else +static inline int rt_rq_throttled(struct rt_rq *rt_rq) +{ + return rt_rq->rt_throttled; +} +#endif + /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */ -- cgit v0.10.2 From 35805ff8f4fc535ac85330170d3c56829c87c677 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Thu, 6 Mar 2014 19:16:15 +0400 Subject: sched/fair: Fix endless loop in idle_balance() Check for fair tasks number to decide, that we've pulled a task. rq's nr_running may contain throttled RT tasks. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1394118975.19290.104.camel@tkhai Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 10db4a8..f1eedae 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6710,7 +6710,7 @@ static int idle_balance(struct rq *this_rq) * While browsing the domains, we released the rq lock. * A task could have be enqueued in the meantime */ - if (this_rq->nr_running && !pulled_task) { + if (this_rq->cfs.h_nr_running && !pulled_task) { pulled_task = 1; goto out; } -- cgit v0.10.2 From 156654f491dd8d52687a5fbe1637f472a52ce75b Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 28 Feb 2014 07:23:11 +0100 Subject: sched/numa: Move task_numa_free() to __put_task_struct() Bad idea on -rt: [ 908.026136] [] rt_spin_lock_slowlock+0xaa/0x2c0 [ 908.026145] [] task_numa_free+0x31/0x130 [ 908.026151] [] finish_task_switch+0xce/0x100 [ 908.026156] [] thread_return+0x48/0x4ae [ 908.026160] [] schedule+0x25/0xa0 [ 908.026163] [] rt_spin_lock_slowlock+0xd5/0x2c0 [ 908.026170] [] get_signal_to_deliver+0xaf/0x680 [ 908.026175] [] do_signal+0x3d/0x5b0 [ 908.026179] [] do_notify_resume+0x90/0xe0 [ 908.026186] [] int_signal+0x12/0x17 [ 908.026193] [<00007ff2a388b1d0>] 0x7ff2a388b1cf and since upstream does not mind where we do this, be a bit nicer ... Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Mel Gorman Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1393568591.6018.27.camel@marge.simpson.net Signed-off-by: Ingo Molnar diff --git a/kernel/fork.c b/kernel/fork.c index a17621c..332688e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -237,6 +237,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); + task_numa_free(tsk); security_task_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dd89c27..9e126a2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2151,8 +2151,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) if (mm) mmdrop(mm); if (unlikely(prev_state == TASK_DEAD)) { - task_numa_free(prev); - if (prev->sched_class->task_dead) prev->sched_class->task_dead(prev); -- cgit v0.10.2 From 36fc5500bb1907bea7e9b4785dd00abf714d556f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 4 Mar 2014 14:07:37 -0700 Subject: sched: Remove unused mc_capable() and smt_capable() Remove mc_capable() and smt_capable(). Neither is used. Both were added by 5c45bf279d37 ("sched: mc/smt power savings sched policy"). Uses of both were removed by 8e7fbcbc22c1 ("sched: Remove stale power aware scheduling remnants and dysfunctional knobs"). Signed-off-by: Bjorn Helgaas Signed-off-by: Peter Zijlstra Acked-by: Thomas Gleixner Acked-by: David S. Miller Acked-by: Benjamin Herrenschmidt Link: http://lkml.kernel.org/r/20140304210737.16893.54289.stgit@bhelgaas-glaptop.roam.corp.google.com Signed-off-by: Ingo Molnar diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h index 58b8b84..2fe85ff 100644 --- a/arch/arm/include/asm/topology.h +++ b/arch/arm/include/asm/topology.h @@ -20,9 +20,6 @@ extern struct cputopo_arm cpu_topology[NR_CPUS]; #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) -#define mc_capable() (cpu_topology[0].socket_id != -1) -#define smt_capable() (cpu_topology[0].thread_id != -1) - void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index a2496e4..5cb55a1 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -77,7 +77,6 @@ void build_cpu_to_node_map(void); #define topology_core_id(cpu) (cpu_data(cpu)->core_id) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) -#define smt_capable() (smp_num_siblings > 1) #endif extern void arch_fix_phys_package_id(int num, u32 slot); diff --git a/arch/mips/include/asm/topology.h b/arch/mips/include/asm/topology.h index 12609a1..20ea485 100644 --- a/arch/mips/include/asm/topology.h +++ b/arch/mips/include/asm/topology.h @@ -10,8 +10,4 @@ #include -#ifdef CONFIG_SMP -#define smt_capable() (smp_num_siblings > 1) -#endif - #endif /* __ASM_TOPOLOGY_H */ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index d0b5fca..c920215 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -99,7 +99,6 @@ static inline int prrn_is_enabled(void) #ifdef CONFIG_SMP #include -#define smt_capable() (cpu_has_feature(CPU_FTR_SMT)) #ifdef CONFIG_PPC64 #include diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 1754390..a2d10fc 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -42,8 +42,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #define topology_core_id(cpu) (cpu_data(cpu).core_id) #define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) -#define mc_capable() (sparc64_multi_core) -#define smt_capable() (sparc64_multi_core) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d35f24e..9bcc724 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -133,12 +133,6 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) struct pci_bus; void x86_pci_root_bus_resources(int bus, struct list_head *resources); -#ifdef CONFIG_SMP -#define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ - (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)) -#define smt_capable() (smp_num_siblings > 1) -#endif - #ifdef CONFIG_NUMA extern int get_mp_bus_to_node(int busnum); extern void set_mp_bus_to_node(int busnum, int node); -- cgit v0.10.2 From 070826820daf423eee83ee4e90bf3daf2cdd85c2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 4 Mar 2014 14:07:44 -0700 Subject: sparc64, sched: Remove unused sparc64_multi_core Remove sparc64_multi_core because it's not used any more. It was added by a2f9f6bbb30e ("Fix {mc,smt}_capable()"), and the last uses were removed by e637d96bf462 ("sched: Remove unused mc_capable() and smt_capable()"). Signed-off-by: Bjorn Helgaas Signed-off-by: Peter Zijlstra Acked-by: David S. Miller Link: http://lkml.kernel.org/r/20140304210744.16893.75929.stgit@bhelgaas-glaptop.roam.corp.google.com Signed-off-by: Ingo Molnar diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index dd3bef4..0571039 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -32,7 +32,6 @@ DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); extern cpumask_t cpu_core_map[NR_CPUS]; -extern int sparc64_multi_core; extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index b90bf23..a1a4400 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -896,10 +896,6 @@ void mdesc_fill_in_cpu_data(cpumask_t *mask) mdesc_iterate_over_cpus(fill_in_one_cpu, NULL, mask); -#ifdef CONFIG_SMP - sparc64_multi_core = 1; -#endif - hp = mdesc_grab(); set_core_ids(hp); diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c index 6b39125..9a690d3 100644 --- a/arch/sparc/kernel/prom_64.c +++ b/arch/sparc/kernel/prom_64.c @@ -555,9 +555,6 @@ static void *fill_in_one_cpu(struct device_node *dp, int cpuid, int arg) cpu_data(cpuid).core_id = portid + 1; cpu_data(cpuid).proc_id = portid; -#ifdef CONFIG_SMP - sparc64_multi_core = 1; -#endif } else { cpu_data(cpuid).dcache_size = of_getintprop_default(dp, "dcache-size", 16 * 1024); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index b085311..9781048 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -53,8 +53,6 @@ #include "cpumap.h" -int sparc64_multi_core __read_mostly; - DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; cpumask_t cpu_core_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; -- cgit v0.10.2 From c9122da1e2d29bd6a1475a0d1ce2aa6ac6ea25fa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Feb 2014 13:32:16 +0100 Subject: locking: Move mcs_spinlock.h into kernel/locking/ The mcs_spinlock code is not meant (or suitable) as a generic locking primitive, therefore take it away from the normal includes and place it in kernel/locking/. This way the locking primitives implemented there can use it as part of their implementation but we do not risk it getting used inapropriately. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-byirmpamgr7h25m5kyavwpzx@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/include/linux/mcs_spinlock.h b/include/linux/mcs_spinlock.h deleted file mode 100644 index f2a5c63..0000000 --- a/include/linux/mcs_spinlock.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * MCS lock defines - * - * This file contains the main data structure and API definitions of MCS lock. - * - * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock - * with the desirable properties of being fair, and with each cpu trying - * to acquire the lock spinning on a local variable. - * It avoids expensive cache bouncings that common test-and-set spin-lock - * implementations incur. - */ -#ifndef __LINUX_MCS_SPINLOCK_H -#define __LINUX_MCS_SPINLOCK_H - -#include - -struct mcs_spinlock { - struct mcs_spinlock *next; - int locked; /* 1 if lock acquired */ -}; - -#ifndef arch_mcs_spin_lock_contended -/* - * Using smp_load_acquire() provides a memory barrier that ensures - * subsequent operations happen after the lock is acquired. - */ -#define arch_mcs_spin_lock_contended(l) \ -do { \ - while (!(smp_load_acquire(l))) \ - arch_mutex_cpu_relax(); \ -} while (0) -#endif - -#ifndef arch_mcs_spin_unlock_contended -/* - * smp_store_release() provides a memory barrier to ensure all - * operations in the critical section has been completed before - * unlocking. - */ -#define arch_mcs_spin_unlock_contended(l) \ - smp_store_release((l), 1) -#endif - -/* - * Note: the smp_load_acquire/smp_store_release pair is not - * sufficient to form a full memory barrier across - * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. - * For applications that need a full barrier across multiple cpus - * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be - * used after mcs_lock. - */ - -/* - * In order to acquire the lock, the caller should declare a local node and - * pass a reference of the node to this function in addition to the lock. - * If the lock has already been acquired, then this will proceed to spin - * on this node->locked until the previous lock holder sets the node->locked - * in mcs_spin_unlock(). - * - * We don't inline mcs_spin_lock() so that perf can correctly account for the - * time spent in this lock function. - */ -static inline -void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *prev; - - /* Init node */ - node->locked = 0; - node->next = NULL; - - prev = xchg(lock, node); - if (likely(prev == NULL)) { - /* - * Lock acquired, don't need to set node->locked to 1. Threads - * only spin on its own node->locked value for lock acquisition. - * However, since this thread can immediately acquire the lock - * and does not proceed to spin on its own node->locked, this - * value won't be used. If a debug mode is needed to - * audit lock status, then set node->locked value here. - */ - return; - } - ACCESS_ONCE(prev->next) = node; - - /* Wait until the lock holder passes the lock down. */ - arch_mcs_spin_lock_contended(&node->locked); -} - -/* - * Releases the lock. The caller should pass in the corresponding node that - * was used to acquire the lock. - */ -static inline -void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) -{ - struct mcs_spinlock *next = ACCESS_ONCE(node->next); - - if (likely(!next)) { - /* - * Release the lock by setting it to NULL - */ - if (likely(cmpxchg(lock, node, NULL) == node)) - return; - /* Wait until the next pointer is set */ - while (!(next = ACCESS_ONCE(node->next))) - arch_mutex_cpu_relax(); - } - - /* Pass lock to next waiter. */ - arch_mcs_spin_unlock_contended(&next->locked); -} - -#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h new file mode 100644 index 0000000..f2a5c63 --- /dev/null +++ b/kernel/locking/mcs_spinlock.h @@ -0,0 +1,114 @@ +/* + * MCS lock defines + * + * This file contains the main data structure and API definitions of MCS lock. + * + * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock + * with the desirable properties of being fair, and with each cpu trying + * to acquire the lock spinning on a local variable. + * It avoids expensive cache bouncings that common test-and-set spin-lock + * implementations incur. + */ +#ifndef __LINUX_MCS_SPINLOCK_H +#define __LINUX_MCS_SPINLOCK_H + +#include + +struct mcs_spinlock { + struct mcs_spinlock *next; + int locked; /* 1 if lock acquired */ +}; + +#ifndef arch_mcs_spin_lock_contended +/* + * Using smp_load_acquire() provides a memory barrier that ensures + * subsequent operations happen after the lock is acquired. + */ +#define arch_mcs_spin_lock_contended(l) \ +do { \ + while (!(smp_load_acquire(l))) \ + arch_mutex_cpu_relax(); \ +} while (0) +#endif + +#ifndef arch_mcs_spin_unlock_contended +/* + * smp_store_release() provides a memory barrier to ensure all + * operations in the critical section has been completed before + * unlocking. + */ +#define arch_mcs_spin_unlock_contended(l) \ + smp_store_release((l), 1) +#endif + +/* + * Note: the smp_load_acquire/smp_store_release pair is not + * sufficient to form a full memory barrier across + * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. + * For applications that need a full barrier across multiple cpus + * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be + * used after mcs_lock. + */ + +/* + * In order to acquire the lock, the caller should declare a local node and + * pass a reference of the node to this function in addition to the lock. + * If the lock has already been acquired, then this will proceed to spin + * on this node->locked until the previous lock holder sets the node->locked + * in mcs_spin_unlock(). + * + * We don't inline mcs_spin_lock() so that perf can correctly account for the + * time spent in this lock function. + */ +static inline +void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *prev; + + /* Init node */ + node->locked = 0; + node->next = NULL; + + prev = xchg(lock, node); + if (likely(prev == NULL)) { + /* + * Lock acquired, don't need to set node->locked to 1. Threads + * only spin on its own node->locked value for lock acquisition. + * However, since this thread can immediately acquire the lock + * and does not proceed to spin on its own node->locked, this + * value won't be used. If a debug mode is needed to + * audit lock status, then set node->locked value here. + */ + return; + } + ACCESS_ONCE(prev->next) = node; + + /* Wait until the lock holder passes the lock down. */ + arch_mcs_spin_lock_contended(&node->locked); +} + +/* + * Releases the lock. The caller should pass in the corresponding node that + * was used to acquire the lock. + */ +static inline +void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) +{ + struct mcs_spinlock *next = ACCESS_ONCE(node->next); + + if (likely(!next)) { + /* + * Release the lock by setting it to NULL + */ + if (likely(cmpxchg(lock, node, NULL) == node)) + return; + /* Wait until the next pointer is set */ + while (!(next = ACCESS_ONCE(node->next))) + arch_mutex_cpu_relax(); + } + + /* Pass lock to next waiter. */ + arch_mcs_spin_unlock_contended(&next->locked); +} + +#endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 45fe1b5..4f408be 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include "mcs_spinlock.h" /* * In the DEBUG case we are using the "NULL fastpath" for mutexes, -- cgit v0.10.2 From 46af29e479cc0c1c63633007993af5292c2c3e75 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 28 Jan 2014 11:13:12 -0800 Subject: locking/mutexes: Return false if task need_resched() in mutex_can_spin_on_owner() The mutex_can_spin_on_owner() function should also return false if the task needs to be rescheduled to avoid entering the MCS queue when it needs to reschedule. Signed-off-by: Jason Low Signed-off-by: Peter Zijlstra Cc: Waiman.Long@hp.com Cc: torvalds@linux-foundation.org Cc: tglx@linutronix.de Cc: riel@redhat.com Cc: akpm@linux-foundation.org Cc: davidlohr@hp.com Cc: hpa@zytor.com Cc: andi@firstfloor.org Cc: aswin@hp.com Cc: scott.norton@hp.com Cc: chegu_vinod@hp.com Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1390936396-3962-2-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 4f408be..e6d646b 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -166,6 +166,9 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) struct task_struct *owner; int retval = 1; + if (need_resched()) + return 0; + rcu_read_lock(); owner = ACCESS_ONCE(lock->owner); if (owner) -- cgit v0.10.2 From 47667fa1502e4d759df87e9cc7fbc0f202483361 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 28 Jan 2014 11:13:13 -0800 Subject: locking/mutexes: Modify the way optimistic spinners are queued The mutex->spin_mlock was introduced in order to ensure that only 1 thread spins for lock acquisition at a time to reduce cache line contention. When lock->owner is NULL and the lock->count is still not 1, the spinner(s) will continually release and obtain the lock->spin_mlock. This can generate quite a bit of overhead/contention, and also might just delay the spinner from getting the lock. This patch modifies the way optimistic spinners are queued by queuing before entering the optimistic spinning loop as oppose to acquiring before every call to mutex_spin_on_owner(). So in situations where the spinner requires a few extra spins before obtaining the lock, then there will only be 1 spinner trying to get the lock and it will avoid the overhead from unnecessarily unlocking and locking the spin_mlock. Signed-off-by: Jason Low Cc: tglx@linutronix.de Cc: riel@redhat.com Cc: akpm@linux-foundation.org Cc: davidlohr@hp.com Cc: hpa@zytor.com Cc: andi@firstfloor.org Cc: aswin@hp.com Cc: scott.norton@hp.com Cc: chegu_vinod@hp.com Cc: Waiman.Long@hp.com Cc: paulmck@linux.vnet.ibm.com Cc: torvalds@linux-foundation.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1390936396-3962-3-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index e6d646b..82dad2c 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -403,9 +403,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (!mutex_can_spin_on_owner(lock)) goto slowpath; + mcs_spin_lock(&lock->mcs_lock, &node); for (;;) { struct task_struct *owner; - struct mcs_spinlock node; if (use_ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; @@ -420,19 +420,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * performed the optimistic spinning cannot be done. */ if (ACCESS_ONCE(ww->ctx)) - goto slowpath; + break; } /* * If there's an owner, wait for it to either * release the lock or go to sleep. */ - mcs_spin_lock(&lock->mcs_lock, &node); owner = ACCESS_ONCE(lock->owner); - if (owner && !mutex_spin_on_owner(lock, owner)) { - mcs_spin_unlock(&lock->mcs_lock, &node); - goto slowpath; - } + if (owner && !mutex_spin_on_owner(lock, owner)) + break; if ((atomic_read(&lock->count) == 1) && (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { @@ -449,7 +446,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_enable(); return 0; } - mcs_spin_unlock(&lock->mcs_lock, &node); /* * When there's no owner, we might have preempted between the @@ -458,7 +454,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * the owner complete. */ if (!owner && (need_resched() || rt_task(task))) - goto slowpath; + break; /* * The cpu_relax() call is a compiler barrier which forces @@ -468,6 +464,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, */ arch_mutex_cpu_relax(); } + mcs_spin_unlock(&lock->mcs_lock, &node); slowpath: #endif spin_lock_mutex(&lock->wait_lock, flags); -- cgit v0.10.2 From 1d8fe7dc8078b23e060ec62ccb4cdc1ac3c41bf8 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Tue, 28 Jan 2014 11:13:14 -0800 Subject: locking/mutexes: Unlock the mutex without the wait_lock When running workloads that have high contention in mutexes on an 8 socket machine, mutex spinners would often spin for a long time with no lock owner. The main reason why this is occuring is in __mutex_unlock_common_slowpath(), if __mutex_slowpath_needs_to_unlock(), then the owner needs to acquire the mutex->wait_lock before releasing the mutex (setting lock->count to 1). When the wait_lock is contended, this delays the mutex from being released. We should be able to release the mutex without holding the wait_lock. Signed-off-by: Jason Low Cc: chegu_vinod@hp.com Cc: paulmck@linux.vnet.ibm.com Cc: Waiman.Long@hp.com Cc: torvalds@linux-foundation.org Cc: tglx@linutronix.de Cc: riel@redhat.com Cc: akpm@linux-foundation.org Cc: davidlohr@hp.com Cc: hpa@zytor.com Cc: andi@firstfloor.org Cc: aswin@hp.com Cc: scott.norton@hp.com Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1390936396-3962-4-git-send-email-jason.low2@hp.com Signed-off-by: Ingo Molnar diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 82dad2c..dc3d6f2 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -671,10 +671,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) struct mutex *lock = container_of(lock_count, struct mutex, count); unsigned long flags; - spin_lock_mutex(&lock->wait_lock, flags); - mutex_release(&lock->dep_map, nested, _RET_IP_); - debug_mutex_unlock(lock); - /* * some architectures leave the lock unlocked in the fastpath failure * case, others need to leave it locked. In the later case we have to @@ -683,6 +679,10 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) if (__mutex_slowpath_needs_to_unlock()) atomic_set(&lock->count, 1); + spin_lock_mutex(&lock->wait_lock, flags); + mutex_release(&lock->dep_map, nested, _RET_IP_); + debug_mutex_unlock(lock); + if (!list_empty(&lock->wait_list)) { /* get the first entry from the wait-list: */ struct mutex_waiter *waiter = -- cgit v0.10.2 From fb0527bd5ea99bfeb2dd91e3c1433ecf745d6b99 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 29 Jan 2014 12:51:42 +0100 Subject: locking/mutexes: Introduce cancelable MCS lock for adaptive spinning Since we want a task waiting for a mutex_lock() to go to sleep and reschedule on need_resched() we must be able to abort the mcs_spin_lock() around the adaptive spin. Therefore implement a cancelable mcs lock. Signed-off-by: Peter Zijlstra Cc: chegu_vinod@hp.com Cc: paulmck@linux.vnet.ibm.com Cc: Waiman.Long@hp.com Cc: torvalds@linux-foundation.org Cc: tglx@linutronix.de Cc: riel@redhat.com Cc: akpm@linux-foundation.org Cc: davidlohr@hp.com Cc: hpa@zytor.com Cc: andi@firstfloor.org Cc: aswin@hp.com Cc: scott.norton@hp.com Cc: Jason Low Link: http://lkml.kernel.org/n/tip-62hcl5wxydmjzd182zhvk89m@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/include/linux/mutex.h b/include/linux/mutex.h index c482e1d..11692de 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -46,7 +46,7 @@ * - detects multi-task circular deadlocks and prints out all affected * locks and tasks (and only those tasks) */ -struct mcs_spinlock; +struct optimistic_spin_queue; struct mutex { /* 1: unlocked, 0: locked, negative: locked, possible waiters */ atomic_t count; @@ -56,7 +56,7 @@ struct mutex { struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - struct mcs_spinlock *mcs_lock; /* Spinner MCS lock */ + struct optimistic_spin_queue *osq; /* Spinner MCS lock */ #endif #ifdef CONFIG_DEBUG_MUTEXES const char *name; diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index baab8e5..2a9ee96 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -1,5 +1,5 @@ -obj-y += mutex.o semaphore.o rwsem.o lglock.o +obj-y += mutex.o semaphore.o rwsem.o lglock.o mcs_spinlock.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_lockdep.o = -pg diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c new file mode 100644 index 0000000..838dc9e --- /dev/null +++ b/kernel/locking/mcs_spinlock.c @@ -0,0 +1,178 @@ + +#include +#include +#include +#include "mcs_spinlock.h" + +#ifdef CONFIG_SMP + +/* + * An MCS like lock especially tailored for optimistic spinning for sleeping + * lock implementations (mutex, rwsem, etc). + * + * Using a single mcs node per CPU is safe because sleeping locks should not be + * called from interrupt context and we have preemption disabled while + * spinning. + */ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); + +/* + * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. + * Can return NULL in case we were the last queued and we updated @lock instead. + */ +static inline struct optimistic_spin_queue * +osq_wait_next(struct optimistic_spin_queue **lock, + struct optimistic_spin_queue *node, + struct optimistic_spin_queue *prev) +{ + struct optimistic_spin_queue *next = NULL; + + for (;;) { + if (*lock == node && cmpxchg(lock, node, prev) == node) { + /* + * We were the last queued, we moved @lock back. @prev + * will now observe @lock and will complete its + * unlock()/unqueue(). + */ + break; + } + + /* + * We must xchg() the @node->next value, because if we were to + * leave it in, a concurrent unlock()/unqueue() from + * @node->next might complete Step-A and think its @prev is + * still valid. + * + * If the concurrent unlock()/unqueue() wins the race, we'll + * wait for either @lock to point to us, through its Step-B, or + * wait for a new @node->next from its Step-C. + */ + if (node->next) { + next = xchg(&node->next, NULL); + if (next) + break; + } + + arch_mutex_cpu_relax(); + } + + return next; +} + +bool osq_lock(struct optimistic_spin_queue **lock) +{ + struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_queue *prev, *next; + + node->locked = 0; + node->next = NULL; + + node->prev = prev = xchg(lock, node); + if (likely(prev == NULL)) + return true; + + ACCESS_ONCE(prev->next) = node; + + /* + * Normally @prev is untouchable after the above store; because at that + * moment unlock can proceed and wipe the node element from stack. + * + * However, since our nodes are static per-cpu storage, we're + * guaranteed their existence -- this allows us to apply + * cmpxchg in an attempt to undo our queueing. + */ + + while (!smp_load_acquire(&node->locked)) { + /* + * If we need to reschedule bail... so we can block. + */ + if (need_resched()) + goto unqueue; + + arch_mutex_cpu_relax(); + } + return true; + +unqueue: + /* + * Step - A -- stabilize @prev + * + * Undo our @prev->next assignment; this will make @prev's + * unlock()/unqueue() wait for a next pointer since @lock points to us + * (or later). + */ + + for (;;) { + if (prev->next == node && + cmpxchg(&prev->next, node, NULL) == node) + break; + + /* + * We can only fail the cmpxchg() racing against an unlock(), + * in which case we should observe @node->locked becomming + * true. + */ + if (smp_load_acquire(&node->locked)) + return true; + + arch_mutex_cpu_relax(); + + /* + * Or we race against a concurrent unqueue()'s step-B, in which + * case its step-C will write us a new @node->prev pointer. + */ + prev = ACCESS_ONCE(node->prev); + } + + /* + * Step - B -- stabilize @next + * + * Similar to unlock(), wait for @node->next or move @lock from @node + * back to @prev. + */ + + next = osq_wait_next(lock, node, prev); + if (!next) + return false; + + /* + * Step - C -- unlink + * + * @prev is stable because its still waiting for a new @prev->next + * pointer, @next is stable because our @node->next pointer is NULL and + * it will wait in Step-A. + */ + + ACCESS_ONCE(next->prev) = prev; + ACCESS_ONCE(prev->next) = next; + + return false; +} + +void osq_unlock(struct optimistic_spin_queue **lock) +{ + struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); + struct optimistic_spin_queue *next; + + /* + * Fast path for the uncontended case. + */ + if (likely(cmpxchg(lock, node, NULL) == node)) + return; + + /* + * Second most likely case. + */ + next = xchg(&node->next, NULL); + if (next) { + ACCESS_ONCE(next->locked) = 1; + return; + } + + next = osq_wait_next(lock, node, NULL); + if (next) + ACCESS_ONCE(next->locked) = 1; +} + +#endif + diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h index f2a5c63..a2dbac4 100644 --- a/kernel/locking/mcs_spinlock.h +++ b/kernel/locking/mcs_spinlock.h @@ -111,4 +111,19 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) arch_mcs_spin_unlock_contended(&next->locked); } +/* + * Cancellable version of the MCS lock above. + * + * Intended for adaptive spinning of sleeping locks: + * mutex_lock()/rwsem_down_{read,write}() etc. + */ + +struct optimistic_spin_queue { + struct optimistic_spin_queue *next, *prev; + int locked; /* 1 if lock acquired */ +}; + +extern bool osq_lock(struct optimistic_spin_queue **lock); +extern void osq_unlock(struct optimistic_spin_queue **lock); + #endif /* __LINUX_MCS_SPINLOCK_H */ diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index dc3d6f2..2670b84 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -53,7 +53,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) INIT_LIST_HEAD(&lock->wait_list); mutex_clear_owner(lock); #ifdef CONFIG_MUTEX_SPIN_ON_OWNER - lock->mcs_lock = NULL; + lock->osq = NULL; #endif debug_mutex_init(lock, name, key); @@ -403,7 +403,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (!mutex_can_spin_on_owner(lock)) goto slowpath; - mcs_spin_lock(&lock->mcs_lock, &node); + if (!osq_lock(&lock->osq)) + goto slowpath; + for (;;) { struct task_struct *owner; @@ -442,7 +444,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } mutex_set_owner(lock); - mcs_spin_unlock(&lock->mcs_lock, &node); + osq_unlock(&lock->osq); preempt_enable(); return 0; } @@ -464,7 +466,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, */ arch_mutex_cpu_relax(); } - mcs_spin_unlock(&lock->mcs_lock, &node); + osq_unlock(&lock->osq); slowpath: #endif spin_lock_mutex(&lock->wait_lock, flags); -- cgit v0.10.2 From 34c6bc2c919a55e5ad4e698510a2f35ee13ab900 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Feb 2014 16:21:09 +0100 Subject: locking/mutexes: Add extra reschedule point Add in an extra reschedule in an attempt to avoid getting reschedule the moment we've acquired the lock. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-zah5eyn9gu7qlgwh9r6n2anc@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 2670b84..02c61a9 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -468,6 +468,13 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } osq_unlock(&lock->osq); slowpath: + /* + * If we fell out of the spin path because of need_resched(), + * reschedule now, before we try-lock the mutex. This avoids getting + * scheduled out right after we obtained the mutex. + */ + if (need_resched()) + schedule_preempt_disabled(); #endif spin_lock_mutex(&lock->wait_lock, flags); -- cgit v0.10.2 From 9236a76d9e978aaa7709b8ff3f03c1a8b66d9ea7 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 11 Mar 2014 12:55:24 +0100 Subject: ata: Fix CS55xx dependencies As far as I know, the CS5520 and CS5530 chipsets were only used with 32-bit x86 Geode processors, so I think their drivers are only needed on this architecture, except for build testing purpose. While we're here, simplify the dependencies for the CS5535 driver. The CS5536 was used with the Geode processors, but also on MIPS Loongson/Lemote 2 systems, so let its driver be built for these two architectures only, except for build testing purpose. Signed-off-by: Jean Delvare Signed-off-by: Tejun Heo Cc: Ralf Baechle diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index ab93c33..2e6a6f7 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -411,7 +411,7 @@ config PATA_CMD64X config PATA_CS5520 tristate "CS5510/5520 PATA support" - depends on PCI + depends on PCI && (X86_32 || COMPILE_TEST) help This option enables support for the Cyrix 5510/5520 companion chip used with the MediaGX/Geode processor family. @@ -420,7 +420,7 @@ config PATA_CS5520 config PATA_CS5530 tristate "CS5530 PATA support" - depends on PCI + depends on PCI && (X86_32 || COMPILE_TEST) help This option enables support for the Cyrix/NatSemi/AMD CS5530 companion chip used with the MediaGX/Geode processor family. @@ -429,7 +429,7 @@ config PATA_CS5530 config PATA_CS5535 tristate "CS5535 PATA support (Experimental)" - depends on PCI && X86 && !X86_64 + depends on PCI && X86_32 help This option enables support for the NatSemi/AMD CS5535 companion chip used with the Geode processor family. @@ -438,7 +438,7 @@ config PATA_CS5535 config PATA_CS5536 tristate "CS5536 PATA support" - depends on PCI + depends on PCI && (X86_32 || MIPS || COMPILE_TEST) help This option enables support for the AMD CS5536 companion chip used with the Geode LX processor family. -- cgit v0.10.2 From e853a4e01ffd0962a197f61e792178b458ae4763 Mon Sep 17 00:00:00 2001 From: Silvan Jegen Date: Mon, 10 Mar 2014 22:33:50 +0100 Subject: pata_legacy: Remove dead code The current flow of control prevents this function from being called. Let's remove the call. Signed-off-by: Silvan Jegen Signed-off-by: Tejun Heo diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index be81642..bce2a8c 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -916,7 +916,6 @@ static __init int probe_chip_type(struct legacy_probe *probe) local_irq_restore(flags); return BIOS; } - local_irq_restore(flags); } if (ht6560a & mask) -- cgit v0.10.2 From 09df7c4c8097ca4a11393b1edd4997d786daad52 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 10 Mar 2014 19:32:22 -0400 Subject: x86: Remove CONFIG_X86_OOSTORE This was an optimization that made memcpy type benchmarks a little faster on ancient (Circa 1998) IDT Winchip CPUs. In real-life workloads, it wasn't even noticable, and I doubt anyone is running benchmarks on 16 year old silicon any more. Given this code has likely seen very little use over the last decade, let's just remove it. Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index c026cca..f3aaf23 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -341,10 +341,6 @@ config X86_USE_3DNOW def_bool y depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML -config X86_OOSTORE - def_bool y - depends on (MWINCHIP3D || MWINCHIPC6) && MTRR - # # P6_NOPs are a relatively minor optimization that require a family >= # 6 processor, except that it is broken on certain VIA chips. diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 04a4890..69bbb48 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -85,11 +85,7 @@ #else # define smp_rmb() barrier() #endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif +#define smp_wmb() barrier() #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ @@ -100,7 +96,7 @@ #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* SMP */ -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) +#if defined(CONFIG_X86_PPRO_FENCE) /* * For either of these options x86 doesn't have a strong TSO memory diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 34f69cb..91d9c69 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -237,7 +237,7 @@ memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) static inline void flush_write_buffers(void) { -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) +#if defined(CONFIG_X86_PPRO_FENCE) asm volatile("lock; addl $0,0(%%esp)": : :"memory"); #endif } diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index bf156de..0f62f54 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -26,10 +26,9 @@ # define LOCK_PTR_REG "D" #endif -#if defined(CONFIG_X86_32) && \ - (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) +#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE)) /* - * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock + * On PPro SMP, we use a locked operation to unlock * (PPro errata 66, 92) */ # define UNLOCK_LOCK_PREFIX LOCK_PREFIX diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 8779eda..d8fba5c 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -8,236 +8,6 @@ #include "cpu.h" -#ifdef CONFIG_X86_OOSTORE - -static u32 power2(u32 x) -{ - u32 s = 1; - - while (s <= x) - s <<= 1; - - return s >>= 1; -} - - -/* - * Set up an actual MCR - */ -static void centaur_mcr_insert(int reg, u32 base, u32 size, int key) -{ - u32 lo, hi; - - hi = base & ~0xFFF; - lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ - lo &= ~0xFFF; /* Remove the ctrl value bits */ - lo |= key; /* Attribute we wish to set */ - wrmsr(reg+MSR_IDT_MCR0, lo, hi); - mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ -} - -/* - * Figure what we can cover with MCR's - * - * Shortcut: We know you can't put 4Gig of RAM on a winchip - */ -static u32 ramtop(void) -{ - u32 clip = 0xFFFFFFFFUL; - u32 top = 0; - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - - if (e820.map[i].addr > 0xFFFFFFFFUL) - continue; - /* - * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophe already - */ - if (e820.map[i].type == E820_RESERVED) { - if (e820.map[i].addr >= 0x100000UL && - e820.map[i].addr < clip) - clip = e820.map[i].addr; - continue; - } - start = e820.map[i].addr; - end = e820.map[i].addr + e820.map[i].size; - if (start >= end) - continue; - if (end > top) - top = end; - } - /* - * Everything below 'top' should be RAM except for the ISA hole. - * Because of the limited MCR's we want to map NV/ACPI into our - * MCR range for gunk in RAM - * - * Clip might cause us to MCR insufficient RAM but that is an - * acceptable failure mode and should only bite obscure boxes with - * a VESA hole at 15Mb - * - * The second case Clip sometimes kicks in is when the EBDA is marked - * as reserved. Again we fail safe with reasonable results - */ - if (top > clip) - top = clip; - - return top; -} - -/* - * Compute a set of MCR's to give maximum coverage - */ -static int centaur_mcr_compute(int nr, int key) -{ - u32 mem = ramtop(); - u32 root = power2(mem); - u32 base = root; - u32 top = root; - u32 floor = 0; - int ct = 0; - - while (ct < nr) { - u32 fspace = 0; - u32 high; - u32 low; - - /* - * Find the largest block we will fill going upwards - */ - high = power2(mem-top); - - /* - * Find the largest block we will fill going downwards - */ - low = base/2; - - /* - * Don't fill below 1Mb going downwards as there - * is an ISA hole in the way. - */ - if (base <= 1024*1024) - low = 0; - - /* - * See how much space we could cover by filling below - * the ISA hole - */ - - if (floor == 0) - fspace = 512*1024; - else if (floor == 512*1024) - fspace = 128*1024; - - /* And forget ROM space */ - - /* - * Now install the largest coverage we get - */ - if (fspace > high && fspace > low) { - centaur_mcr_insert(ct, floor, fspace, key); - floor += fspace; - } else if (high > low) { - centaur_mcr_insert(ct, top, high, key); - top += high; - } else if (low > 0) { - base -= low; - centaur_mcr_insert(ct, base, low, key); - } else - break; - ct++; - } - /* - * We loaded ct values. We now need to set the mask. The caller - * must do this bit. - */ - return ct; -} - -static void centaur_create_optimal_mcr(void) -{ - int used; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining and weak write ordered. - * - * To experiment with: Linux never uses stack operations for - * mmio spaces so we could globally enable stack operation wc - * - * Load the registers with type 31 - full write combining, all - * writes weakly ordered. - */ - used = centaur_mcr_compute(6, 31); - - /* - * Wipe unused MCRs - */ - for (i = used; i < 8; i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -static void winchip2_create_optimal_mcr(void) -{ - u32 lo, hi; - int used; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining, weak store ordered. - * - * Load the registers with type 25 - * 8 - weak write ordering - * 16 - weak read ordering - * 1 - write combining - */ - used = centaur_mcr_compute(6, 25); - - /* - * Mark the registers we are using. - */ - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - for (i = 0; i < used; i++) - lo |= 1<<(9+i); - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - - /* - * Wipe unused MCRs - */ - - for (i = used; i < 8; i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -/* - * Handle the MCR key on the Winchip 2. - */ -static void winchip2_unprotect_mcr(void) -{ - u32 lo, hi; - u32 key; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo &= ~0x1C0; /* blank bits 8-6 */ - key = (lo>>17) & 7; - lo |= key<<6; /* replace with unlock key */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -static void winchip2_protect_mcr(void) -{ - u32 lo, hi; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo &= ~0x1C0; /* blank bits 8-6 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} -#endif /* CONFIG_X86_OOSTORE */ - #define ACE_PRESENT (1 << 6) #define ACE_ENABLED (1 << 7) #define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ @@ -362,20 +132,6 @@ static void init_centaur(struct cpuinfo_x86 *c) fcr_clr = DPDC; printk(KERN_NOTICE "Disabling bugged TSC.\n"); clear_cpu_cap(c, X86_FEATURE_TSC); -#ifdef CONFIG_X86_OOSTORE - centaur_create_optimal_mcr(); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - * - * The C6 original lacks weak read order - * - * Note 0x120 is write only on Winchip 1 - */ - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); -#endif break; case 8: switch (c->x86_mask) { @@ -392,40 +148,12 @@ static void init_centaur(struct cpuinfo_x86 *c) fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| E2MMX|EAMD3D; fcr_clr = DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - */ - lo |= 31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif break; case 9: name = "3"; fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| E2MMX|EAMD3D; fcr_clr = DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - */ - lo |= 31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif break; default: name = "??"; diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 7d01b8c..cc04e67 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -40,11 +40,7 @@ #define smp_rmb() barrier() #endif /* CONFIG_X86_PPRO_FENCE */ -#ifdef CONFIG_X86_OOSTORE -#define smp_wmb() wmb() -#else /* CONFIG_X86_OOSTORE */ #define smp_wmb() barrier() -#endif /* CONFIG_X86_OOSTORE */ #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -- cgit v0.10.2 From 731bd6a93a6e9172094a2322bd0ee964bb1f4d63 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sun, 2 Feb 2014 22:56:23 -0800 Subject: x86, fpu: Check tsk_used_math() in kernel_fpu_end() for eager FPU For non-eager fpu mode, thread's fpu state is allocated during the first fpu usage (in the context of device not available exception). This (math_state_restore()) can be a blocking call and hence we enable interrupts (which were originally disabled when the exception happened), allocate memory and disable interrupts etc. But the eager-fpu mode, call's the same math_state_restore() from kernel_fpu_end(). The assumption being that tsk_used_math() is always set for the eager-fpu mode and thus avoid the code path of enabling interrupts, allocating fpu state using blocking call and disable interrupts etc. But the below issue was noticed by Maarten Baert, Nate Eldredge and few others: If a user process dumps core on an ecrypt fs while aesni-intel is loaded, we get a BUG() in __find_get_block() complaining that it was called with interrupts disabled; then all further accesses to our ecrypt fs hang and we have to reboot. The aesni-intel code (encrypting the core file that we are writing) needs the FPU and quite properly wraps its code in kernel_fpu_{begin,end}(), the latter of which calls math_state_restore(). So after kernel_fpu_end(), interrupts may be disabled, which nobody seems to expect, and they stay that way until we eventually get to __find_get_block() which barfs. For eager fpu, most the time, tsk_used_math() is true. At few instances during thread exit, signal return handling etc, tsk_used_math() might be false. In kernel_fpu_end(), for eager-fpu, call math_state_restore() only if tsk_used_math() is set. Otherwise, don't bother. Kernel code path which cleared tsk_used_math() knows what needs to be done with the fpu state. Reported-by: Maarten Baert Reported-by: Nate Eldredge Suggested-by: Linus Torvalds Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1391410583.3801.6.camel@europa Cc: George Spelvin Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e8368c6..d5dd808 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -86,10 +86,19 @@ EXPORT_SYMBOL(__kernel_fpu_begin); void __kernel_fpu_end(void) { - if (use_eager_fpu()) - math_state_restore(); - else + if (use_eager_fpu()) { + /* + * For eager fpu, most the time, tsk_used_math() is true. + * Restore the user math as we are done with the kernel usage. + * At few instances during thread exit, signal handling etc, + * tsk_used_math() is false. Those few places will take proper + * actions, so we don't need to restore the math here. + */ + if (likely(tsk_used_math(current))) + math_state_restore(); + } else { stts(); + } } EXPORT_SYMBOL(__kernel_fpu_end); -- cgit v0.10.2 From f1c12837220c54c8d7138a4ee4a08c1ec67b68a2 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Tue, 11 Mar 2014 22:57:53 +0800 Subject: MAINTAINERS: update IMX kernel git tree Change Shawn's email address to his employer, and move IMX git tree to kernel.org. Cc: Sascha Hauer Signed-off-by: Shawn Guo Signed-off-by: Olof Johansson diff --git a/MAINTAINERS b/MAINTAINERS index 85b3dd8..5663cb8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -910,11 +910,11 @@ F: arch/arm/include/asm/hardware/dec21285.h F: arch/arm/mach-footbridge/ ARM/FREESCALE IMX / MXC ARM ARCHITECTURE -M: Shawn Guo +M: Shawn Guo M: Sascha Hauer L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained -T: git git://git.linaro.org/people/shawnguo/linux-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git F: arch/arm/mach-imx/ F: arch/arm/boot/dts/imx* F: arch/arm/configs/imx*_defconfig -- cgit v0.10.2 From f656d46bbb2d3ecdb71c998ccf657dccea8d19a9 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Mon, 10 Mar 2014 15:37:18 +0100 Subject: ARM: at91: fix network interface ordering for sama5d36 On the newly introduced sama5d36, Gigabit and 10/100 Ethernet network interfaces are probed in a different order than for the sama5d35. Moreover, users are accustomed to this order in bootloaders and backports for older kernel revisions. So this patch switches DT node order as it is done for the other dual-Ethernet sama5d3 SoC. Better interface numbering which does not depend on DT node order is being developed for stronger interface identification. Signed-off-by: Boris BREZILLON Signed-off-by: Nicolas Ferre Signed-off-by: Olof Johansson diff --git a/arch/arm/boot/dts/sama5d36.dtsi b/arch/arm/boot/dts/sama5d36.dtsi index 6c31c26..db58cad 100644 --- a/arch/arm/boot/dts/sama5d36.dtsi +++ b/arch/arm/boot/dts/sama5d36.dtsi @@ -8,8 +8,8 @@ */ #include "sama5d3.dtsi" #include "sama5d3_can.dtsi" -#include "sama5d3_emac.dtsi" #include "sama5d3_gmac.dtsi" +#include "sama5d3_emac.dtsi" #include "sama5d3_lcd.dtsi" #include "sama5d3_mci2.dtsi" #include "sama5d3_tcb1.dtsi" -- cgit v0.10.2 From d25f06ea466ea521b563b76661180b4e44714ae6 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 10 Mar 2014 06:55:55 -0400 Subject: vmxnet3: fix netpoll race condition vmxnet3's netpoll driver is incorrectly coded. It directly calls vmxnet3_do_poll, which is the driver internal napi poll routine. As the netpoll controller method doesn't block real napi polls in any way, there is a potential for race conditions in which the netpoll controller method and the napi poll method run concurrently. The result is data corruption causing panics such as this one recently observed: PID: 1371 TASK: ffff88023762caa0 CPU: 1 COMMAND: "rs:main Q:Reg" #0 [ffff88023abd5780] machine_kexec at ffffffff81038f3b #1 [ffff88023abd57e0] crash_kexec at ffffffff810c5d92 #2 [ffff88023abd58b0] oops_end at ffffffff8152b570 #3 [ffff88023abd58e0] die at ffffffff81010e0b #4 [ffff88023abd5910] do_trap at ffffffff8152add4 #5 [ffff88023abd5970] do_invalid_op at ffffffff8100cf95 #6 [ffff88023abd5a10] invalid_op at ffffffff8100bf9b [exception RIP: vmxnet3_rq_rx_complete+1968] RIP: ffffffffa00f1e80 RSP: ffff88023abd5ac8 RFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff88023b5dcee0 RCX: 00000000000000c0 RDX: 0000000000000000 RSI: 00000000000005f2 RDI: ffff88023b5dcee0 RBP: ffff88023abd5b48 R8: 0000000000000000 R9: ffff88023a3b6048 R10: 0000000000000000 R11: 0000000000000002 R12: ffff8802398d4cd8 R13: ffff88023af35140 R14: ffff88023b60c890 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff88023abd5b50] vmxnet3_do_poll at ffffffffa00f204a [vmxnet3] #8 [ffff88023abd5b80] vmxnet3_netpoll at ffffffffa00f209c [vmxnet3] #9 [ffff88023abd5ba0] netpoll_poll_dev at ffffffff81472bb7 The fix is to do as other drivers do, and have the poll controller call the top half interrupt handler, which schedules a napi poll properly to recieve frames Tested by myself, successfully. Signed-off-by: Neil Horman CC: Shreyas Bhatewara CC: "VMware, Inc." CC: "David S. Miller" CC: stable@vger.kernel.org Reviewed-by: Shreyas N Bhatewara Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3be786f..b7daa02 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1761,12 +1761,18 @@ static void vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); + int i; - if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE) - vmxnet3_disable_all_intrs(adapter); - - vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size); - vmxnet3_enable_all_intrs(adapter); + switch (adapter->intr.type) { + case VMXNET3_IT_MSIX: + for (i = 0; i < adapter->num_rx_queues; i++) + vmxnet3_msix_rx(0, &adapter->rx_queue[i]); + break; + case VMXNET3_IT_MSI: + default: + vmxnet3_intr(0, adapter->netdev); + break; + } } #endif /* CONFIG_NET_POLL_CONTROLLER */ -- cgit v0.10.2 From 83bf79b6bb64e686ccf0b66b6828b7bfe9f70ae9 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:31 +0100 Subject: stmmac: disable at run-time the EEE if not supported This patch is to disable the EEE (so HW and timers) for example when the phy communicates that the EEE can be supported anymore. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 078ad0e..b418f94 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -286,10 +286,25 @@ bool stmmac_eee_init(struct stmmac_priv *priv) /* MAC core supports the EEE feature. */ if (priv->dma_cap.eee) { + int tx_lpi_timer = priv->tx_lpi_timer; + /* Check if the PHY supports EEE */ - if (phy_init_eee(priv->phydev, 1)) + if (phy_init_eee(priv->phydev, 1)) { + /* To manage at run-time if the EEE cannot be supported + * anymore (for example because the lp caps have been + * changed). + * In that case the driver disable own timers. + */ + if (priv->eee_active) { + pr_debug("stmmac: disable EEE\n"); + del_timer_sync(&priv->eee_ctrl_timer); + priv->hw->mac->set_eee_timer(priv->ioaddr, 0, + tx_lpi_timer); + } + priv->eee_active = 0; goto out; - + } + /* Activate the EEE and start timers */ if (!priv->eee_active) { priv->eee_active = 1; init_timer(&priv->eee_ctrl_timer); @@ -300,13 +315,13 @@ bool stmmac_eee_init(struct stmmac_priv *priv) priv->hw->mac->set_eee_timer(priv->ioaddr, STMMAC_DEFAULT_LIT_LS, - priv->tx_lpi_timer); + tx_lpi_timer); } else /* Set HW EEE according to the speed */ priv->hw->mac->set_eee_pls(priv->ioaddr, priv->phydev->link); - pr_info("stmmac: Energy-Efficient Ethernet initialized\n"); + pr_debug("stmmac: Energy-Efficient Ethernet initialized\n"); ret = true; } -- cgit v0.10.2 From d916701c670701e1ab4d0ed3d4e64d6023bccec9 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:32 +0100 Subject: stmmac: fix and better tune the default buffer sizes This patch is to fix and tune the default buffer sizes. It reduces the default bufsize used by the driver from 4KiB to 1536 bytes. Patch has been tested on both ARM and SH4 platform based. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b418f94..5365836 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -92,8 +92,8 @@ static int tc = TC_DEFAULT; module_param(tc, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(tc, "DMA threshold control value"); -#define DMA_BUFFER_SIZE BUF_SIZE_4KiB -static int buf_sz = DMA_BUFFER_SIZE; +#define DEFAULT_BUFSIZE 1536 +static int buf_sz = DEFAULT_BUFSIZE; module_param(buf_sz, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(buf_sz, "DMA buffer size"); @@ -136,8 +136,8 @@ static void stmmac_verify_args(void) dma_rxsize = DMA_RX_SIZE; if (unlikely(dma_txsize < 0)) dma_txsize = DMA_TX_SIZE; - if (unlikely((buf_sz < DMA_BUFFER_SIZE) || (buf_sz > BUF_SIZE_16KiB))) - buf_sz = DMA_BUFFER_SIZE; + if (unlikely((buf_sz < DEFAULT_BUFSIZE) || (buf_sz > BUF_SIZE_16KiB))) + buf_sz = DEFAULT_BUFSIZE; if (unlikely(flow_ctrl > 1)) flow_ctrl = FLOW_AUTO; else if (likely(flow_ctrl < 0)) @@ -901,10 +901,10 @@ static int stmmac_set_bfsize(int mtu, int bufsize) ret = BUF_SIZE_8KiB; else if (mtu >= BUF_SIZE_2KiB) ret = BUF_SIZE_4KiB; - else if (mtu >= DMA_BUFFER_SIZE) + else if (mtu > DEFAULT_BUFSIZE) ret = BUF_SIZE_2KiB; else - ret = DMA_BUFFER_SIZE; + ret = DEFAULT_BUFSIZE; return ret; } -- cgit v0.10.2 From 29896a674c8ef3d75134dacb7b9cbb3f5b894b6d Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:33 +0100 Subject: stmmac: fix chained mode This patch is to fix the chain mode that was broken and generated a panic. This patch reviews the chain/ring modes now shaing the same structure and taking care about the pointers and callbacks. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c index 72d282b..c553f6b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c @@ -151,7 +151,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) sizeof(struct dma_desc))); } -const struct stmmac_chain_mode_ops chain_mode_ops = { +const struct stmmac_mode_ops chain_mode_ops = { .init = stmmac_init_dma_chain, .is_jumbo_frm = stmmac_is_jumbo_frm, .jumbo_frm = stmmac_jumbo_frm, diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 7834a39..74610f3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -419,20 +419,13 @@ struct mii_regs { unsigned int data; /* MII Data */ }; -struct stmmac_ring_mode_ops { - unsigned int (*is_jumbo_frm) (int len, int ehn_desc); - unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum); - void (*refill_desc3) (void *priv, struct dma_desc *p); - void (*init_desc3) (struct dma_desc *p); - void (*clean_desc3) (void *priv, struct dma_desc *p); - int (*set_16kib_bfsize) (int mtu); -}; - -struct stmmac_chain_mode_ops { +struct stmmac_mode_ops { void (*init) (void *des, dma_addr_t phy_addr, unsigned int size, unsigned int extend_desc); unsigned int (*is_jumbo_frm) (int len, int ehn_desc); unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum); + int (*set_16kib_bfsize)(int mtu); + void (*init_desc3)(struct dma_desc *p); void (*refill_desc3) (void *priv, struct dma_desc *p); void (*clean_desc3) (void *priv, struct dma_desc *p); }; @@ -441,8 +434,7 @@ struct mac_device_info { const struct stmmac_ops *mac; const struct stmmac_desc_ops *desc; const struct stmmac_dma_ops *dma; - const struct stmmac_ring_mode_ops *ring; - const struct stmmac_chain_mode_ops *chain; + const struct stmmac_mode_ops *mode; const struct stmmac_hwtimestamp *ptp; struct mii_regs mii; /* MII register Addresses */ struct mac_link link; @@ -460,7 +452,7 @@ void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, void stmmac_set_mac(void __iomem *ioaddr, bool enable); void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr); -extern const struct stmmac_ring_mode_ops ring_mode_ops; -extern const struct stmmac_chain_mode_ops chain_mode_ops; +extern const struct stmmac_mode_ops ring_mode_ops; +extern const struct stmmac_mode_ops chain_mode_ops; #endif /* __COMMON_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index a96c7c2..650a4be 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -100,10 +100,9 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) { struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; - if (unlikely(priv->plat->has_gmac)) - /* Fill DES3 in case of RING mode */ - if (priv->dma_buf_sz >= BUF_SIZE_8KiB) - p->des3 = p->des2 + BUF_SIZE_8KiB; + /* Fill DES3 in case of RING mode */ + if (priv->dma_buf_sz >= BUF_SIZE_8KiB) + p->des3 = p->des2 + BUF_SIZE_8KiB; } /* In ring mode we need to fill the desc3 because it is used as buffer */ @@ -126,7 +125,7 @@ static int stmmac_set_16kib_bfsize(int mtu) return ret; } -const struct stmmac_ring_mode_ops ring_mode_ops = { +const struct stmmac_mode_ops ring_mode_ops = { .is_jumbo_frm = stmmac_is_jumbo_frm, .jumbo_frm = stmmac_jumbo_frm, .refill_desc3 = stmmac_refill_desc3, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5365836..8543e1c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -966,9 +966,9 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, p->des2 = priv->rx_skbuff_dma[i]; - if ((priv->mode == STMMAC_RING_MODE) && + if ((priv->hw->mode->init_desc3) && (priv->dma_buf_sz == BUF_SIZE_16KiB)) - priv->hw->ring->init_desc3(p); + priv->hw->mode->init_desc3(p); return 0; } @@ -999,11 +999,8 @@ static int init_dma_desc_rings(struct net_device *dev) unsigned int bfsize = 0; int ret = -ENOMEM; - /* Set the max buffer size according to the DESC mode - * and the MTU. Note that RING mode allows 16KiB bsize. - */ - if (priv->mode == STMMAC_RING_MODE) - bfsize = priv->hw->ring->set_16kib_bfsize(dev->mtu); + if (priv->hw->mode->set_16kib_bfsize) + bfsize = priv->hw->mode->set_16kib_bfsize(dev->mtu); if (bfsize < BUF_SIZE_16KiB) bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz); @@ -1044,15 +1041,15 @@ static int init_dma_desc_rings(struct net_device *dev) /* Setup the chained descriptor addresses */ if (priv->mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) { - priv->hw->chain->init(priv->dma_erx, priv->dma_rx_phy, - rxsize, 1); - priv->hw->chain->init(priv->dma_etx, priv->dma_tx_phy, - txsize, 1); + priv->hw->mode->init(priv->dma_erx, priv->dma_rx_phy, + rxsize, 1); + priv->hw->mode->init(priv->dma_etx, priv->dma_tx_phy, + txsize, 1); } else { - priv->hw->chain->init(priv->dma_rx, priv->dma_rx_phy, - rxsize, 0); - priv->hw->chain->init(priv->dma_tx, priv->dma_tx_phy, - txsize, 0); + priv->hw->mode->init(priv->dma_rx, priv->dma_rx_phy, + rxsize, 0); + priv->hw->mode->init(priv->dma_tx, priv->dma_tx_phy, + txsize, 0); } } @@ -1303,7 +1300,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) DMA_TO_DEVICE); priv->tx_skbuff_dma[entry] = 0; } - priv->hw->ring->clean_desc3(priv, p); + priv->hw->mode->clean_desc3(priv, p); if (likely(skb != NULL)) { dev_kfree_skb(skb); @@ -1859,6 +1856,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int nfrags = skb_shinfo(skb)->nr_frags; struct dma_desc *desc, *first; unsigned int nopaged_len = skb_headlen(skb); + unsigned int enh_desc = priv->plat->enh_desc; if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) { if (!netif_queue_stopped(dev)) { @@ -1886,27 +1884,19 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) first = desc; /* To program the descriptors according to the size of the frame */ - if (priv->mode == STMMAC_RING_MODE) { - is_jumbo = priv->hw->ring->is_jumbo_frm(skb->len, - priv->plat->enh_desc); - if (unlikely(is_jumbo)) - entry = priv->hw->ring->jumbo_frm(priv, skb, - csum_insertion); - } else { - is_jumbo = priv->hw->chain->is_jumbo_frm(skb->len, - priv->plat->enh_desc); - if (unlikely(is_jumbo)) - entry = priv->hw->chain->jumbo_frm(priv, skb, - csum_insertion); - } + if (enh_desc) + is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc); + if (likely(!is_jumbo)) { desc->des2 = dma_map_single(priv->device, skb->data, nopaged_len, DMA_TO_DEVICE); priv->tx_skbuff_dma[entry] = desc->des2; priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum_insertion, priv->mode); - } else + } else { desc = first; + entry = priv->hw->mode->jumbo_frm(priv, skb, csum_insertion); + } for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -2044,7 +2034,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) p->des2 = priv->rx_skbuff_dma[entry]; - priv->hw->ring->refill_desc3(priv, p); + priv->hw->mode->refill_desc3(priv, p); if (netif_msg_rx_status(priv)) pr_debug("\trefill entry #%d\n", entry); @@ -2648,11 +2638,11 @@ static int stmmac_hw_init(struct stmmac_priv *priv) /* To use the chained or ring mode */ if (chain_mode) { - priv->hw->chain = &chain_mode_ops; + priv->hw->mode = &chain_mode_ops; pr_info(" Chain mode enabled\n"); priv->mode = STMMAC_CHAIN_MODE; } else { - priv->hw->ring = &ring_mode_ops; + priv->hw->mode = &ring_mode_ops; pr_info(" Ring mode enabled\n"); priv->mode = STMMAC_RING_MODE; } -- cgit v0.10.2 From c5e9103dc397114523c65bd24ff0548bcdca54b4 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:34 +0100 Subject: stmmac: dwmac-sti: fix broken STiD127 compatibility This is to fix the compatibility to the STiD127 SoC. Signed-off-by: Giuseppe Cavallaro Cc: Srinivas Kandagatla Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index c61bc72b..8fb32a8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -36,7 +36,7 @@ static const struct of_device_id stmmac_dt_ids[] = { #ifdef CONFIG_DWMAC_STI { .compatible = "st,stih415-dwmac", .data = &sti_gmac_data}, { .compatible = "st,stih416-dwmac", .data = &sti_gmac_data}, - { .compatible = "st,stih127-dwmac", .data = &sti_gmac_data}, + { .compatible = "st,stid127-dwmac", .data = &sti_gmac_data}, #endif /* SoC specific glue layers should come before generic bindings */ { .compatible = "st,spear600-gmac"}, -- cgit v0.10.2 From 89935315f192abf7068d0044cefc84f162c3c81f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 11 Mar 2014 22:40:27 +0800 Subject: PNP / ACPI: proper handling of ACPI IO/Memory resource parsing failures Before commit b355cee88e3b (ACPI / resources: ignore invalid ACPI device resources), if acpi_dev_resource_memory()/acpi_dev_resource_io() returns false, it means the the resource is not a memeory/IO resource. But after commit b355cee88e3b, those functions return false if the given memory/IO resource entry is invalid (the length of the resource is zero). This breaks pnpacpi_allocated_resource(), because it now recognizes the invalid memory/io resources as resources of unknown type. Thus users see confusing warning messages on machines with zero length ACPI memory/IO resources. Fix the problem by rearranging pnpacpi_allocated_resource() so that it calls acpi_dev_resource_memory() for memory type and IO type resources only, respectively. Fixes: b355cee88e3b (ACPI / resources: ignore invalid ACPI device resources) Signed-off-by: Zhang Rui Reported-and-tested-by: Markus Trippelsdorf Reported-and-tested-by: Julian Wollrath Reported-and-tested-by: Paul Bolle [rjw: Changelog] Signed-off-by: Rafael J. Wysocki diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 167f3d0..66977eb 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -183,9 +183,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, struct resource r = {0}; int i, flags; - if (acpi_dev_resource_memory(res, &r) - || acpi_dev_resource_io(res, &r) - || acpi_dev_resource_address_space(res, &r) + if (acpi_dev_resource_address_space(res, &r) || acpi_dev_resource_ext_address_space(res, &r)) { pnp_add_resource(dev, &r); return AE_OK; @@ -217,6 +215,17 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, } switch (res->type) { + case ACPI_RESOURCE_TYPE_MEMORY24: + case ACPI_RESOURCE_TYPE_MEMORY32: + case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: + if (acpi_dev_resource_memory(res, &r)) + pnp_add_resource(dev, &r); + break; + case ACPI_RESOURCE_TYPE_IO: + case ACPI_RESOURCE_TYPE_FIXED_IO: + if (acpi_dev_resource_io(res, &r)) + pnp_add_resource(dev, &r); + break; case ACPI_RESOURCE_TYPE_DMA: dma = &res->data.dma; if (dma->channel_count > 0 && dma->channels[0] != (u8) -1) -- cgit v0.10.2 From 8cb19905e9287a93ce7c2cbbdf742a060b00e219 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:04 +0200 Subject: skbuff: skb_segment: s/frag/nskb_frag/ frag points at nskb, so name it appropriately Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5d6236d..60e8cd7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2876,7 +2876,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) do { struct sk_buff *nskb; - skb_frag_t *frag; + skb_frag_t *nskb_frag; int hsize; int size; @@ -2969,7 +2969,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) continue; } - frag = skb_shinfo(nskb)->frags; + nskb_frag = skb_shinfo(nskb)->frags; skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); @@ -2997,13 +2997,13 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) goto err; } - *frag = *skb_frag; - __skb_frag_ref(frag); - size = skb_frag_size(frag); + *nskb_frag = *skb_frag; + __skb_frag_ref(nskb_frag); + size = skb_frag_size(nskb_frag); if (pos < offset) { - frag->page_offset += offset - pos; - skb_frag_size_sub(frag, offset - pos); + nskb_frag->page_offset += offset - pos; + skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; @@ -3013,11 +3013,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_frag++; pos += size; } else { - skb_frag_size_sub(frag, pos + size - (offset + len)); + skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } - frag++; + nskb_frag++; } skip_fraglist: -- cgit v0.10.2 From 4e1beba12d094c6c761ba5c49032b9b9e46380e8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:14 +0200 Subject: skbuff: skb_segment: s/skb_frag/frag/ skb_frag can in fact point at either skb or fskb so rename it generally "frag". Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 60e8cd7..d788a98 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2850,7 +2850,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - skb_frag_t *skb_frag = skb_shinfo(skb)->frags; + skb_frag_t *frag = skb_shinfo(skb)->frags; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2896,19 +2896,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) i = 0; nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; + frag = skb_shinfo(fskb)->frags; pos += skb_headlen(fskb); while (pos < offset + len) { BUG_ON(i >= nfrags); - size = skb_frag_size(skb_frag); + size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; - skb_frag++; + frag++; } nskb = skb_clone(fskb, GFP_ATOMIC); @@ -2982,7 +2982,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) i = 0; nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; + frag = skb_shinfo(fskb)->frags; BUG_ON(!nfrags); @@ -2997,7 +2997,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) goto err; } - *nskb_frag = *skb_frag; + *nskb_frag = *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); @@ -3010,7 +3010,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (pos + size <= offset + len) { i++; - skb_frag++; + frag++; pos += size; } else { skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); -- cgit v0.10.2 From df5771ffefb13f8af5392bd54fd7e2b596a3a357 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:19 +0200 Subject: skbuff: skb_segment: s/skb/head_skb/ rename local variable to make it easier to tell at a glance that we are dealing with a head skb. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d788a98..fdc065d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2838,41 +2838,42 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); /** * skb_segment - Perform protocol segmentation on skb. - * @skb: buffer to segment + * @head_skb: buffer to segment * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) +struct sk_buff *skb_segment(struct sk_buff *head_skb, + netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - skb_frag_t *frag = skb_shinfo(skb)->frags; - unsigned int mss = skb_shinfo(skb)->gso_size; - unsigned int doffset = skb->data - skb_mac_header(skb); + struct sk_buff *fskb = skb_shinfo(head_skb)->frag_list; + skb_frag_t *frag = skb_shinfo(head_skb)->frags; + unsigned int mss = skb_shinfo(head_skb)->gso_size; + unsigned int doffset = head_skb->data - skb_mac_header(head_skb); unsigned int offset = doffset; - unsigned int tnl_hlen = skb_tnl_header_len(skb); + unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int headroom; unsigned int len; __be16 proto; bool csum; int sg = !!(features & NETIF_F_SG); - int nfrags = skb_shinfo(skb)->nr_frags; + int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; - proto = skb_network_protocol(skb); + proto = skb_network_protocol(head_skb); if (unlikely(!proto)) return ERR_PTR(-EINVAL); csum = !!can_checksum_protocol(features, proto); - __skb_push(skb, doffset); - headroom = skb_headroom(skb); - pos = skb_headlen(skb); + __skb_push(head_skb, doffset); + headroom = skb_headroom(head_skb); + pos = skb_headlen(head_skb); do { struct sk_buff *nskb; @@ -2880,11 +2881,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) int hsize; int size; - len = skb->len - offset; + len = head_skb->len - offset; if (len > mss) len = mss; - hsize = skb_headlen(skb) - offset; + hsize = skb_headlen(head_skb) - offset; if (hsize < 0) hsize = 0; if (hsize > len || !sg) @@ -2933,7 +2934,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) __skb_push(nskb, doffset); } else { nskb = __alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC, skb_alloc_rx_flag(skb), + GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) @@ -2949,12 +2950,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) segs = nskb; tail = nskb; - __copy_skb_header(nskb, skb); - nskb->mac_len = skb->mac_len; + __copy_skb_header(nskb, head_skb); + nskb->mac_len = head_skb->mac_len; skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); - skb_copy_from_linear_data_offset(skb, -tnl_hlen, + skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); @@ -2963,7 +2964,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (!sg) { nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(skb, offset, + nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); continue; @@ -2971,10 +2972,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) nskb_frag = skb_shinfo(nskb)->frags; - skb_copy_from_linear_data_offset(skb, offset, + skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { @@ -3031,7 +3033,7 @@ perform_csum_check: nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; } - } while ((offset += len) < skb->len); + } while ((offset += len) < head_skb->len); return segs; -- cgit v0.10.2 From 1a4cedaf65491e66e1e55b8428c89209da729209 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 19:27:59 +0200 Subject: skbuff: skb_segment: s/fskb/list_skb/ fskb is unrelated to frag: it's coming from frag_list. Rename it list_skb to avoid confusion. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fdc065d..dc4f768 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2850,7 +2850,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(head_skb)->frag_list; + struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); @@ -2891,14 +2891,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags && skb_headlen(fskb) && - (skb_headlen(fskb) == len || sg)) { - BUG_ON(skb_headlen(fskb) > len); + if (!hsize && i >= nfrags && skb_headlen(list_skb) && + (skb_headlen(list_skb) == len || sg)) { + BUG_ON(skb_headlen(list_skb) > len); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - frag = skb_shinfo(fskb)->frags; - pos += skb_headlen(fskb); + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; + pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); @@ -2912,8 +2912,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, frag++; } - nskb = skb_clone(fskb, GFP_ATOMIC); - fskb = fskb->next; + nskb = skb_clone(list_skb, GFP_ATOMIC); + list_skb = list_skb->next; if (unlikely(!nskb)) goto err; @@ -2980,15 +2980,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, while (pos < offset + len) { if (i >= nfrags) { - BUG_ON(skb_headlen(fskb)); + BUG_ON(skb_headlen(list_skb)); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - frag = skb_shinfo(fskb)->frags; + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; BUG_ON(!nfrags); - fskb = fskb->next; + list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= -- cgit v0.10.2 From 1fd819ecb90cc9b822cd84d3056ddba315d3340f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 19:28:08 +0200 Subject: skbuff: skb_segment: orphan frags before copying skb_segment copies frags around, so we need to copy them carefully to avoid accessing user memory after reporting completion to userspace through a callback. skb_segment doesn't normally happen on datapath: TSO needs to be disabled - so disabling zero copy in this case does not look like a big deal. Signed-off-by: Michael S. Tsirkin Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dc4f768..869c7af 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2854,6 +2854,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); + struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int headroom; @@ -2898,6 +2899,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; pos += skb_headlen(list_skb); while (pos < offset + len) { @@ -2985,6 +2987,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; BUG_ON(!nfrags); @@ -2999,6 +3002,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, goto err; } + if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) + goto err; + *nskb_frag = *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); -- cgit v0.10.2 From c3f9b01849ef3bc69024990092b9f42e20df7797 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Mar 2014 09:50:11 -0700 Subject: tcp: tcp_release_cb() should release socket ownership Lars Persson reported following deadlock : -000 |M:0x0:0x802B6AF8(asm) <-- arch_spin_lock -001 |tcp_v4_rcv(skb = 0x8BD527A0) <-- sk = 0x8BE6B2A0 -002 |ip_local_deliver_finish(skb = 0x8BD527A0) -003 |__netif_receive_skb_core(skb = 0x8BD527A0, ?) -004 |netif_receive_skb(skb = 0x8BD527A0) -005 |elk_poll(napi = 0x8C770500, budget = 64) -006 |net_rx_action(?) -007 |__do_softirq() -008 |do_softirq() -009 |local_bh_enable() -010 |tcp_rcv_established(sk = 0x8BE6B2A0, skb = 0x87D3A9E0, th = 0x814EBE14, ?) -011 |tcp_v4_do_rcv(sk = 0x8BE6B2A0, skb = 0x87D3A9E0) -012 |tcp_delack_timer_handler(sk = 0x8BE6B2A0) -013 |tcp_release_cb(sk = 0x8BE6B2A0) -014 |release_sock(sk = 0x8BE6B2A0) -015 |tcp_sendmsg(?, sk = 0x8BE6B2A0, ?, ?) -016 |sock_sendmsg(sock = 0x8518C4C0, msg = 0x87D8DAA8, size = 4096) -017 |kernel_sendmsg(?, ?, ?, ?, size = 4096) -018 |smb_send_kvec() -019 |smb_send_rqst(server = 0x87C4D400, rqst = 0x87D8DBA0) -020 |cifs_call_async() -021 |cifs_async_writev(wdata = 0x87FD6580) -022 |cifs_writepages(mapping = 0x852096E4, wbc = 0x87D8DC88) -023 |__writeback_single_inode(inode = 0x852095D0, wbc = 0x87D8DC88) -024 |writeback_sb_inodes(sb = 0x87D6D800, wb = 0x87E4A9C0, work = 0x87D8DD88) -025 |__writeback_inodes_wb(wb = 0x87E4A9C0, work = 0x87D8DD88) -026 |wb_writeback(wb = 0x87E4A9C0, work = 0x87D8DD88) -027 |wb_do_writeback(wb = 0x87E4A9C0, force_wait = 0) -028 |bdi_writeback_workfn(work = 0x87E4A9CC) -029 |process_one_work(worker = 0x8B045880, work = 0x87E4A9CC) -030 |worker_thread(__worker = 0x8B045880) -031 |kthread(_create = 0x87CADD90) -032 |ret_from_kernel_thread(asm) Bug occurs because __tcp_checksum_complete_user() enables BH, assuming it is running from softirq context. Lars trace involved a NIC without RX checksum support but other points are problematic as well, like the prequeue stuff. Problem is triggered by a timer, that found socket being owned by user. tcp_release_cb() should call tcp_write_timer_handler() or tcp_delack_timer_handler() in the appropriate context : BH disabled and socket lock held, but 'owned' field cleared, as if they were running from timer handlers. Fixes: 6f458dfb4092 ("tcp: improve latencies of timer triggered events") Reported-by: Lars Persson Tested-by: Lars Persson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/net/sock.h b/include/net/sock.h index 7c4167b..b9586a1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1488,6 +1488,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) */ #define sock_owned_by_user(sk) ((sk)->sk_lock.owned) +static inline void sock_release_ownership(struct sock *sk) +{ + sk->sk_lock.owned = 0; +} + /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. diff --git a/net/core/sock.c b/net/core/sock.c index 5b6a943..c0fc6bd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2357,10 +2357,13 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); + /* Warning : release_cb() might need to release sk ownership, + * ie call sock_release_ownership(sk) before us. + */ if (sk->sk_prot->release_cb) sk->sk_prot->release_cb(sk); - sk->sk_lock.owned = 0; + sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f0eb4e3..17a11e6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -767,6 +767,17 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); + /* Here begins the tricky part : + * We are called from release_sock() with : + * 1) BH disabled + * 2) sk_lock.slock spinlock held + * 3) socket owned by us (sk->sk_lock.owned == 1) + * + * But following code is meant to be called from BH handlers, + * so we should keep BH disabled, but early release socket ownership + */ + sock_release_ownership(sk); + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); __sock_put(sk); -- cgit v0.10.2 From 7b52ad2efa4926ca7f399e5e63cf9a587396cd87 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 6 Jan 2014 14:56:17 -0800 Subject: clocksource: arch_timer: Set dynamic irq affinity on mmio clockevent Set the CLOCK_EVT_FEAT_DYNIRQ flag on the memory mapped clockevent so that we save power by waking up the CPU with the next event when this timer is used in broadcast mode. Signed-off-by: Stephen Boyd Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 95fb944..57e823c 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -277,6 +277,7 @@ static void __arch_timer_setup(unsigned type, clk->set_next_event = arch_timer_set_next_event_phys; } } else { + clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; clk->rating = 400; clk->cpumask = cpu_all_mask; -- cgit v0.10.2 From 9affbd2458eca452e63b108e1a6258cf370f6eb9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 21 Jan 2014 15:04:14 +0100 Subject: ARM: u300: move timer driver to clocksource Move the U300 timer driver down to the clocksource driver subsystem and keep arch/arm clean. Cc: Daniel Lezcano Cc: Thomas Gleixner Signed-off-by: Linus Walleij Signed-off-by: Daniel Lezcano diff --git a/MAINTAINERS b/MAINTAINERS index b2cf5cf..edd6139 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1319,6 +1319,7 @@ M: Linus Walleij L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Supported F: arch/arm/mach-u300/ +F: drivers/clocksource/timer-u300.c F: drivers/i2c/busses/i2c-stu300.c F: drivers/rtc/rtc-coh901331.c F: drivers/watchdog/coh901327_wdt.c diff --git a/arch/arm/mach-u300/Makefile b/arch/arm/mach-u300/Makefile index 0f362b6..3ec74ac 100644 --- a/arch/arm/mach-u300/Makefile +++ b/arch/arm/mach-u300/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel, U300 machine. # -obj-y := core.o timer.o +obj-y := core.o obj-m := obj-n := obj- := diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c deleted file mode 100644 index fe08fd3..0000000 --- a/arch/arm/mach-u300/timer.c +++ /dev/null @@ -1,451 +0,0 @@ -/* - * - * arch/arm/mach-u300/timer.c - * - * - * Copyright (C) 2007-2009 ST-Ericsson AB - * License terms: GNU General Public License (GPL) version 2 - * Timer COH 901 328, runs the OS timer interrupt. - * Author: Linus Walleij - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Generic stuff */ -#include -#include - -/* - * APP side special timer registers - * This timer contains four timers which can fire an interrupt each. - * OS (operating system) timer @ 32768 Hz - * DD (device driver) timer @ 1 kHz - * GP1 (general purpose 1) timer @ 1MHz - * GP2 (general purpose 2) timer @ 1MHz - */ - -/* Reset OS Timer 32bit (-/W) */ -#define U300_TIMER_APP_ROST (0x0000) -#define U300_TIMER_APP_ROST_TIMER_RESET (0x00000000) -/* Enable OS Timer 32bit (-/W) */ -#define U300_TIMER_APP_EOST (0x0004) -#define U300_TIMER_APP_EOST_TIMER_ENABLE (0x00000000) -/* Disable OS Timer 32bit (-/W) */ -#define U300_TIMER_APP_DOST (0x0008) -#define U300_TIMER_APP_DOST_TIMER_DISABLE (0x00000000) -/* OS Timer Mode Register 32bit (-/W) */ -#define U300_TIMER_APP_SOSTM (0x000c) -#define U300_TIMER_APP_SOSTM_MODE_CONTINUOUS (0x00000000) -#define U300_TIMER_APP_SOSTM_MODE_ONE_SHOT (0x00000001) -/* OS Timer Status Register 32bit (R/-) */ -#define U300_TIMER_APP_OSTS (0x0010) -#define U300_TIMER_APP_OSTS_TIMER_STATE_MASK (0x0000000F) -#define U300_TIMER_APP_OSTS_TIMER_STATE_IDLE (0x00000001) -#define U300_TIMER_APP_OSTS_TIMER_STATE_ACTIVE (0x00000002) -#define U300_TIMER_APP_OSTS_ENABLE_IND (0x00000010) -#define U300_TIMER_APP_OSTS_MODE_MASK (0x00000020) -#define U300_TIMER_APP_OSTS_MODE_CONTINUOUS (0x00000000) -#define U300_TIMER_APP_OSTS_MODE_ONE_SHOT (0x00000020) -#define U300_TIMER_APP_OSTS_IRQ_ENABLED_IND (0x00000040) -#define U300_TIMER_APP_OSTS_IRQ_PENDING_IND (0x00000080) -/* OS Timer Current Count Register 32bit (R/-) */ -#define U300_TIMER_APP_OSTCC (0x0014) -/* OS Timer Terminal Count Register 32bit (R/W) */ -#define U300_TIMER_APP_OSTTC (0x0018) -/* OS Timer Interrupt Enable Register 32bit (-/W) */ -#define U300_TIMER_APP_OSTIE (0x001c) -#define U300_TIMER_APP_OSTIE_IRQ_DISABLE (0x00000000) -#define U300_TIMER_APP_OSTIE_IRQ_ENABLE (0x00000001) -/* OS Timer Interrupt Acknowledge Register 32bit (-/W) */ -#define U300_TIMER_APP_OSTIA (0x0020) -#define U300_TIMER_APP_OSTIA_IRQ_ACK (0x00000080) - -/* Reset DD Timer 32bit (-/W) */ -#define U300_TIMER_APP_RDDT (0x0040) -#define U300_TIMER_APP_RDDT_TIMER_RESET (0x00000000) -/* Enable DD Timer 32bit (-/W) */ -#define U300_TIMER_APP_EDDT (0x0044) -#define U300_TIMER_APP_EDDT_TIMER_ENABLE (0x00000000) -/* Disable DD Timer 32bit (-/W) */ -#define U300_TIMER_APP_DDDT (0x0048) -#define U300_TIMER_APP_DDDT_TIMER_DISABLE (0x00000000) -/* DD Timer Mode Register 32bit (-/W) */ -#define U300_TIMER_APP_SDDTM (0x004c) -#define U300_TIMER_APP_SDDTM_MODE_CONTINUOUS (0x00000000) -#define U300_TIMER_APP_SDDTM_MODE_ONE_SHOT (0x00000001) -/* DD Timer Status Register 32bit (R/-) */ -#define U300_TIMER_APP_DDTS (0x0050) -#define U300_TIMER_APP_DDTS_TIMER_STATE_MASK (0x0000000F) -#define U300_TIMER_APP_DDTS_TIMER_STATE_IDLE (0x00000001) -#define U300_TIMER_APP_DDTS_TIMER_STATE_ACTIVE (0x00000002) -#define U300_TIMER_APP_DDTS_ENABLE_IND (0x00000010) -#define U300_TIMER_APP_DDTS_MODE_MASK (0x00000020) -#define U300_TIMER_APP_DDTS_MODE_CONTINUOUS (0x00000000) -#define U300_TIMER_APP_DDTS_MODE_ONE_SHOT (0x00000020) -#define U300_TIMER_APP_DDTS_IRQ_ENABLED_IND (0x00000040) -#define U300_TIMER_APP_DDTS_IRQ_PENDING_IND (0x00000080) -/* DD Timer Current Count Register 32bit (R/-) */ -#define U300_TIMER_APP_DDTCC (0x0054) -/* DD Timer Terminal Count Register 32bit (R/W) */ -#define U300_TIMER_APP_DDTTC (0x0058) -/* DD Timer Interrupt Enable Register 32bit (-/W) */ -#define U300_TIMER_APP_DDTIE (0x005c) -#define U300_TIMER_APP_DDTIE_IRQ_DISABLE (0x00000000) -#define U300_TIMER_APP_DDTIE_IRQ_ENABLE (0x00000001) -/* DD Timer Interrupt Acknowledge Register 32bit (-/W) */ -#define U300_TIMER_APP_DDTIA (0x0060) -#define U300_TIMER_APP_DDTIA_IRQ_ACK (0x00000080) - -/* Reset GP1 Timer 32bit (-/W) */ -#define U300_TIMER_APP_RGPT1 (0x0080) -#define U300_TIMER_APP_RGPT1_TIMER_RESET (0x00000000) -/* Enable GP1 Timer 32bit (-/W) */ -#define U300_TIMER_APP_EGPT1 (0x0084) -#define U300_TIMER_APP_EGPT1_TIMER_ENABLE (0x00000000) -/* Disable GP1 Timer 32bit (-/W) */ -#define U300_TIMER_APP_DGPT1 (0x0088) -#define U300_TIMER_APP_DGPT1_TIMER_DISABLE (0x00000000) -/* GP1 Timer Mode Register 32bit (-/W) */ -#define U300_TIMER_APP_SGPT1M (0x008c) -#define U300_TIMER_APP_SGPT1M_MODE_CONTINUOUS (0x00000000) -#define U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT (0x00000001) -/* GP1 Timer Status Register 32bit (R/-) */ -#define U300_TIMER_APP_GPT1S (0x0090) -#define U300_TIMER_APP_GPT1S_TIMER_STATE_MASK (0x0000000F) -#define U300_TIMER_APP_GPT1S_TIMER_STATE_IDLE (0x00000001) -#define U300_TIMER_APP_GPT1S_TIMER_STATE_ACTIVE (0x00000002) -#define U300_TIMER_APP_GPT1S_ENABLE_IND (0x00000010) -#define U300_TIMER_APP_GPT1S_MODE_MASK (0x00000020) -#define U300_TIMER_APP_GPT1S_MODE_CONTINUOUS (0x00000000) -#define U300_TIMER_APP_GPT1S_MODE_ONE_SHOT (0x00000020) -#define U300_TIMER_APP_GPT1S_IRQ_ENABLED_IND (0x00000040) -#define U300_TIMER_APP_GPT1S_IRQ_PENDING_IND (0x00000080) -/* GP1 Timer Current Count Register 32bit (R/-) */ -#define U300_TIMER_APP_GPT1CC (0x0094) -/* GP1 Timer Terminal Count Register 32bit (R/W) */ -#define U300_TIMER_APP_GPT1TC (0x0098) -/* GP1 Timer Interrupt Enable Register 32bit (-/W) */ -#define U300_TIMER_APP_GPT1IE (0x009c) -#define U300_TIMER_APP_GPT1IE_IRQ_DISABLE (0x00000000) -#define U300_TIMER_APP_GPT1IE_IRQ_ENABLE (0x00000001) -/* GP1 Timer Interrupt Acknowledge Register 32bit (-/W) */ -#define U300_TIMER_APP_GPT1IA (0x00a0) -#define U300_TIMER_APP_GPT1IA_IRQ_ACK (0x00000080) - -/* Reset GP2 Timer 32bit (-/W) */ -#define U300_TIMER_APP_RGPT2 (0x00c0) -#define U300_TIMER_APP_RGPT2_TIMER_RESET (0x00000000) -/* Enable GP2 Timer 32bit (-/W) */ -#define U300_TIMER_APP_EGPT2 (0x00c4) -#define U300_TIMER_APP_EGPT2_TIMER_ENABLE (0x00000000) -/* Disable GP2 Timer 32bit (-/W) */ -#define U300_TIMER_APP_DGPT2 (0x00c8) -#define U300_TIMER_APP_DGPT2_TIMER_DISABLE (0x00000000) -/* GP2 Timer Mode Register 32bit (-/W) */ -#define U300_TIMER_APP_SGPT2M (0x00cc) -#define U300_TIMER_APP_SGPT2M_MODE_CONTINUOUS (0x00000000) -#define U300_TIMER_APP_SGPT2M_MODE_ONE_SHOT (0x00000001) -/* GP2 Timer Status Register 32bit (R/-) */ -#define U300_TIMER_APP_GPT2S (0x00d0) -#define U300_TIMER_APP_GPT2S_TIMER_STATE_MASK (0x0000000F) -#define U300_TIMER_APP_GPT2S_TIMER_STATE_IDLE (0x00000001) -#define U300_TIMER_APP_GPT2S_TIMER_STATE_ACTIVE (0x00000002) -#define U300_TIMER_APP_GPT2S_ENABLE_IND (0x00000010) -#define U300_TIMER_APP_GPT2S_MODE_MASK (0x00000020) -#define U300_TIMER_APP_GPT2S_MODE_CONTINUOUS (0x00000000) -#define U300_TIMER_APP_GPT2S_MODE_ONE_SHOT (0x00000020) -#define U300_TIMER_APP_GPT2S_IRQ_ENABLED_IND (0x00000040) -#define U300_TIMER_APP_GPT2S_IRQ_PENDING_IND (0x00000080) -/* GP2 Timer Current Count Register 32bit (R/-) */ -#define U300_TIMER_APP_GPT2CC (0x00d4) -/* GP2 Timer Terminal Count Register 32bit (R/W) */ -#define U300_TIMER_APP_GPT2TC (0x00d8) -/* GP2 Timer Interrupt Enable Register 32bit (-/W) */ -#define U300_TIMER_APP_GPT2IE (0x00dc) -#define U300_TIMER_APP_GPT2IE_IRQ_DISABLE (0x00000000) -#define U300_TIMER_APP_GPT2IE_IRQ_ENABLE (0x00000001) -/* GP2 Timer Interrupt Acknowledge Register 32bit (-/W) */ -#define U300_TIMER_APP_GPT2IA (0x00e0) -#define U300_TIMER_APP_GPT2IA_IRQ_ACK (0x00000080) - -/* Clock request control register - all four timers */ -#define U300_TIMER_APP_CRC (0x100) -#define U300_TIMER_APP_CRC_CLOCK_REQUEST_ENABLE (0x00000001) - -static void __iomem *u300_timer_base; - -struct u300_clockevent_data { - struct clock_event_device cevd; - unsigned ticks_per_jiffy; -}; - -/* - * The u300_set_mode() function is always called first, if we - * have oneshot timer active, the oneshot scheduling function - * u300_set_next_event() is called immediately after. - */ -static void u300_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - struct u300_clockevent_data *cevdata = - container_of(evt, struct u300_clockevent_data, cevd); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - /* Disable interrupts on GPT1 */ - writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, - u300_timer_base + U300_TIMER_APP_GPT1IE); - /* Disable GP1 while we're reprogramming it. */ - writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, - u300_timer_base + U300_TIMER_APP_DGPT1); - /* - * Set the periodic mode to a certain number of ticks per - * jiffy. - */ - writel(cevdata->ticks_per_jiffy, - u300_timer_base + U300_TIMER_APP_GPT1TC); - /* - * Set continuous mode, so the timer keeps triggering - * interrupts. - */ - writel(U300_TIMER_APP_SGPT1M_MODE_CONTINUOUS, - u300_timer_base + U300_TIMER_APP_SGPT1M); - /* Enable timer interrupts */ - writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, - u300_timer_base + U300_TIMER_APP_GPT1IE); - /* Then enable the OS timer again */ - writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, - u300_timer_base + U300_TIMER_APP_EGPT1); - break; - case CLOCK_EVT_MODE_ONESHOT: - /* Just break; here? */ - /* - * The actual event will be programmed by the next event hook, - * so we just set a dummy value somewhere at the end of the - * universe here. - */ - /* Disable interrupts on GPT1 */ - writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, - u300_timer_base + U300_TIMER_APP_GPT1IE); - /* Disable GP1 while we're reprogramming it. */ - writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, - u300_timer_base + U300_TIMER_APP_DGPT1); - /* - * Expire far in the future, u300_set_next_event() will be - * called soon... - */ - writel(0xFFFFFFFF, u300_timer_base + U300_TIMER_APP_GPT1TC); - /* We run one shot per tick here! */ - writel(U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT, - u300_timer_base + U300_TIMER_APP_SGPT1M); - /* Enable interrupts for this timer */ - writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, - u300_timer_base + U300_TIMER_APP_GPT1IE); - /* Enable timer */ - writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, - u300_timer_base + U300_TIMER_APP_EGPT1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - /* Disable interrupts on GP1 */ - writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, - u300_timer_base + U300_TIMER_APP_GPT1IE); - /* Disable GP1 */ - writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, - u300_timer_base + U300_TIMER_APP_DGPT1); - break; - case CLOCK_EVT_MODE_RESUME: - /* Ignore this call */ - break; - } -} - -/* - * The app timer in one shot mode obviously has to be reprogrammed - * in EXACTLY this sequence to work properly. Do NOT try to e.g. replace - * the interrupt disable + timer disable commands with a reset command, - * it will fail miserably. Apparently (and I found this the hard way) - * the timer is very sensitive to the instruction order, though you don't - * get that impression from the data sheet. - */ -static int u300_set_next_event(unsigned long cycles, - struct clock_event_device *evt) - -{ - /* Disable interrupts on GPT1 */ - writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, - u300_timer_base + U300_TIMER_APP_GPT1IE); - /* Disable GP1 while we're reprogramming it. */ - writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, - u300_timer_base + U300_TIMER_APP_DGPT1); - /* Reset the General Purpose timer 1. */ - writel(U300_TIMER_APP_RGPT1_TIMER_RESET, - u300_timer_base + U300_TIMER_APP_RGPT1); - /* IRQ in n * cycles */ - writel(cycles, u300_timer_base + U300_TIMER_APP_GPT1TC); - /* - * We run one shot per tick here! (This is necessary to reconfigure, - * the timer will tilt if you don't!) - */ - writel(U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT, - u300_timer_base + U300_TIMER_APP_SGPT1M); - /* Enable timer interrupts */ - writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, - u300_timer_base + U300_TIMER_APP_GPT1IE); - /* Then enable the OS timer again */ - writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, - u300_timer_base + U300_TIMER_APP_EGPT1); - return 0; -} - -static struct u300_clockevent_data u300_clockevent_data = { - /* Use general purpose timer 1 as clock event */ - .cevd = { - .name = "GPT1", - /* Reasonably fast and accurate clock event */ - .rating = 300, - .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT, - .set_next_event = u300_set_next_event, - .set_mode = u300_set_mode, - }, -}; - -/* Clock event timer interrupt handler */ -static irqreturn_t u300_timer_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *evt = &u300_clockevent_data.cevd; - /* ACK/Clear timer IRQ for the APP GPT1 Timer */ - - writel(U300_TIMER_APP_GPT1IA_IRQ_ACK, - u300_timer_base + U300_TIMER_APP_GPT1IA); - evt->event_handler(evt); - return IRQ_HANDLED; -} - -static struct irqaction u300_timer_irq = { - .name = "U300 Timer Tick", - .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, - .handler = u300_timer_interrupt, -}; - -/* - * Override the global weak sched_clock symbol with this - * local implementation which uses the clocksource to get some - * better resolution when scheduling the kernel. We accept that - * this wraps around for now, since it is just a relative time - * stamp. (Inspired by OMAP implementation.) - */ - -static u64 notrace u300_read_sched_clock(void) -{ - return readl(u300_timer_base + U300_TIMER_APP_GPT2CC); -} - -static unsigned long u300_read_current_timer(void) -{ - return readl(u300_timer_base + U300_TIMER_APP_GPT2CC); -} - -static struct delay_timer u300_delay_timer; - -/* - * This sets up the system timers, clock source and clock event. - */ -static void __init u300_timer_init_of(struct device_node *np) -{ - unsigned int irq; - struct clk *clk; - unsigned long rate; - - u300_timer_base = of_iomap(np, 0); - if (!u300_timer_base) - panic("could not ioremap system timer\n"); - - /* Get the IRQ for the GP1 timer */ - irq = irq_of_parse_and_map(np, 2); - if (!irq) - panic("no IRQ for system timer\n"); - - pr_info("U300 GP1 timer @ base: %p, IRQ: %u\n", u300_timer_base, irq); - - /* Clock the interrupt controller */ - clk = of_clk_get(np, 0); - BUG_ON(IS_ERR(clk)); - clk_prepare_enable(clk); - rate = clk_get_rate(clk); - - u300_clockevent_data.ticks_per_jiffy = DIV_ROUND_CLOSEST(rate, HZ); - - sched_clock_register(u300_read_sched_clock, 32, rate); - - u300_delay_timer.read_current_timer = &u300_read_current_timer; - u300_delay_timer.freq = rate; - register_current_timer_delay(&u300_delay_timer); - - /* - * Disable the "OS" and "DD" timers - these are designed for Symbian! - * Example usage in cnh1601578 cpu subsystem pd_timer_app.c - */ - writel(U300_TIMER_APP_CRC_CLOCK_REQUEST_ENABLE, - u300_timer_base + U300_TIMER_APP_CRC); - writel(U300_TIMER_APP_ROST_TIMER_RESET, - u300_timer_base + U300_TIMER_APP_ROST); - writel(U300_TIMER_APP_DOST_TIMER_DISABLE, - u300_timer_base + U300_TIMER_APP_DOST); - writel(U300_TIMER_APP_RDDT_TIMER_RESET, - u300_timer_base + U300_TIMER_APP_RDDT); - writel(U300_TIMER_APP_DDDT_TIMER_DISABLE, - u300_timer_base + U300_TIMER_APP_DDDT); - - /* Reset the General Purpose timer 1. */ - writel(U300_TIMER_APP_RGPT1_TIMER_RESET, - u300_timer_base + U300_TIMER_APP_RGPT1); - - /* Set up the IRQ handler */ - setup_irq(irq, &u300_timer_irq); - - /* Reset the General Purpose timer 2 */ - writel(U300_TIMER_APP_RGPT2_TIMER_RESET, - u300_timer_base + U300_TIMER_APP_RGPT2); - /* Set this timer to run around forever */ - writel(0xFFFFFFFFU, u300_timer_base + U300_TIMER_APP_GPT2TC); - /* Set continuous mode so it wraps around */ - writel(U300_TIMER_APP_SGPT2M_MODE_CONTINUOUS, - u300_timer_base + U300_TIMER_APP_SGPT2M); - /* Disable timer interrupts */ - writel(U300_TIMER_APP_GPT2IE_IRQ_DISABLE, - u300_timer_base + U300_TIMER_APP_GPT2IE); - /* Then enable the GP2 timer to use as a free running us counter */ - writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE, - u300_timer_base + U300_TIMER_APP_EGPT2); - - /* Use general purpose timer 2 as clock source */ - if (clocksource_mmio_init(u300_timer_base + U300_TIMER_APP_GPT2CC, - "GPT2", rate, 300, 32, clocksource_mmio_readl_up)) - pr_err("timer: failed to initialize U300 clock source\n"); - - /* Configure and register the clockevent */ - clockevents_config_and_register(&u300_clockevent_data.cevd, rate, - 1, 0xffffffff); - - /* - * TODO: init and register the rest of the timers too, they can be - * used by hrtimers! - */ -} - -CLOCKSOURCE_OF_DECLARE(u300_timer, "stericsson,u300-apptimer", - u300_timer_init_of); diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index c7ca50a..01e5435 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_ARCH_MARCO) += timer-marco.o obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o +obj-$(CONFIG_ARCH_U300) += timer-u300.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o obj-$(CONFIG_SUN5I_HSTIMER) += timer-sun5i.o obj-$(CONFIG_ARCH_TEGRA) += tegra20_timer.o diff --git a/drivers/clocksource/timer-u300.c b/drivers/clocksource/timer-u300.c new file mode 100644 index 0000000..e63d469 --- /dev/null +++ b/drivers/clocksource/timer-u300.c @@ -0,0 +1,447 @@ +/* + * Copyright (C) 2007-2009 ST-Ericsson AB + * License terms: GNU General Public License (GPL) version 2 + * Timer COH 901 328, runs the OS timer interrupt. + * Author: Linus Walleij + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Generic stuff */ +#include +#include + +/* + * APP side special timer registers + * This timer contains four timers which can fire an interrupt each. + * OS (operating system) timer @ 32768 Hz + * DD (device driver) timer @ 1 kHz + * GP1 (general purpose 1) timer @ 1MHz + * GP2 (general purpose 2) timer @ 1MHz + */ + +/* Reset OS Timer 32bit (-/W) */ +#define U300_TIMER_APP_ROST (0x0000) +#define U300_TIMER_APP_ROST_TIMER_RESET (0x00000000) +/* Enable OS Timer 32bit (-/W) */ +#define U300_TIMER_APP_EOST (0x0004) +#define U300_TIMER_APP_EOST_TIMER_ENABLE (0x00000000) +/* Disable OS Timer 32bit (-/W) */ +#define U300_TIMER_APP_DOST (0x0008) +#define U300_TIMER_APP_DOST_TIMER_DISABLE (0x00000000) +/* OS Timer Mode Register 32bit (-/W) */ +#define U300_TIMER_APP_SOSTM (0x000c) +#define U300_TIMER_APP_SOSTM_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_SOSTM_MODE_ONE_SHOT (0x00000001) +/* OS Timer Status Register 32bit (R/-) */ +#define U300_TIMER_APP_OSTS (0x0010) +#define U300_TIMER_APP_OSTS_TIMER_STATE_MASK (0x0000000F) +#define U300_TIMER_APP_OSTS_TIMER_STATE_IDLE (0x00000001) +#define U300_TIMER_APP_OSTS_TIMER_STATE_ACTIVE (0x00000002) +#define U300_TIMER_APP_OSTS_ENABLE_IND (0x00000010) +#define U300_TIMER_APP_OSTS_MODE_MASK (0x00000020) +#define U300_TIMER_APP_OSTS_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_OSTS_MODE_ONE_SHOT (0x00000020) +#define U300_TIMER_APP_OSTS_IRQ_ENABLED_IND (0x00000040) +#define U300_TIMER_APP_OSTS_IRQ_PENDING_IND (0x00000080) +/* OS Timer Current Count Register 32bit (R/-) */ +#define U300_TIMER_APP_OSTCC (0x0014) +/* OS Timer Terminal Count Register 32bit (R/W) */ +#define U300_TIMER_APP_OSTTC (0x0018) +/* OS Timer Interrupt Enable Register 32bit (-/W) */ +#define U300_TIMER_APP_OSTIE (0x001c) +#define U300_TIMER_APP_OSTIE_IRQ_DISABLE (0x00000000) +#define U300_TIMER_APP_OSTIE_IRQ_ENABLE (0x00000001) +/* OS Timer Interrupt Acknowledge Register 32bit (-/W) */ +#define U300_TIMER_APP_OSTIA (0x0020) +#define U300_TIMER_APP_OSTIA_IRQ_ACK (0x00000080) + +/* Reset DD Timer 32bit (-/W) */ +#define U300_TIMER_APP_RDDT (0x0040) +#define U300_TIMER_APP_RDDT_TIMER_RESET (0x00000000) +/* Enable DD Timer 32bit (-/W) */ +#define U300_TIMER_APP_EDDT (0x0044) +#define U300_TIMER_APP_EDDT_TIMER_ENABLE (0x00000000) +/* Disable DD Timer 32bit (-/W) */ +#define U300_TIMER_APP_DDDT (0x0048) +#define U300_TIMER_APP_DDDT_TIMER_DISABLE (0x00000000) +/* DD Timer Mode Register 32bit (-/W) */ +#define U300_TIMER_APP_SDDTM (0x004c) +#define U300_TIMER_APP_SDDTM_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_SDDTM_MODE_ONE_SHOT (0x00000001) +/* DD Timer Status Register 32bit (R/-) */ +#define U300_TIMER_APP_DDTS (0x0050) +#define U300_TIMER_APP_DDTS_TIMER_STATE_MASK (0x0000000F) +#define U300_TIMER_APP_DDTS_TIMER_STATE_IDLE (0x00000001) +#define U300_TIMER_APP_DDTS_TIMER_STATE_ACTIVE (0x00000002) +#define U300_TIMER_APP_DDTS_ENABLE_IND (0x00000010) +#define U300_TIMER_APP_DDTS_MODE_MASK (0x00000020) +#define U300_TIMER_APP_DDTS_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_DDTS_MODE_ONE_SHOT (0x00000020) +#define U300_TIMER_APP_DDTS_IRQ_ENABLED_IND (0x00000040) +#define U300_TIMER_APP_DDTS_IRQ_PENDING_IND (0x00000080) +/* DD Timer Current Count Register 32bit (R/-) */ +#define U300_TIMER_APP_DDTCC (0x0054) +/* DD Timer Terminal Count Register 32bit (R/W) */ +#define U300_TIMER_APP_DDTTC (0x0058) +/* DD Timer Interrupt Enable Register 32bit (-/W) */ +#define U300_TIMER_APP_DDTIE (0x005c) +#define U300_TIMER_APP_DDTIE_IRQ_DISABLE (0x00000000) +#define U300_TIMER_APP_DDTIE_IRQ_ENABLE (0x00000001) +/* DD Timer Interrupt Acknowledge Register 32bit (-/W) */ +#define U300_TIMER_APP_DDTIA (0x0060) +#define U300_TIMER_APP_DDTIA_IRQ_ACK (0x00000080) + +/* Reset GP1 Timer 32bit (-/W) */ +#define U300_TIMER_APP_RGPT1 (0x0080) +#define U300_TIMER_APP_RGPT1_TIMER_RESET (0x00000000) +/* Enable GP1 Timer 32bit (-/W) */ +#define U300_TIMER_APP_EGPT1 (0x0084) +#define U300_TIMER_APP_EGPT1_TIMER_ENABLE (0x00000000) +/* Disable GP1 Timer 32bit (-/W) */ +#define U300_TIMER_APP_DGPT1 (0x0088) +#define U300_TIMER_APP_DGPT1_TIMER_DISABLE (0x00000000) +/* GP1 Timer Mode Register 32bit (-/W) */ +#define U300_TIMER_APP_SGPT1M (0x008c) +#define U300_TIMER_APP_SGPT1M_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT (0x00000001) +/* GP1 Timer Status Register 32bit (R/-) */ +#define U300_TIMER_APP_GPT1S (0x0090) +#define U300_TIMER_APP_GPT1S_TIMER_STATE_MASK (0x0000000F) +#define U300_TIMER_APP_GPT1S_TIMER_STATE_IDLE (0x00000001) +#define U300_TIMER_APP_GPT1S_TIMER_STATE_ACTIVE (0x00000002) +#define U300_TIMER_APP_GPT1S_ENABLE_IND (0x00000010) +#define U300_TIMER_APP_GPT1S_MODE_MASK (0x00000020) +#define U300_TIMER_APP_GPT1S_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_GPT1S_MODE_ONE_SHOT (0x00000020) +#define U300_TIMER_APP_GPT1S_IRQ_ENABLED_IND (0x00000040) +#define U300_TIMER_APP_GPT1S_IRQ_PENDING_IND (0x00000080) +/* GP1 Timer Current Count Register 32bit (R/-) */ +#define U300_TIMER_APP_GPT1CC (0x0094) +/* GP1 Timer Terminal Count Register 32bit (R/W) */ +#define U300_TIMER_APP_GPT1TC (0x0098) +/* GP1 Timer Interrupt Enable Register 32bit (-/W) */ +#define U300_TIMER_APP_GPT1IE (0x009c) +#define U300_TIMER_APP_GPT1IE_IRQ_DISABLE (0x00000000) +#define U300_TIMER_APP_GPT1IE_IRQ_ENABLE (0x00000001) +/* GP1 Timer Interrupt Acknowledge Register 32bit (-/W) */ +#define U300_TIMER_APP_GPT1IA (0x00a0) +#define U300_TIMER_APP_GPT1IA_IRQ_ACK (0x00000080) + +/* Reset GP2 Timer 32bit (-/W) */ +#define U300_TIMER_APP_RGPT2 (0x00c0) +#define U300_TIMER_APP_RGPT2_TIMER_RESET (0x00000000) +/* Enable GP2 Timer 32bit (-/W) */ +#define U300_TIMER_APP_EGPT2 (0x00c4) +#define U300_TIMER_APP_EGPT2_TIMER_ENABLE (0x00000000) +/* Disable GP2 Timer 32bit (-/W) */ +#define U300_TIMER_APP_DGPT2 (0x00c8) +#define U300_TIMER_APP_DGPT2_TIMER_DISABLE (0x00000000) +/* GP2 Timer Mode Register 32bit (-/W) */ +#define U300_TIMER_APP_SGPT2M (0x00cc) +#define U300_TIMER_APP_SGPT2M_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_SGPT2M_MODE_ONE_SHOT (0x00000001) +/* GP2 Timer Status Register 32bit (R/-) */ +#define U300_TIMER_APP_GPT2S (0x00d0) +#define U300_TIMER_APP_GPT2S_TIMER_STATE_MASK (0x0000000F) +#define U300_TIMER_APP_GPT2S_TIMER_STATE_IDLE (0x00000001) +#define U300_TIMER_APP_GPT2S_TIMER_STATE_ACTIVE (0x00000002) +#define U300_TIMER_APP_GPT2S_ENABLE_IND (0x00000010) +#define U300_TIMER_APP_GPT2S_MODE_MASK (0x00000020) +#define U300_TIMER_APP_GPT2S_MODE_CONTINUOUS (0x00000000) +#define U300_TIMER_APP_GPT2S_MODE_ONE_SHOT (0x00000020) +#define U300_TIMER_APP_GPT2S_IRQ_ENABLED_IND (0x00000040) +#define U300_TIMER_APP_GPT2S_IRQ_PENDING_IND (0x00000080) +/* GP2 Timer Current Count Register 32bit (R/-) */ +#define U300_TIMER_APP_GPT2CC (0x00d4) +/* GP2 Timer Terminal Count Register 32bit (R/W) */ +#define U300_TIMER_APP_GPT2TC (0x00d8) +/* GP2 Timer Interrupt Enable Register 32bit (-/W) */ +#define U300_TIMER_APP_GPT2IE (0x00dc) +#define U300_TIMER_APP_GPT2IE_IRQ_DISABLE (0x00000000) +#define U300_TIMER_APP_GPT2IE_IRQ_ENABLE (0x00000001) +/* GP2 Timer Interrupt Acknowledge Register 32bit (-/W) */ +#define U300_TIMER_APP_GPT2IA (0x00e0) +#define U300_TIMER_APP_GPT2IA_IRQ_ACK (0x00000080) + +/* Clock request control register - all four timers */ +#define U300_TIMER_APP_CRC (0x100) +#define U300_TIMER_APP_CRC_CLOCK_REQUEST_ENABLE (0x00000001) + +static void __iomem *u300_timer_base; + +struct u300_clockevent_data { + struct clock_event_device cevd; + unsigned ticks_per_jiffy; +}; + +/* + * The u300_set_mode() function is always called first, if we + * have oneshot timer active, the oneshot scheduling function + * u300_set_next_event() is called immediately after. + */ +static void u300_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + struct u300_clockevent_data *cevdata = + container_of(evt, struct u300_clockevent_data, cevd); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + /* Disable interrupts on GPT1 */ + writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, + u300_timer_base + U300_TIMER_APP_GPT1IE); + /* Disable GP1 while we're reprogramming it. */ + writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, + u300_timer_base + U300_TIMER_APP_DGPT1); + /* + * Set the periodic mode to a certain number of ticks per + * jiffy. + */ + writel(cevdata->ticks_per_jiffy, + u300_timer_base + U300_TIMER_APP_GPT1TC); + /* + * Set continuous mode, so the timer keeps triggering + * interrupts. + */ + writel(U300_TIMER_APP_SGPT1M_MODE_CONTINUOUS, + u300_timer_base + U300_TIMER_APP_SGPT1M); + /* Enable timer interrupts */ + writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, + u300_timer_base + U300_TIMER_APP_GPT1IE); + /* Then enable the OS timer again */ + writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, + u300_timer_base + U300_TIMER_APP_EGPT1); + break; + case CLOCK_EVT_MODE_ONESHOT: + /* Just break; here? */ + /* + * The actual event will be programmed by the next event hook, + * so we just set a dummy value somewhere at the end of the + * universe here. + */ + /* Disable interrupts on GPT1 */ + writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, + u300_timer_base + U300_TIMER_APP_GPT1IE); + /* Disable GP1 while we're reprogramming it. */ + writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, + u300_timer_base + U300_TIMER_APP_DGPT1); + /* + * Expire far in the future, u300_set_next_event() will be + * called soon... + */ + writel(0xFFFFFFFF, u300_timer_base + U300_TIMER_APP_GPT1TC); + /* We run one shot per tick here! */ + writel(U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT, + u300_timer_base + U300_TIMER_APP_SGPT1M); + /* Enable interrupts for this timer */ + writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, + u300_timer_base + U300_TIMER_APP_GPT1IE); + /* Enable timer */ + writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, + u300_timer_base + U300_TIMER_APP_EGPT1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + /* Disable interrupts on GP1 */ + writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, + u300_timer_base + U300_TIMER_APP_GPT1IE); + /* Disable GP1 */ + writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, + u300_timer_base + U300_TIMER_APP_DGPT1); + break; + case CLOCK_EVT_MODE_RESUME: + /* Ignore this call */ + break; + } +} + +/* + * The app timer in one shot mode obviously has to be reprogrammed + * in EXACTLY this sequence to work properly. Do NOT try to e.g. replace + * the interrupt disable + timer disable commands with a reset command, + * it will fail miserably. Apparently (and I found this the hard way) + * the timer is very sensitive to the instruction order, though you don't + * get that impression from the data sheet. + */ +static int u300_set_next_event(unsigned long cycles, + struct clock_event_device *evt) + +{ + /* Disable interrupts on GPT1 */ + writel(U300_TIMER_APP_GPT1IE_IRQ_DISABLE, + u300_timer_base + U300_TIMER_APP_GPT1IE); + /* Disable GP1 while we're reprogramming it. */ + writel(U300_TIMER_APP_DGPT1_TIMER_DISABLE, + u300_timer_base + U300_TIMER_APP_DGPT1); + /* Reset the General Purpose timer 1. */ + writel(U300_TIMER_APP_RGPT1_TIMER_RESET, + u300_timer_base + U300_TIMER_APP_RGPT1); + /* IRQ in n * cycles */ + writel(cycles, u300_timer_base + U300_TIMER_APP_GPT1TC); + /* + * We run one shot per tick here! (This is necessary to reconfigure, + * the timer will tilt if you don't!) + */ + writel(U300_TIMER_APP_SGPT1M_MODE_ONE_SHOT, + u300_timer_base + U300_TIMER_APP_SGPT1M); + /* Enable timer interrupts */ + writel(U300_TIMER_APP_GPT1IE_IRQ_ENABLE, + u300_timer_base + U300_TIMER_APP_GPT1IE); + /* Then enable the OS timer again */ + writel(U300_TIMER_APP_EGPT1_TIMER_ENABLE, + u300_timer_base + U300_TIMER_APP_EGPT1); + return 0; +} + +static struct u300_clockevent_data u300_clockevent_data = { + /* Use general purpose timer 1 as clock event */ + .cevd = { + .name = "GPT1", + /* Reasonably fast and accurate clock event */ + .rating = 300, + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = u300_set_next_event, + .set_mode = u300_set_mode, + }, +}; + +/* Clock event timer interrupt handler */ +static irqreturn_t u300_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = &u300_clockevent_data.cevd; + /* ACK/Clear timer IRQ for the APP GPT1 Timer */ + + writel(U300_TIMER_APP_GPT1IA_IRQ_ACK, + u300_timer_base + U300_TIMER_APP_GPT1IA); + evt->event_handler(evt); + return IRQ_HANDLED; +} + +static struct irqaction u300_timer_irq = { + .name = "U300 Timer Tick", + .flags = IRQF_DISABLED | IRQF_TIMER | IRQF_IRQPOLL, + .handler = u300_timer_interrupt, +}; + +/* + * Override the global weak sched_clock symbol with this + * local implementation which uses the clocksource to get some + * better resolution when scheduling the kernel. We accept that + * this wraps around for now, since it is just a relative time + * stamp. (Inspired by OMAP implementation.) + */ + +static u64 notrace u300_read_sched_clock(void) +{ + return readl(u300_timer_base + U300_TIMER_APP_GPT2CC); +} + +static unsigned long u300_read_current_timer(void) +{ + return readl(u300_timer_base + U300_TIMER_APP_GPT2CC); +} + +static struct delay_timer u300_delay_timer; + +/* + * This sets up the system timers, clock source and clock event. + */ +static void __init u300_timer_init_of(struct device_node *np) +{ + unsigned int irq; + struct clk *clk; + unsigned long rate; + + u300_timer_base = of_iomap(np, 0); + if (!u300_timer_base) + panic("could not ioremap system timer\n"); + + /* Get the IRQ for the GP1 timer */ + irq = irq_of_parse_and_map(np, 2); + if (!irq) + panic("no IRQ for system timer\n"); + + pr_info("U300 GP1 timer @ base: %p, IRQ: %u\n", u300_timer_base, irq); + + /* Clock the interrupt controller */ + clk = of_clk_get(np, 0); + BUG_ON(IS_ERR(clk)); + clk_prepare_enable(clk); + rate = clk_get_rate(clk); + + u300_clockevent_data.ticks_per_jiffy = DIV_ROUND_CLOSEST(rate, HZ); + + sched_clock_register(u300_read_sched_clock, 32, rate); + + u300_delay_timer.read_current_timer = &u300_read_current_timer; + u300_delay_timer.freq = rate; + register_current_timer_delay(&u300_delay_timer); + + /* + * Disable the "OS" and "DD" timers - these are designed for Symbian! + * Example usage in cnh1601578 cpu subsystem pd_timer_app.c + */ + writel(U300_TIMER_APP_CRC_CLOCK_REQUEST_ENABLE, + u300_timer_base + U300_TIMER_APP_CRC); + writel(U300_TIMER_APP_ROST_TIMER_RESET, + u300_timer_base + U300_TIMER_APP_ROST); + writel(U300_TIMER_APP_DOST_TIMER_DISABLE, + u300_timer_base + U300_TIMER_APP_DOST); + writel(U300_TIMER_APP_RDDT_TIMER_RESET, + u300_timer_base + U300_TIMER_APP_RDDT); + writel(U300_TIMER_APP_DDDT_TIMER_DISABLE, + u300_timer_base + U300_TIMER_APP_DDDT); + + /* Reset the General Purpose timer 1. */ + writel(U300_TIMER_APP_RGPT1_TIMER_RESET, + u300_timer_base + U300_TIMER_APP_RGPT1); + + /* Set up the IRQ handler */ + setup_irq(irq, &u300_timer_irq); + + /* Reset the General Purpose timer 2 */ + writel(U300_TIMER_APP_RGPT2_TIMER_RESET, + u300_timer_base + U300_TIMER_APP_RGPT2); + /* Set this timer to run around forever */ + writel(0xFFFFFFFFU, u300_timer_base + U300_TIMER_APP_GPT2TC); + /* Set continuous mode so it wraps around */ + writel(U300_TIMER_APP_SGPT2M_MODE_CONTINUOUS, + u300_timer_base + U300_TIMER_APP_SGPT2M); + /* Disable timer interrupts */ + writel(U300_TIMER_APP_GPT2IE_IRQ_DISABLE, + u300_timer_base + U300_TIMER_APP_GPT2IE); + /* Then enable the GP2 timer to use as a free running us counter */ + writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE, + u300_timer_base + U300_TIMER_APP_EGPT2); + + /* Use general purpose timer 2 as clock source */ + if (clocksource_mmio_init(u300_timer_base + U300_TIMER_APP_GPT2CC, + "GPT2", rate, 300, 32, clocksource_mmio_readl_up)) + pr_err("timer: failed to initialize U300 clock source\n"); + + /* Configure and register the clockevent */ + clockevents_config_and_register(&u300_clockevent_data.cevd, rate, + 1, 0xffffffff); + + /* + * TODO: init and register the rest of the timers too, they can be + * used by hrtimers! + */ +} + +CLOCKSOURCE_OF_DECLARE(u300_timer, "stericsson,u300-apptimer", + u300_timer_init_of); -- cgit v0.10.2 From ec6c085cc779ae863d363e7d076b763a2a0aca4c Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 6 Feb 2014 10:40:31 +0100 Subject: clocksource: sunxi: Add new compatibles The Allwinner A10 compatibles were following a slightly different compatible patterns than the rest of the SoCs for historical reasons. Add compatibles matching the other pattern to the timer driver for consistency, and keep the older one for backward compatibility. Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt b/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt index 48aeb78..5c2e235 100644 --- a/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt +++ b/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt @@ -2,7 +2,7 @@ Allwinner A1X SoCs Timer Controller Required properties: -- compatible : should be "allwinner,sun4i-timer" +- compatible : should be "allwinner,sun4i-a10-timer" - reg : Specifies base physical address and size of the registers. - interrupts : The interrupt of the first timer - clocks: phandle to the source clock (usually a 24 MHz fixed clock) @@ -10,7 +10,7 @@ Required properties: Example: timer { - compatible = "allwinner,sun4i-timer"; + compatible = "allwinner,sun4i-a10-timer"; reg = <0x01c20c00 0x400>; interrupts = <22>; clocks = <&osc>; diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index bf497af..efb17c3 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -196,5 +196,5 @@ static void __init sun4i_timer_init(struct device_node *node) clockevents_config_and_register(&sun4i_clockevent, rate, TIMER_SYNC_TICKS, 0xffffffff); } -CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-timer", +CLOCKSOURCE_OF_DECLARE(sun4i, "allwinner,sun4i-a10-timer", sun4i_timer_init); -- cgit v0.10.2 From b4f26440d932d022e109fb7a0518ae291e72f399 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 6 Feb 2014 10:40:32 +0100 Subject: ARM: sunxi: dt: Convert to the new clocksource compatible Switch the device tree to the new compatibles introduced in the timer driver to have a common pattern accross all Allwinner SoCs. Signed-off-by: Maxime Ripard Signed-off-by: Daniel Lezcano diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 040bb0e..dd64cc0 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -403,7 +403,7 @@ }; timer@01c20c00 { - compatible = "allwinner,sun4i-timer"; + compatible = "allwinner,sun4i-a10-timer"; reg = <0x01c20c00 0x90>; interrupts = <22>; clocks = <&osc24M>; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index ea16054..9cee110 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -366,7 +366,7 @@ }; timer@01c20c00 { - compatible = "allwinner,sun4i-timer"; + compatible = "allwinner,sun4i-a10-timer"; reg = <0x01c20c00 0x90>; interrupts = <22>; clocks = <&osc24M>; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 320335a..f5cba63 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -329,7 +329,7 @@ }; timer@01c20c00 { - compatible = "allwinner,sun4i-timer"; + compatible = "allwinner,sun4i-a10-timer"; reg = <0x01c20c00 0x90>; interrupts = <22>; clocks = <&osc24M>; diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 5256ad9..996fff5 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -231,7 +231,7 @@ }; timer@01c20c00 { - compatible = "allwinner,sun4i-timer"; + compatible = "allwinner,sun4i-a10-timer"; reg = <0x01c20c00 0xa0>; interrupts = <0 18 4>, <0 19 4>, diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 119f066..4bc5c6f 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -435,7 +435,7 @@ }; timer@01c20c00 { - compatible = "allwinner,sun4i-timer"; + compatible = "allwinner,sun4i-a10-timer"; reg = <0x01c20c00 0x90>; interrupts = <0 22 4>, <0 23 4>, -- cgit v0.10.2 From 926b65e20dc336391dc333309d34bdf7d5fa3f3b Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 10 Feb 2014 12:10:57 +0200 Subject: clocksource: keystone: add bindings for keystone timer This patch provides bindings for the 64-bit timer in the KeyStone architecture devices. The timer can be configured as a general-purpose 64-bit timer, dual general-purpose 32-bit timers. When configured as dual 32-bit timers, each half can operate in conjunction (chain mode) or independently (unchained mode) of each other. It is global timer is a free running up-counter and can generate interrupt when the counter reaches preset counter values. Documentation: http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf Acked-by: Rob Herring Acked-by: Santosh Shilimkar Signed-off-by: Ivan Khoronzhuk Signed-off-by: Daniel Lezcano diff --git a/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt new file mode 100644 index 0000000..5fbe361 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt @@ -0,0 +1,29 @@ +* Device tree bindings for Texas instruments Keystone timer + +This document provides bindings for the 64-bit timer in the KeyStone +architecture devices. The timer can be configured as a general-purpose 64-bit +timer, dual general-purpose 32-bit timers. When configured as dual 32-bit +timers, each half can operate in conjunction (chain mode) or independently +(unchained mode) of each other. + +It is global timer is a free running up-counter and can generate interrupt +when the counter reaches preset counter values. + +Documentation: +http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf + +Required properties: + +- compatible : should be "ti,keystone-timer". +- reg : specifies base physical address and count of the registers. +- interrupts : interrupt generated by the timer. +- clocks : the clock feeding the timer clock. + +Example: + +timer@22f0000 { + compatible = "ti,keystone-timer"; + reg = <0x022f0000 0x80>; + interrupts = ; + clocks = <&clktimer15>; +}; -- cgit v0.10.2 From e6b7f580bacb4a23e1ee5cdf6dc6e54bab141f7c Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Mon, 10 Feb 2014 12:10:56 +0200 Subject: clocksource: timer-keystone: introduce clocksource driver for Keystone Add broadcast clock-event device for the Keystone arch. The timer can be configured as a general-purpose 64-bit timer, dual general-purpose 32-bit timers. When configured as dual 32-bit timers, each half can operate in conjunction (chain mode) or independently (unchained mode) of each other. Reviewed-by: Stephen Boyd Acked-by: Santosh shilimkar Signed-off-by: Ivan Khoronzhuk Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 01e5435..aed3488 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -38,3 +38,4 @@ obj-$(CONFIG_ARM_ARCH_TIMER) += arm_arch_timer.o obj-$(CONFIG_ARM_GLOBAL_TIMER) += arm_global_timer.o obj-$(CONFIG_CLKSRC_METAG_GENERIC) += metag_generic.o obj-$(CONFIG_ARCH_HAS_TICK_BROADCAST) += dummy_timer.o +obj-$(CONFIG_ARCH_KEYSTONE) += timer-keystone.o diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c new file mode 100644 index 0000000..86b08cd --- /dev/null +++ b/drivers/clocksource/timer-keystone.c @@ -0,0 +1,244 @@ +/* + * Keystone broadcast clock-event + * + * Copyright 2013 Texas Instruments, Inc. + * + * Author: Ivan Khoronzhuk + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include + +#define TIMER_NAME "timer-keystone" + +/* Timer register offsets */ +#define TIM12 0x10 +#define TIM34 0x14 +#define PRD12 0x18 +#define PRD34 0x1c +#define TCR 0x20 +#define TGCR 0x24 +#define INTCTLSTAT 0x44 + +/* Timer register bitfields */ +#define TCR_ENAMODE_MASK 0xC0 +#define TCR_ENAMODE_ONESHOT_MASK 0x40 +#define TCR_ENAMODE_PERIODIC_MASK 0x80 + +#define TGCR_TIM_UNRESET_MASK 0x03 +#define INTCTLSTAT_ENINT_MASK 0x01 + +/** + * struct keystone_timer: holds timer's data + * @base: timer memory base address + * @hz_period: cycles per HZ period + * @event_dev: event device based on timer + */ +static struct keystone_timer { + void __iomem *base; + unsigned long hz_period; + struct clock_event_device event_dev; +} timer; + +static inline u32 keystone_timer_readl(unsigned long rg) +{ + return readl_relaxed(timer.base + rg); +} + +static inline void keystone_timer_writel(u32 val, unsigned long rg) +{ + writel_relaxed(val, timer.base + rg); +} + +/** + * keystone_timer_barrier: write memory barrier + * use explicit barrier to avoid using readl/writel non relaxed function + * variants, because in our case non relaxed variants hide the true places + * where barrier is needed. + */ +static inline void keystone_timer_barrier(void) +{ + __iowmb(); +} + +/** + * keystone_timer_config: configures timer to work in oneshot/periodic modes. + * @ mode: mode to configure + * @ period: cycles number to configure for + */ +static int keystone_timer_config(u64 period, enum clock_event_mode mode) +{ + u32 tcr; + u32 off; + + tcr = keystone_timer_readl(TCR); + off = tcr & ~(TCR_ENAMODE_MASK); + + /* set enable mode */ + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + tcr |= TCR_ENAMODE_ONESHOT_MASK; + break; + case CLOCK_EVT_MODE_PERIODIC: + tcr |= TCR_ENAMODE_PERIODIC_MASK; + break; + default: + return -1; + } + + /* disable timer */ + keystone_timer_writel(off, TCR); + /* here we have to be sure the timer has been disabled */ + keystone_timer_barrier(); + + /* reset counter to zero, set new period */ + keystone_timer_writel(0, TIM12); + keystone_timer_writel(0, TIM34); + keystone_timer_writel(period & 0xffffffff, PRD12); + keystone_timer_writel(period >> 32, PRD34); + + /* + * enable timer + * here we have to be sure that CNTLO, CNTHI, PRDLO, PRDHI registers + * have been written. + */ + keystone_timer_barrier(); + keystone_timer_writel(tcr, TCR); + return 0; +} + +static void keystone_timer_disable(void) +{ + u32 tcr; + + tcr = keystone_timer_readl(TCR); + + /* disable timer */ + tcr &= ~(TCR_ENAMODE_MASK); + keystone_timer_writel(tcr, TCR); +} + +static irqreturn_t keystone_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + evt->event_handler(evt); + return IRQ_HANDLED; +} + +static int keystone_set_next_event(unsigned long cycles, + struct clock_event_device *evt) +{ + return keystone_timer_config(cycles, evt->mode); +} + +static void keystone_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + keystone_timer_config(timer.hz_period, CLOCK_EVT_MODE_PERIODIC); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_ONESHOT: + keystone_timer_disable(); + break; + default: + break; + } +} + +static void __init keystone_timer_init(struct device_node *np) +{ + struct clock_event_device *event_dev = &timer.event_dev; + unsigned long rate; + struct clk *clk; + int irq, error; + u32 tgcr; + + irq = irq_of_parse_and_map(np, 0); + if (irq == NO_IRQ) { + pr_err("%s: failed to map interrupts\n", __func__); + return; + } + + timer.base = of_iomap(np, 0); + if (!timer.base) { + pr_err("%s: failed to map registers\n", __func__); + return; + } + + clk = of_clk_get(np, 0); + if (IS_ERR(clk)) { + pr_err("%s: failed to get clock\n", __func__); + iounmap(timer.base); + return; + } + + error = clk_prepare_enable(clk); + if (error) { + pr_err("%s: failed to enable clock\n", __func__); + goto err; + } + + rate = clk_get_rate(clk); + + /* disable, use internal clock source */ + keystone_timer_writel(0, TCR); + /* here we have to be sure the timer has been disabled */ + keystone_timer_barrier(); + + /* reset timer as 64-bit, no pre-scaler, plus features are disabled */ + tgcr = 0; + keystone_timer_writel(0, TGCR); + + /* unreset timer */ + tgcr |= TGCR_TIM_UNRESET_MASK; + keystone_timer_writel(tgcr, TGCR); + + /* init counter to zero */ + keystone_timer_writel(0, TIM12); + keystone_timer_writel(0, TIM34); + + timer.hz_period = DIV_ROUND_UP(rate, HZ); + + /* enable timer interrupts */ + keystone_timer_writel(INTCTLSTAT_ENINT_MASK, INTCTLSTAT); + + error = request_irq(irq, keystone_timer_interrupt, IRQF_TIMER, + TIMER_NAME, event_dev); + if (error) { + pr_err("%s: failed to setup irq\n", __func__); + goto err; + } + + /* setup clockevent */ + event_dev->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT; + event_dev->set_next_event = keystone_set_next_event; + event_dev->set_mode = keystone_set_mode; + event_dev->cpumask = cpu_all_mask; + event_dev->owner = THIS_MODULE; + event_dev->name = TIMER_NAME; + event_dev->irq = irq; + + clockevents_config_and_register(event_dev, rate, 1, ULONG_MAX); + + pr_info("keystone timer clock @%lu Hz\n", rate); + return; +err: + clk_put(clk); + iounmap(timer.base); +} + +CLOCKSOURCE_OF_DECLARE(keystone_timer, "ti,keystone-timer", + keystone_timer_init); -- cgit v0.10.2 From e099d01eb221f4a1e713a7663ca143c05303947f Mon Sep 17 00:00:00 2001 From: Matthias Brugger Date: Tue, 18 Feb 2014 19:29:28 +0100 Subject: clocksource: timer-keystone: Delete unnecessary variable Commit 438e0bff5257 added the timer-keystone device driver but make use of an unnecessary variable in the init function. This patch deletes this variable. Signed-off-by: Matthias Brugger Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/timer-keystone.c b/drivers/clocksource/timer-keystone.c index 86b08cd..0250354 100644 --- a/drivers/clocksource/timer-keystone.c +++ b/drivers/clocksource/timer-keystone.c @@ -164,7 +164,6 @@ static void __init keystone_timer_init(struct device_node *np) unsigned long rate; struct clk *clk; int irq, error; - u32 tgcr; irq = irq_of_parse_and_map(np, 0); if (irq == NO_IRQ) { @@ -199,12 +198,10 @@ static void __init keystone_timer_init(struct device_node *np) keystone_timer_barrier(); /* reset timer as 64-bit, no pre-scaler, plus features are disabled */ - tgcr = 0; keystone_timer_writel(0, TGCR); /* unreset timer */ - tgcr |= TGCR_TIM_UNRESET_MASK; - keystone_timer_writel(tgcr, TGCR); + keystone_timer_writel(TGCR_TIM_UNRESET_MASK, TGCR); /* init counter to zero */ keystone_timer_writel(0, TIM12); -- cgit v0.10.2 From 0a54a069585bb875d37494c434f36a19bcc05df9 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 19 Feb 2014 17:05:25 -0300 Subject: clocksource: orion: Use atomic access for shared registers Replace the driver-specific thread-safe shared register API by the recently introduced atomic_io_clear_set(). Cc: Daniel Lezcano Tested-by: Sebastian Hesselbarth Tested-by: Willy Tarreau Acked-by: Jason Cooper Signed-off-by: Ezequiel Garcia Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/time-orion.c b/drivers/clocksource/time-orion.c index 2006622..0b3ce03 100644 --- a/drivers/clocksource/time-orion.c +++ b/drivers/clocksource/time-orion.c @@ -35,20 +35,6 @@ #define ORION_ONESHOT_MAX 0xfffffffe static void __iomem *timer_base; -static DEFINE_SPINLOCK(timer_ctrl_lock); - -/* - * Thread-safe access to TIMER_CTRL register - * (shared with watchdog timer) - */ -void orion_timer_ctrl_clrset(u32 clr, u32 set) -{ - spin_lock(&timer_ctrl_lock); - writel((readl(timer_base + TIMER_CTRL) & ~clr) | set, - timer_base + TIMER_CTRL); - spin_unlock(&timer_ctrl_lock); -} -EXPORT_SYMBOL(orion_timer_ctrl_clrset); /* * Free-running clocksource handling. @@ -68,7 +54,8 @@ static int orion_clkevt_next_event(unsigned long delta, { /* setup and enable one-shot timer */ writel(delta, timer_base + TIMER1_VAL); - orion_timer_ctrl_clrset(TIMER1_RELOAD_EN, TIMER1_EN); + atomic_io_modify(timer_base + TIMER_CTRL, + TIMER1_RELOAD_EN | TIMER1_EN, TIMER1_EN); return 0; } @@ -80,10 +67,13 @@ static void orion_clkevt_mode(enum clock_event_mode mode, /* setup and enable periodic timer at 1/HZ intervals */ writel(ticks_per_jiffy - 1, timer_base + TIMER1_RELOAD); writel(ticks_per_jiffy - 1, timer_base + TIMER1_VAL); - orion_timer_ctrl_clrset(0, TIMER1_RELOAD_EN | TIMER1_EN); + atomic_io_modify(timer_base + TIMER_CTRL, + TIMER1_RELOAD_EN | TIMER1_EN, + TIMER1_RELOAD_EN | TIMER1_EN); } else { /* disable timer */ - orion_timer_ctrl_clrset(TIMER1_RELOAD_EN | TIMER1_EN, 0); + atomic_io_modify(timer_base + TIMER_CTRL, + TIMER1_RELOAD_EN | TIMER1_EN, 0); } } @@ -131,7 +121,9 @@ static void __init orion_timer_init(struct device_node *np) /* setup timer0 as free-running clocksource */ writel(~0, timer_base + TIMER0_VAL); writel(~0, timer_base + TIMER0_RELOAD); - orion_timer_ctrl_clrset(0, TIMER0_RELOAD_EN | TIMER0_EN); + atomic_io_modify(timer_base + TIMER_CTRL, + TIMER0_RELOAD_EN | TIMER0_EN, + TIMER0_RELOAD_EN | TIMER0_EN); clocksource_mmio_init(timer_base + TIMER0_VAL, "orion_clocksource", clk_get_rate(clk), 300, 32, clocksource_mmio_readl_down); -- cgit v0.10.2 From c8af34b4dbc211f1e08a7ea513dc399046825866 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 19 Feb 2014 17:05:26 -0300 Subject: clocksource: armada-370-xp: Use atomic access for shared registers Replace the driver-specific thread-safe shared register API by the recently introduced atomic_io_clear_set(). Signed-off-by: Ezequiel Garcia Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index ee8691b..0451e62 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -85,12 +85,6 @@ static u32 ticks_per_jiffy; static struct clock_event_device __percpu *armada_370_xp_evt; -static void timer_ctrl_clrset(u32 clr, u32 set) -{ - writel((readl(timer_base + TIMER_CTRL_OFF) & ~clr) | set, - timer_base + TIMER_CTRL_OFF); -} - static void local_timer_ctrl_clrset(u32 clr, u32 set) { writel((readl(local_base + TIMER_CTRL_OFF) & ~clr) | set, @@ -245,7 +239,7 @@ static void __init armada_370_xp_timer_common_init(struct device_node *np) clr = TIMER0_25MHZ; enable_mask = TIMER0_EN | TIMER0_DIV(TIMER_DIVIDER_SHIFT); } - timer_ctrl_clrset(clr, set); + atomic_io_modify(timer_base + TIMER_CTRL_OFF, clr | set, set); local_timer_ctrl_clrset(clr, set); /* @@ -263,7 +257,9 @@ static void __init armada_370_xp_timer_common_init(struct device_node *np) writel(0xffffffff, timer_base + TIMER0_VAL_OFF); writel(0xffffffff, timer_base + TIMER0_RELOAD_OFF); - timer_ctrl_clrset(0, TIMER0_RELOAD_EN | enable_mask); + atomic_io_modify(timer_base + TIMER_CTRL_OFF, + TIMER0_RELOAD_EN | enable_mask, + TIMER0_RELOAD_EN | enable_mask); /* * Set scale and timer for sched_clock. -- cgit v0.10.2 From aeb8fb7910fc7344d0ee0c9306eb45f11327c600 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 20 Feb 2014 12:54:46 +0100 Subject: ARM: shmobile: Remove CMT, TMU and STI Kconfig entries Now when drivers/clocksource/Kconfig has been updated with entires for CMT, TMU, MTU2, and STI it is safe to remove these from mach-shmobile. Also select timers per SoC via SYS_SUPPORTS_xxx. Signed-off-by: Magnus Damm Acked-by: John Stultz Signed-off-by: Wolfram Sang Signed-off-by: Daniel Lezcano diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 3386406..c1a9538 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -24,17 +24,21 @@ comment "Renesas ARM SoCs System Type" config ARCH_EMEV2 bool "Emma Mobile EV2" + select SYS_SUPPORTS_EM_STI config ARCH_R7S72100 bool "RZ/A1H (R7S72100)" + select SYS_SUPPORTS_SH_MTU2 config ARCH_R8A7790 bool "R-Car H2 (R8A77900)" select RENESAS_IRQC + select SYS_SUPPORTS_SH_CMT config ARCH_R8A7791 bool "R-Car M2 (R8A77910)" select RENESAS_IRQC + select SYS_SUPPORTS_SH_CMT comment "Renesas ARM SoCs Board Type" @@ -68,6 +72,8 @@ config ARCH_SH7372 select ARM_CPU_SUSPEND if PM || CPU_IDLE select CPU_V7 select SH_CLK_CPG + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_TMU config ARCH_SH73A0 bool "SH-Mobile AG5 (R8A73A00)" @@ -77,6 +83,8 @@ config ARCH_SH73A0 select I2C select SH_CLK_CPG select RENESAS_INTC_IRQPIN + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_TMU config ARCH_R8A73A4 bool "R-Mobile APE6 (R8A73A40)" @@ -87,6 +95,8 @@ config ARCH_R8A73A4 select RENESAS_IRQC select ARCH_HAS_CPUFREQ select ARCH_HAS_OPP + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_TMU config ARCH_R8A7740 bool "R-Mobile A1 (R8A77400)" @@ -95,6 +105,8 @@ config ARCH_R8A7740 select CPU_V7 select SH_CLK_CPG select RENESAS_INTC_IRQPIN + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_TMU config ARCH_R8A7778 bool "R-Car M1A (R8A77781)" @@ -104,6 +116,7 @@ config ARCH_R8A7778 select ARM_GIC select USB_ARCH_HAS_EHCI select USB_ARCH_HAS_OHCI + select SYS_SUPPORTS_SH_TMU config ARCH_R8A7779 bool "R-Car H1 (R8A77790)" @@ -114,6 +127,7 @@ config ARCH_R8A7779 select USB_ARCH_HAS_EHCI select USB_ARCH_HAS_OHCI select RENESAS_INTC_IRQPIN + select SYS_SUPPORTS_SH_TMU config ARCH_R8A7790 bool "R-Car H2 (R8A77900)" @@ -123,6 +137,7 @@ config ARCH_R8A7790 select MIGHT_HAVE_PCI select SH_CLK_CPG select RENESAS_IRQC + select SYS_SUPPORTS_SH_CMT config ARCH_R8A7791 bool "R-Car M2 (R8A77910)" @@ -132,6 +147,7 @@ config ARCH_R8A7791 select MIGHT_HAVE_PCI select SH_CLK_CPG select RENESAS_IRQC + select SYS_SUPPORTS_SH_CMT config ARCH_EMEV2 bool "Emma Mobile EV2" @@ -141,6 +157,7 @@ config ARCH_EMEV2 select MIGHT_HAVE_PCI select USE_OF select AUTO_ZRELADDR + select SYS_SUPPORTS_EM_STI config ARCH_R7S72100 bool "RZ/A1H (R7S72100)" @@ -148,6 +165,7 @@ config ARCH_R7S72100 select ARM_GIC select CPU_V7 select SH_CLK_CPG + select SYS_SUPPORTS_SH_MTU2 comment "Renesas ARM SoCs Board Type" @@ -321,24 +339,6 @@ config SHMOBILE_TIMER_HZ want to select a HZ value such as 128 that can evenly divide RCLK. A HZ value that does not divide evenly may cause timer drift. -config SH_TIMER_CMT - bool "CMT timer driver" - default y - help - This enables build of the CMT timer driver. - -config SH_TIMER_TMU - bool "TMU timer driver" - default y - help - This enables build of the TMU timer driver. - -config EM_TIMER_STI - bool "STI timer driver" - default y - help - This enables build of the STI timer driver. - endmenu endif -- cgit v0.10.2 From fbfa893458edb0daf18a0add33d3341b1e9f6d44 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 20 Feb 2014 12:54:47 +0100 Subject: sh: Remove Kconfig entries for TMU, CMT and MTU2 Now when drivers/clocksource/Kconfig has been updated with entires for CMT, TMU and MTU2 it is safe to remove these from SH. Signed-off-by: Magnus Damm Acked-by: John Stultz Signed-off-by: Wolfram Sang Signed-off-by: Daniel Lezcano diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 6357710..364d204 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -123,15 +123,6 @@ config SYS_SUPPORTS_NUMA config SYS_SUPPORTS_PCI bool -config SYS_SUPPORTS_CMT - bool - -config SYS_SUPPORTS_MTU2 - bool - -config SYS_SUPPORTS_TMU - bool - config STACKTRACE_SUPPORT def_bool y @@ -191,14 +182,14 @@ config CPU_SH3 bool select CPU_HAS_INTEVT select CPU_HAS_SR_RB - select SYS_SUPPORTS_TMU + select SYS_SUPPORTS_SH_TMU config CPU_SH4 bool select CPU_HAS_INTEVT select CPU_HAS_SR_RB select CPU_HAS_FPU if !CPU_SH4AL_DSP - select SYS_SUPPORTS_TMU + select SYS_SUPPORTS_SH_TMU select SYS_SUPPORTS_HUGETLBFS if MMU config CPU_SH4A @@ -213,7 +204,7 @@ config CPU_SH4AL_DSP config CPU_SH5 bool select CPU_HAS_FPU - select SYS_SUPPORTS_TMU + select SYS_SUPPORTS_SH_TMU select SYS_SUPPORTS_HUGETLBFS if MMU config CPU_SHX2 @@ -250,7 +241,7 @@ choice config CPU_SUBTYPE_SH7619 bool "Support SH7619 processor" select CPU_SH2 - select SYS_SUPPORTS_CMT + select SYS_SUPPORTS_SH_CMT # SH-2A Processor Support @@ -258,50 +249,50 @@ config CPU_SUBTYPE_SH7201 bool "Support SH7201 processor" select CPU_SH2A select CPU_HAS_FPU - select SYS_SUPPORTS_MTU2 + select SYS_SUPPORTS_SH_MTU2 config CPU_SUBTYPE_SH7203 bool "Support SH7203 processor" select CPU_SH2A select CPU_HAS_FPU - select SYS_SUPPORTS_CMT - select SYS_SUPPORTS_MTU2 + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_MTU2 select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL config CPU_SUBTYPE_SH7206 bool "Support SH7206 processor" select CPU_SH2A - select SYS_SUPPORTS_CMT - select SYS_SUPPORTS_MTU2 + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_MTU2 config CPU_SUBTYPE_SH7263 bool "Support SH7263 processor" select CPU_SH2A select CPU_HAS_FPU - select SYS_SUPPORTS_CMT - select SYS_SUPPORTS_MTU2 + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_MTU2 config CPU_SUBTYPE_SH7264 bool "Support SH7264 processor" select CPU_SH2A select CPU_HAS_FPU - select SYS_SUPPORTS_CMT - select SYS_SUPPORTS_MTU2 + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_MTU2 select PINCTRL config CPU_SUBTYPE_SH7269 bool "Support SH7269 processor" select CPU_SH2A select CPU_HAS_FPU - select SYS_SUPPORTS_CMT - select SYS_SUPPORTS_MTU2 + select SYS_SUPPORTS_SH_CMT + select SYS_SUPPORTS_SH_MTU2 select PINCTRL config CPU_SUBTYPE_MXG bool "Support MX-G processor" select CPU_SH2A - select SYS_SUPPORTS_MTU2 + select SYS_SUPPORTS_SH_MTU2 help Select MX-G if running on an R8A03022BG part. @@ -354,7 +345,7 @@ config CPU_SUBTYPE_SH7720 bool "Support SH7720 processor" select CPU_SH3 select CPU_HAS_DSP - select SYS_SUPPORTS_CMT + select SYS_SUPPORTS_SH_CMT select ARCH_WANT_OPTIONAL_GPIOLIB select USB_ARCH_HAS_OHCI select USB_OHCI_SH if USB_OHCI_HCD @@ -366,7 +357,7 @@ config CPU_SUBTYPE_SH7721 bool "Support SH7721 processor" select CPU_SH3 select CPU_HAS_DSP - select SYS_SUPPORTS_CMT + select SYS_SUPPORTS_SH_CMT select USB_ARCH_HAS_OHCI select USB_OHCI_SH if USB_OHCI_HCD help @@ -422,7 +413,7 @@ config CPU_SUBTYPE_SH7723 select CPU_SHX2 select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE - select SYS_SUPPORTS_CMT + select SYS_SUPPORTS_SH_CMT select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help @@ -434,7 +425,7 @@ config CPU_SUBTYPE_SH7724 select CPU_SHX2 select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE - select SYS_SUPPORTS_CMT + select SYS_SUPPORTS_SH_CMT select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL help @@ -514,7 +505,7 @@ config CPU_SUBTYPE_SH7343 bool "Support SH7343 processor" select CPU_SH4AL_DSP select ARCH_SHMOBILE - select SYS_SUPPORTS_CMT + select SYS_SUPPORTS_SH_CMT config CPU_SUBTYPE_SH7722 bool "Support SH7722 processor" @@ -523,7 +514,7 @@ config CPU_SUBTYPE_SH7722 select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_NUMA - select SYS_SUPPORTS_CMT + select SYS_SUPPORTS_SH_CMT select ARCH_WANT_OPTIONAL_GPIOLIB select PINCTRL @@ -534,7 +525,7 @@ config CPU_SUBTYPE_SH7366 select ARCH_SHMOBILE select ARCH_SPARSEMEM_ENABLE select SYS_SUPPORTS_NUMA - select SYS_SUPPORTS_CMT + select SYS_SUPPORTS_SH_CMT endchoice @@ -567,27 +558,6 @@ source "arch/sh/boards/Kconfig" menu "Timer and clock configuration" -config SH_TIMER_TMU - bool "TMU timer driver" - depends on SYS_SUPPORTS_TMU - default y - help - This enables the build of the TMU timer driver. - -config SH_TIMER_CMT - bool "CMT timer driver" - depends on SYS_SUPPORTS_CMT - default y - help - This enables build of the CMT timer driver. - -config SH_TIMER_MTU2 - bool "MTU2 timer driver" - depends on SYS_SUPPORTS_MTU2 - default y - help - This enables build of the MTU2 timer driver. - config SH_PCLK_FREQ int "Peripheral clock frequency (in Hz)" depends on SH_CLK_CPG_LEGACY -- cgit v0.10.2 From fd3f1270d237d1afb344ee6bfd0e50c488c29e34 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 20 Feb 2014 12:54:45 +0100 Subject: clocksource: Add Kconfig entries for CMT, MTU2, TMU and STI Add Kconfig entries for CMT, MTU2, TMU and STI to drivers/clocksource/Kconfig. This will allow us to get rid of duplicated entires in architecture code such as arch/sh and arch/arm/mach-shmobile. Signed-off-by: Magnus Damm Signed-off-by: Wolfram Sang Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index cd6950f..4f754a9 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -140,3 +140,47 @@ config VF_PIT_TIMER bool help Support for Period Interrupt Timer on Freescale Vybrid Family SoCs. + +config SYS_SUPPORTS_SH_CMT + bool + +config SYS_SUPPORTS_SH_MTU2 + bool + +config SYS_SUPPORTS_SH_TMU + bool + +config SYS_SUPPORTS_EM_STI + bool + +config SH_TIMER_CMT + bool "Renesas CMT timer driver" if COMPILE_TEST + default SYS_SUPPORTS_SH_CMT + help + This enables build of a clocksource and clockevent driver for + the Compare Match Timer (CMT) hardware available in 16/32/48-bit + variants on a wide range of Mobile and Automotive SoCs from Renesas. + +config SH_TIMER_MTU2 + bool "Renesas MTU2 timer driver" if COMPILE_TEST + default SYS_SUPPORTS_SH_MTU2 + help + This enables build of a clockevent driver for the Multi-Function + Timer Pulse Unit 2 (TMU2) hardware available on SoCs from Renesas. + This hardware comes with 16 bit-timer registers. + +config SH_TIMER_TMU + bool "Renesas TMU timer driver" if COMPILE_TEST + default SYS_SUPPORTS_SH_TMU + help + This enables build of a clocksource and clockevent driver for + the 32-bit Timer Unit (TMU) hardware available on a wide range + SoCs from Renesas. + +config EM_TIMER_STI + bool "Renesas STI timer driver" if COMPILE_TEST + default SYS_SUPPORTS_EM_STI + help + This enables build of a clocksource and clockevent driver for + the 48-bit System Timer (STI) hardware available on a SoCs + such as EMEV2 from former NEC Electronics. -- cgit v0.10.2 From 5f0ba3b462b2d36b3c28748863747fb1050f40d0 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Wed, 19 Feb 2014 15:14:41 -0800 Subject: clocksource/cadence_ttc: Call clockevents_update_freq() with IRQs enabled The timer core takes care of serialization and IRQs. Hence the driver is no longer required to disable interrupts when calling clockevents_update_freq(). Signed-off-by: Soren Brinkmann Signed-off-by: Daniel Lezcano Acked-by: Michal Simek diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index 63f176d..f05b9fd 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -321,25 +321,12 @@ static int ttc_rate_change_clockevent_cb(struct notifier_block *nb, switch (event) { case POST_RATE_CHANGE: - { - unsigned long flags; - - /* - * clockevents_update_freq should be called with IRQ disabled on - * the CPU the timer provides events for. The timer we use is - * common to both CPUs, not sure if we need to run on both - * cores. - */ - local_irq_save(flags); - clockevents_update_freq(&ttcce->ce, - ndata->new_rate / PRESCALE); - local_irq_restore(flags); - /* update cached frequency */ ttc->freq = ndata->new_rate; + clockevents_update_freq(&ttcce->ce, ndata->new_rate / PRESCALE); + /* fall through */ - } case PRE_RATE_CHANGE: case ABORT_RATE_CHANGE: default: -- cgit v0.10.2 From b3e90722f6f53fa457a88146a877e34ea71d25ea Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Wed, 19 Feb 2014 15:14:42 -0800 Subject: clocksource/cadence_ttc: Overhaul clocksource frequency adjustment The currently used method adjusting the clocksource to a changing input frequency does not work on kernels from 3.11 on. The new approach is to keep the timer frequency as constant as possible. I.e. - due to the TTC's prescaler limitations, allow frequency changes only if the frequency scales by a power of 2 - adjust the counter's divider on the fly when a frequency change occurs This limits cpufreq to scale by certain factors only. But we may keep the time base somewhat constant, so that sleep() & co keep working as expected, while supporting cpufreq. Signed-off-by: Soren Brinkmann Signed-off-by: Daniel Lezcano Acked-by: Michal Simek diff --git a/drivers/clocksource/cadence_ttc_timer.c b/drivers/clocksource/cadence_ttc_timer.c index f05b9fd..49fbe28 100644 --- a/drivers/clocksource/cadence_ttc_timer.c +++ b/drivers/clocksource/cadence_ttc_timer.c @@ -16,6 +16,7 @@ */ #include +#include #include #include #include @@ -52,6 +53,8 @@ #define TTC_CNT_CNTRL_DISABLE_MASK 0x1 #define TTC_CLK_CNTRL_CSRC_MASK (1 << 5) /* clock source */ +#define TTC_CLK_CNTRL_PSV_MASK 0x1e +#define TTC_CLK_CNTRL_PSV_SHIFT 1 /* * Setup the timers to use pre-scaling, using a fixed value for now that will @@ -63,6 +66,8 @@ #define CLK_CNTRL_PRESCALE_EN 1 #define CNT_CNTRL_RESET (1 << 4) +#define MAX_F_ERR 50 + /** * struct ttc_timer - This definition defines local timer structure * @@ -82,6 +87,8 @@ struct ttc_timer { container_of(x, struct ttc_timer, clk_rate_change_nb) struct ttc_timer_clocksource { + u32 scale_clk_ctrl_reg_old; + u32 scale_clk_ctrl_reg_new; struct ttc_timer ttc; struct clocksource cs; }; @@ -229,32 +236,89 @@ static int ttc_rate_change_clocksource_cb(struct notifier_block *nb, struct ttc_timer_clocksource, ttc); switch (event) { - case POST_RATE_CHANGE: + case PRE_RATE_CHANGE: + { + u32 psv; + unsigned long factor, rate_low, rate_high; + + if (ndata->new_rate > ndata->old_rate) { + factor = DIV_ROUND_CLOSEST(ndata->new_rate, + ndata->old_rate); + rate_low = ndata->old_rate; + rate_high = ndata->new_rate; + } else { + factor = DIV_ROUND_CLOSEST(ndata->old_rate, + ndata->new_rate); + rate_low = ndata->new_rate; + rate_high = ndata->old_rate; + } + + if (!is_power_of_2(factor)) + return NOTIFY_BAD; + + if (abs(rate_high - (factor * rate_low)) > MAX_F_ERR) + return NOTIFY_BAD; + + factor = __ilog2_u32(factor); + /* - * Do whatever is necessary to maintain a proper time base - * - * I cannot find a way to adjust the currently used clocksource - * to the new frequency. __clocksource_updatefreq_hz() sounds - * good, but does not work. Not sure what's that missing. - * - * This approach works, but triggers two clocksource switches. - * The first after unregister to clocksource jiffies. And - * another one after the register to the newly registered timer. - * - * Alternatively we could 'waste' another HW timer to ping pong - * between clock sources. That would also use one register and - * one unregister call, but only trigger one clocksource switch - * for the cost of another HW timer used by the OS. + * store timer clock ctrl register so we can restore it in case + * of an abort. */ - clocksource_unregister(&ttccs->cs); - clocksource_register_hz(&ttccs->cs, - ndata->new_rate / PRESCALE); - /* fall through */ - case PRE_RATE_CHANGE: + ttccs->scale_clk_ctrl_reg_old = + __raw_readl(ttccs->ttc.base_addr + + TTC_CLK_CNTRL_OFFSET); + + psv = (ttccs->scale_clk_ctrl_reg_old & + TTC_CLK_CNTRL_PSV_MASK) >> + TTC_CLK_CNTRL_PSV_SHIFT; + if (ndata->new_rate < ndata->old_rate) + psv -= factor; + else + psv += factor; + + /* prescaler within legal range? */ + if (psv & ~(TTC_CLK_CNTRL_PSV_MASK >> TTC_CLK_CNTRL_PSV_SHIFT)) + return NOTIFY_BAD; + + ttccs->scale_clk_ctrl_reg_new = ttccs->scale_clk_ctrl_reg_old & + ~TTC_CLK_CNTRL_PSV_MASK; + ttccs->scale_clk_ctrl_reg_new |= psv << TTC_CLK_CNTRL_PSV_SHIFT; + + + /* scale down: adjust divider in post-change notification */ + if (ndata->new_rate < ndata->old_rate) + return NOTIFY_DONE; + + /* scale up: adjust divider now - before frequency change */ + __raw_writel(ttccs->scale_clk_ctrl_reg_new, + ttccs->ttc.base_addr + TTC_CLK_CNTRL_OFFSET); + break; + } + case POST_RATE_CHANGE: + /* scale up: pre-change notification did the adjustment */ + if (ndata->new_rate > ndata->old_rate) + return NOTIFY_OK; + + /* scale down: adjust divider now - after frequency change */ + __raw_writel(ttccs->scale_clk_ctrl_reg_new, + ttccs->ttc.base_addr + TTC_CLK_CNTRL_OFFSET); + break; + case ABORT_RATE_CHANGE: + /* we have to undo the adjustment in case we scale up */ + if (ndata->new_rate < ndata->old_rate) + return NOTIFY_OK; + + /* restore original register value */ + __raw_writel(ttccs->scale_clk_ctrl_reg_old, + ttccs->ttc.base_addr + TTC_CLK_CNTRL_OFFSET); + /* fall through */ default: return NOTIFY_DONE; } + + return NOTIFY_DONE; } static void __init ttc_setup_clocksource(struct clk *clk, void __iomem *base) -- cgit v0.10.2 From 61f1fc7e9258a169ac8afb5ddf657a181e60d052 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Wed, 19 Feb 2014 15:14:43 -0800 Subject: arm: zynq: Don't use arm_global_timer with cpufreq The timer frequency of the arm_global_timer depends on the CPU frequency. With cpufreq altering that frequency the arm_global_timer does not maintain a stable time base. Therefore don't enable that timer in case cpufreq is enabled. Cc: Russell King Signed-off-by: Soren Brinkmann Signed-off-by: Daniel Lezcano Acked-by: Michal Simek diff --git a/arch/arm/mach-zynq/Kconfig b/arch/arm/mach-zynq/Kconfig index 6b04260..f84fab1 100644 --- a/arch/arm/mach-zynq/Kconfig +++ b/arch/arm/mach-zynq/Kconfig @@ -13,6 +13,6 @@ config ARCH_ZYNQ select HAVE_SMP select SPARSE_IRQ select CADENCE_TTC_TIMER - select ARM_GLOBAL_TIMER + select ARM_GLOBAL_TIMER if !CPU_FREQ help Support for Xilinx Zynq ARM Cortex A9 Platform -- cgit v0.10.2 From cd325295871fbd172340a73f323f6a19c0b0525d Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Wed, 19 Feb 2014 15:14:44 -0800 Subject: arm: zynq: Add support for cpufreq The generic cpufreq-cpu0 driver can scale the CPU frequency on Zynq SOCs. Add the required platform device to the BSP and appropriate OPPs to the dts. Cc: Rob Herring Cc: Pawel Moll Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Cc: Russell King Cc: devicetree@vger.kernel.org Signed-off-by: Soren Brinkmann Signed-off-by: Daniel Lezcano Acked-by: Michal Simek diff --git a/arch/arm/boot/dts/zynq-7000.dtsi b/arch/arm/boot/dts/zynq-7000.dtsi index 8b67b19..789d0ba 100644 --- a/arch/arm/boot/dts/zynq-7000.dtsi +++ b/arch/arm/boot/dts/zynq-7000.dtsi @@ -24,6 +24,12 @@ device_type = "cpu"; reg = <0>; clocks = <&clkc 3>; + operating-points = < + /* kHz uV */ + 666667 1000000 + 333334 1000000 + 222223 1000000 + >; }; cpu@1 { diff --git a/arch/arm/mach-zynq/Kconfig b/arch/arm/mach-zynq/Kconfig index f84fab1..f03e75b 100644 --- a/arch/arm/mach-zynq/Kconfig +++ b/arch/arm/mach-zynq/Kconfig @@ -2,6 +2,8 @@ config ARCH_ZYNQ bool "Xilinx Zynq ARM Cortex A9 Platform" if ARCH_MULTI_V7 select ARM_AMBA select ARM_GIC + select ARCH_HAS_CPUFREQ + select ARCH_HAS_OPP select COMMON_CLK select CPU_V7 select GENERIC_CLOCKEVENTS diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 1db2a5ca..6444681 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -51,6 +51,8 @@ static struct platform_device zynq_cpuidle_device = { */ static void __init zynq_init_machine(void) { + struct platform_device_info devinfo = { .name = "cpufreq-cpu0", }; + /* * 64KB way size, 8-way associativity, parity disabled */ @@ -59,6 +61,7 @@ static void __init zynq_init_machine(void) of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); platform_device_register(&zynq_cpuidle_device); + platform_device_register_full(&devinfo); } static void __init zynq_timer_init(void) -- cgit v0.10.2 From 51061b8876a3906aa5bf173582f180596f9d6455 Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 6 Mar 2014 17:05:27 -0800 Subject: MIPS: math-emu: Fix prefx detection and COP1X function field definition When running applications which contain the instruction "prefx" on FPU-less CPUs, a message "Illegal instruction" will be seen. This instruction is supposed to be ignored by the FPU emulator. However, its current detection and function field encoding are incorrect. This patch fix the issue. Signed-off-by: Deng-Cheng Zhu Reviewed-by: Leonid Yegoshin Reviewed-by: Paul Burton Acked-by: David Daney Cc: linux-mips@linux-mips.org Cc: Steven.Hill@imgtec.com Patchwork: https://patchwork.linux-mips.org/patch/6608/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index b39ba25..f25181b 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -163,8 +163,8 @@ enum cop1_sdw_func { */ enum cop1x_func { lwxc1_op = 0x00, ldxc1_op = 0x01, - pfetch_op = 0x07, swxc1_op = 0x08, - sdxc1_op = 0x09, madd_s_op = 0x20, + swxc1_op = 0x08, sdxc1_op = 0x09, + pfetch_op = 0x0f, madd_s_op = 0x20, madd_d_op = 0x21, madd_e_op = 0x22, msub_s_op = 0x28, msub_d_op = 0x29, msub_e_op = 0x2a, nmadd_s_op = 0x30, diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 506925b..0b4e2e3 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -1538,10 +1538,10 @@ static int fpux_emu(struct pt_regs *xcp, struct mips_fpu_struct *ctx, break; } - case 0x7: /* 7 */ - if (MIPSInst_FUNC(ir) != pfetch_op) { + case 0x3: + if (MIPSInst_FUNC(ir) != pfetch_op) return SIGILL; - } + /* ignore prefx operation */ break; -- cgit v0.10.2 From 09e15176ded1faa7bd685b3b5b1213cf0240566e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Mar 2014 16:57:14 +0300 Subject: clocksource: exynos_mct: silence a static checker warning My guess is we aren't going to have a 2 digit cpuid here any time soon but the static checkers don't know that and complain that the snprintf() could overflow. Signed-off-by: Dan Carpenter Signed-off-by: Daniel Lezcano diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index 48f76bc..c2e390e 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -410,7 +410,7 @@ static int exynos4_local_timer_setup(struct clock_event_device *evt) mevt = container_of(evt, struct mct_clock_event_device, evt); mevt->base = EXYNOS4_MCT_L_BASE(cpu); - sprintf(mevt->name, "mct_tick%d", cpu); + snprintf(mevt->name, sizeof(mevt->name), "mct_tick%d", cpu); evt->name = mevt->name; evt->cpumask = cpumask_of(cpu); -- cgit v0.10.2 From 9ed973cc40c588abeaa58aea0683ea665132d11d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 10 Mar 2014 22:25:24 +0100 Subject: bridge: multicast: add sanity check for general query destination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit General IGMP and MLD queries are supposed to have the multicast link-local all-nodes address as their destination according to RFC2236 section 9, RFC3376 section 4.1.12/9.1, RFC2710 section 8 and RFC3810 section 5.1.15. Without this check, such malformed IGMP/MLD queries can result in a denial of service: The queries are ignored by most IGMP/MLD listeners therefore they will not respond with an IGMP/MLD report. However, without this patch these malformed MLD queries would enable the snooping part in the bridge code, potentially shutting down the according ports towards these hosts for multicast traffic as the bridge did not learn about these listeners. Reported-by: Jan Stancek Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index fb0e36f..e56bae4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1181,6 +1181,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { + err = -EINVAL; + goto out; + } + br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, max_delay); @@ -1228,6 +1236,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long max_delay; unsigned long now = jiffies; const struct in6_addr *group = NULL; + bool is_general_query; int err = 0; spin_lock(&br->multicast_lock); @@ -1262,6 +1271,16 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL); } + is_general_query = group && ipv6_addr_any(group); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { + err = -EINVAL; + goto out; + } + br_multicast_query_received(br, port, &br->ip6_querier, !ipv6_addr_any(&ip6h->saddr), max_delay); -- cgit v0.10.2 From 20a599bec95a52fa72432b2376a2ce47c5bb68fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 10 Mar 2014 22:25:25 +0100 Subject: bridge: multicast: enable snooping on general queries only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this check someone could easily create a denial of service by injecting multicast-specific queries to enable the bridge snooping part if no real querier issuing periodic general queries is present on the link which would result in the bridge wrongly shutting down ports for multicast traffic as the bridge did not learn about these listeners. With this patch the snooping code is enabled upon receiving valid, general queries only. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index e56bae4..93067ec 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1127,9 +1127,10 @@ static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, struct bridge_mcast_querier *querier, int saddr, + bool is_general_query, unsigned long max_delay) { - if (saddr) + if (saddr && is_general_query) br_multicast_update_querier_timer(br, querier, max_delay); else if (timer_pending(&querier->timer)) return; @@ -1190,7 +1191,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, } br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, - max_delay); + !group, max_delay); if (!group) goto out; @@ -1282,7 +1283,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, } br_multicast_query_received(br, port, &br->ip6_querier, - !ipv6_addr_any(&ip6h->saddr), max_delay); + !ipv6_addr_any(&ip6h->saddr), + is_general_query, max_delay); if (!group) goto out; -- cgit v0.10.2 From fdfaf64e75397567257e1051931f9a3377360665 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 10 Mar 2014 15:56:51 -0700 Subject: x86: bpf_jit: support negative offsets Commit a998d4342337 claimed to introduce negative offset support to x86 jit, but it couldn't be working, since at the time of the execution of LD+ABS or LD+IND instructions via call into bpf_internal_load_pointer_neg_helper() the %edx (3rd argument of this func) had junk value instead of access size in bytes (1 or 2 or 4). Store size into %edx instead of %ecx (what original commit intended to do) Fixes: a998d4342337 ("bpf jit: Let the x86 jit handle negative offsets") Signed-off-by: Alexei Starovoitov Cc: Jan Seiffert Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 877b9a1..0149575 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -140,7 +140,7 @@ bpf_slow_path_byte_msh: push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ - mov $SIZE,%ecx; /* size */ \ + mov $SIZE,%edx; /* size */ \ call bpf_internal_load_pointer_neg_helper; \ test %rax,%rax; \ pop SKBDATA; \ -- cgit v0.10.2 From 406764622fb71e5f4efb39ff111ef87713815f6d Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Tue, 11 Mar 2014 08:49:39 +0000 Subject: tools/net/Makefile: Define PACKAGE to fix build problems Fixes the following build problem with binutils-2.24 gcc -Wall -O2 -c -o bpf_jit_disasm.o bpf_jit_disasm.c In file included from bpf_jit_disasm.c:25:0: /usr/include/bfd.h:35:2: error: #error config.h must be included before this header #error config.h must be included before this header This is similar to commit 3ce711a6abc27abce1554e1d671a8762b7187690 "perf tools: bfd.h/libbfd detection fails with recent binutils" See: https://sourceware.org/bugzilla/show_bug.cgi?id=14243 CC: David S. Miller CC: Daniel Borkmann CC: netdev@vger.kernel.org Acked-by: Daniel Borkmann Signed-off-by: Markos Chandras Signed-off-by: David S. Miller diff --git a/tools/net/Makefile b/tools/net/Makefile index 004cd74..ee577ea 100644 --- a/tools/net/Makefile +++ b/tools/net/Makefile @@ -12,7 +12,7 @@ YACC = bison all : bpf_jit_disasm bpf_dbg bpf_asm -bpf_jit_disasm : CFLAGS = -Wall -O2 +bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm' bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl bpf_jit_disasm : bpf_jit_disasm.o -- cgit v0.10.2 From f75761b6b5bf6277296505941d2dd8e11f9b5c35 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 11 Mar 2014 15:11:59 +0800 Subject: r8169: fix the incorrect tx descriptor version The tx descriptor version of RTL8111B belong to RTL_TD_0. Signed-off-by: Hayes Wang Acked-by: Francois Romieu Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e977965..3ff7bc3 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -209,7 +209,7 @@ static const struct { [RTL_GIGA_MAC_VER_16] = _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), [RTL_GIGA_MAC_VER_17] = - _R("RTL8168b/8111b", RTL_TD_1, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), [RTL_GIGA_MAC_VER_18] = _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), [RTL_GIGA_MAC_VER_19] = -- cgit v0.10.2 From b12bb60d6c350b348a4e1460cd68f97ccae9822e Mon Sep 17 00:00:00 2001 From: Ales Novak Date: Thu, 27 Feb 2014 11:03:30 +0100 Subject: [SCSI] storvsc: NULL pointer dereference fix If the initialization of storvsc fails, the storvsc_device_destroy() causes NULL pointer dereference. storvsc_bus_scan() scsi_scan_target() __scsi_scan_target() scsi_probe_and_add_lun(hostdata=NULL) scsi_alloc_sdev(hostdata=NULL) sdev->hostdata = hostdata now the host allocation fails __scsi_remove_device(sdev) calls sdev->host->hostt->slave_destroy() == storvsc_device_destroy(sdev) access of sdev->hostdata->request_mempool Signed-off-by: Ales Novak Signed-off-by: Thomas Abraham Reviewed-by: Jiri Kosina Acked-by: K. Y. Srinivasan Cc: stable@vger.kernel.org Signed-off-by: James Bottomley diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 17d7404..9969fa1 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1419,6 +1419,9 @@ static void storvsc_device_destroy(struct scsi_device *sdevice) { struct stor_mem_pools *memp = sdevice->hostdata; + if (!memp) + return; + mempool_destroy(memp->request_mempool); kmem_cache_destroy(memp->request_pool); kfree(memp); -- cgit v0.10.2 From 383afd0971538b3d77532a56404b24cfe967b5dd Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 11 Mar 2014 19:24:20 -0400 Subject: sched: Fix broken setscheduler() I decided to run my tests on linux-next, and my wakeup_rt tracer was broken. After running a bisect, I found that the problem commit was: linux-next commit c365c292d059 "sched: Consider pi boosting in setscheduler()" And the reason the wake_rt tracer test was failing, was because it had no RT task to trace. I first noticed this when running with sched_switch event and saw that my RT task still had normal SCHED_OTHER priority. Looking at the problem commit, I found: - p->normal_prio = normal_prio(p); - p->prio = rt_mutex_getprio(p); With no + p->normal_prio = normal_prio(p); + p->prio = rt_mutex_getprio(p); Reading what the commit is suppose to do, I realize that the p->prio can't be set if the task is boosted with a higher prio, but the p->normal_prio still needs to be set regardless, otherwise, when the task is deboosted, it wont get the new priority. The p->prio has to be set before "check_class_changed()" is called, otherwise the class wont be changed. Also added fix to newprio to include a check for deadline policy that was missing. This change was suggested by Juri Lelli. Signed-off-by: Steven Rostedt Cc: SebastianAndrzej Siewior Cc: Juri Lelli Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140306120438.638bfe94@gandalf.local.home Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9e126a2..ae365aa 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3195,6 +3195,7 @@ static void __setscheduler_params(struct task_struct *p, * getparam()/getattr() don't report silly values for !rt tasks. */ p->rt_priority = attr->sched_priority; + p->normal_prio = normal_prio(p); set_load_weight(p); } @@ -3204,6 +3205,12 @@ static void __setscheduler(struct rq *rq, struct task_struct *p, { __setscheduler_params(p, attr); + /* + * If we get here, there was no pi waiters boosting the + * task. It is safe to use the normal prio. + */ + p->prio = normal_prio(p); + if (dl_prio(p->prio)) p->sched_class = &dl_sched_class; else if (rt_prio(p->prio)) @@ -3262,7 +3269,8 @@ static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user) { - int newprio = MAX_RT_PRIO - 1 - attr->sched_priority; + int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 : + MAX_RT_PRIO - 1 - attr->sched_priority; int retval, oldprio, oldpolicy = -1, on_rq, running; int policy = attr->sched_policy; unsigned long flags; -- cgit v0.10.2 From a2cd42601b474b957e1a5fe3692bcf7f9363bd51 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 11 Mar 2014 17:26:06 +0100 Subject: sched: Remove double calculation in fix_small_imbalance() The tmp value has been already calculated in: scaled_busy_load_per_task = (busiest->load_per_task * SCHED_POWER_SCALE) / busiest->group_power; Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1394555166-22894-1-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f1eedae..b301918 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6061,12 +6061,10 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) pwr_now /= SCHED_POWER_SCALE; /* Amount of load we'd subtract */ - tmp = (busiest->load_per_task * SCHED_POWER_SCALE) / - busiest->group_power; - if (busiest->avg_load > tmp) { + if (busiest->avg_load > scaled_busy_load_per_task) { pwr_move += busiest->group_power * min(busiest->load_per_task, - busiest->avg_load - tmp); + busiest->avg_load - scaled_busy_load_per_task); } /* Amount of load we'd add */ -- cgit v0.10.2 From 6037dd1a49f95092824fa8ba75c717ff7805e317 Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Wed, 12 Mar 2014 14:51:51 +0800 Subject: sched: Clean up the task_hot() function task_hot() doesn't need the 'sched_domain' parameter, so remove it. Signed-off-by: Alex Shi Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1394607111-1904-1-git-send-email-alex.shi@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b301918..7e9bd0b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5037,7 +5037,7 @@ static void move_task(struct task_struct *p, struct lb_env *env) * Is this task likely cache-hot: */ static int -task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) +task_hot(struct task_struct *p, u64 now) { s64 delta; @@ -5198,7 +5198,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) * 2) task is cache cold, or * 3) too many balance attempts have failed. */ - tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd); + tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq)); if (!tsk_cache_hot) tsk_cache_hot = migrate_degrades_locality(p, env); -- cgit v0.10.2 From 4191c29f0593e82d19c3cd05de67e55467aca6b5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 12 Mar 2014 00:53:30 +0100 Subject: perf/x86/uncore: Fix compilation warning in snb_uncore_imc_init_box() This patch fixes a compilation problem (unused variable) with the new SNB/IVB/HSW uncore IMC code. [ In -v2 we simplify the fix as suggested by Peter Zjilstra. ] Reported-by: Stephen Rothwell Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140311235329.GA28624@quad Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 5c2537a..dfd50ea 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1722,15 +1722,16 @@ static struct attribute_group snb_uncore_imc_format_group = { static void snb_uncore_imc_init_box(struct intel_uncore_box *box) { struct pci_dev *pdev = box->pci_dev; - u32 addr_lo, addr_hi; + int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; resource_size_t addr; + u32 pci_dword; - pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET, &addr_lo); - addr = addr_lo; + pci_read_config_dword(pdev, where, &pci_dword); + addr = pci_dword; #ifdef CONFIG_PHYS_ADDR_T_64BIT - pci_read_config_dword(pdev, SNB_UNCORE_PCI_IMC_BAR_OFFSET+4, &addr_hi); - addr = ((resource_size_t)addr_hi << 32) | addr_lo; + pci_read_config_dword(pdev, where + 4, &pci_dword); + addr |= ((resource_size_t)pci_dword << 32); #endif addr &= ~(PAGE_SIZE - 1); -- cgit v0.10.2 From 8783dd3a37a5853689e1a8fa728827a50905b912 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 4 Mar 2014 16:40:30 -0800 Subject: irqchip: Remove asmlinkage from static functions LTO patches add __visible to the asmlinkage define, causing compilation warnings like: drivers/irqchip/irq-gic.c:283:1: warning: 'externally_visible' attribute have effect only on public objects [-Wattributes] Drop asmlinkage here to avoid such warnings. Reported-by: Olof's autobuilder Signed-off-by: Stephen Boyd Cc: linux-arm-kernel@lists.infradead.org Cc: khilman@linaro.org Cc: Russell King Cc: Josh Cartwright Cc: Andi Kleen Link: http://lkml.kernel.org/r/1393980030-17770-1-git-send-email-sboyd@codeaurora.org Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index cd79503..41be897 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -410,7 +410,7 @@ static void armada_370_xp_mpic_handle_cascade_irq(unsigned int irq, chained_irq_exit(chip, desc); } -static asmlinkage void __exception_irq_entry +static void __exception_irq_entry armada_370_xp_handle_irq(struct pt_regs *regs) { u32 irqstat, irqnr; diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c index 1693b8e..5916d6c 100644 --- a/drivers/irqchip/irq-bcm2835.c +++ b/drivers/irqchip/irq-bcm2835.c @@ -95,7 +95,7 @@ struct armctrl_ic { }; static struct armctrl_ic intc __read_mostly; -static asmlinkage void __exception_irq_entry bcm2835_handle_irq( +static void __exception_irq_entry bcm2835_handle_irq( struct pt_regs *regs); static void armctrl_mask_irq(struct irq_data *d) @@ -196,7 +196,7 @@ static void armctrl_handle_shortcut(int bank, struct pt_regs *regs, handle_IRQ(irq_linear_revmap(intc.domain, irq), regs); } -static asmlinkage void __exception_irq_entry bcm2835_handle_irq( +static void __exception_irq_entry bcm2835_handle_irq( struct pt_regs *regs) { u32 stat, irq; diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index abeb5a9..531769b 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -279,7 +279,7 @@ static int gic_set_wake(struct irq_data *d, unsigned int on) #define gic_set_wake NULL #endif -static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) { u32 irqstat, irqnr; struct gic_chip_data *gic = &gic_data[0]; diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 2cb7cd0..3c8827f 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -194,8 +194,7 @@ static struct mmp_intc_conf mmp2_conf = { .conf_mask = 0x7f, }; -static asmlinkage void __exception_irq_entry -mmp_handle_irq(struct pt_regs *regs) +static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs) { int irq, hwirq; @@ -207,8 +206,7 @@ mmp_handle_irq(struct pt_regs *regs) handle_IRQ(irq, regs); } -static asmlinkage void __exception_irq_entry -mmp2_handle_irq(struct pt_regs *regs) +static void __exception_irq_entry mmp2_handle_irq(struct pt_regs *regs) { int irq, hwirq; diff --git a/drivers/irqchip/irq-moxart.c b/drivers/irqchip/irq-moxart.c index 5552fc2..00b3cc9 100644 --- a/drivers/irqchip/irq-moxart.c +++ b/drivers/irqchip/irq-moxart.c @@ -44,7 +44,7 @@ struct moxart_irq_data { static struct moxart_irq_data intc; -static asmlinkage void __exception_irq_entry handle_irq(struct pt_regs *regs) +static void __exception_irq_entry handle_irq(struct pt_regs *regs) { u32 irqstat; int hwirq; diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c index e51d400..c3f0f41 100644 --- a/drivers/irqchip/irq-orion.c +++ b/drivers/irqchip/irq-orion.c @@ -30,7 +30,7 @@ static struct irq_domain *orion_irq_domain; -static asmlinkage void +static void __exception_irq_entry orion_handle_irq(struct pt_regs *regs) { struct irq_domain_chip_generic *dgc = orion_irq_domain->gc; diff --git a/drivers/irqchip/irq-sirfsoc.c b/drivers/irqchip/irq-sirfsoc.c index 3a070c5..581eefe 100644 --- a/drivers/irqchip/irq-sirfsoc.c +++ b/drivers/irqchip/irq-sirfsoc.c @@ -47,7 +47,7 @@ sirfsoc_alloc_gc(void __iomem *base, unsigned int irq_start, unsigned int num) ct->regs.mask = SIRFSOC_INT_RISC_MASK0; } -static asmlinkage void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs) +static void __exception_irq_entry sirfsoc_handle_irq(struct pt_regs *regs) { void __iomem *base = sirfsoc_irqdomain->host_data; u32 irqstat, irqnr; diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index a5438d8..9fbff03 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -36,7 +36,7 @@ static void __iomem *sun4i_irq_base; static struct irq_domain *sun4i_irq_domain; -static asmlinkage void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs); +static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs); static void sun4i_irq_ack(struct irq_data *irqd) { @@ -136,7 +136,7 @@ static int __init sun4i_of_init(struct device_node *node, } IRQCHIP_DECLARE(allwinner_sun4i_ic, "allwinner,sun4i-ic", sun4i_of_init); -static asmlinkage void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs) +static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs) { u32 irq, hwirq; diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c index 8e21ae0..473f09a 100644 --- a/drivers/irqchip/irq-vic.c +++ b/drivers/irqchip/irq-vic.c @@ -228,7 +228,7 @@ static int handle_one_vic(struct vic_device *vic, struct pt_regs *regs) * Keep iterating over all registered VIC's until there are no pending * interrupts. */ -static asmlinkage void __exception_irq_entry vic_handle_irq(struct pt_regs *regs) +static void __exception_irq_entry vic_handle_irq(struct pt_regs *regs) { int i, handled; diff --git a/drivers/irqchip/irq-vt8500.c b/drivers/irqchip/irq-vt8500.c index 1846e7d..eb6e91e 100644 --- a/drivers/irqchip/irq-vt8500.c +++ b/drivers/irqchip/irq-vt8500.c @@ -178,8 +178,7 @@ static struct irq_domain_ops vt8500_irq_domain_ops = { .xlate = irq_domain_xlate_onecell, }; -static asmlinkage -void __exception_irq_entry vt8500_handle_irq(struct pt_regs *regs) +static void __exception_irq_entry vt8500_handle_irq(struct pt_regs *regs) { u32 stat, i; int irqnr, virq; diff --git a/drivers/irqchip/irq-zevio.c b/drivers/irqchip/irq-zevio.c index 8ed04c4..ceb3a43 100644 --- a/drivers/irqchip/irq-zevio.c +++ b/drivers/irqchip/irq-zevio.c @@ -50,7 +50,7 @@ static void zevio_irq_ack(struct irq_data *irqd) readl(gc->reg_base + regs->ack); } -static asmlinkage void __exception_irq_entry zevio_handle_irq(struct pt_regs *regs) +static void __exception_irq_entry zevio_handle_irq(struct pt_regs *regs) { int irqnr; -- cgit v0.10.2 From 785aebd0cfff52e735ad4fd188d3726b5affc8e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2014 20:43:38 +0000 Subject: ia64: Validate online cpus in irq_set_affinity() callbacks The [user space] interface does not filter out offline cpus. It merily guarantees that the mask contains at least one online cpu. So the selector in the irq chip implementation needs to make sure to pick only an online cpu because otherwise: Offline Core 1 Set affinity to 0xe (is valid due to online mask 0xd) cpumask_first will pick core 1, which is offline Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Tony Luck Cc: Fenghua Yu Cc: ia64 Link: http://lkml.kernel.org/r/20140304203100.650414633@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index fb2f1e6..c430f91 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -17,12 +17,9 @@ static int ia64_set_msi_irq_affinity(struct irq_data *idata, { struct msi_msg msg; u32 addr, data; - int cpu = first_cpu(*cpu_mask); + int cpu = cpumask_first_and(cpu_mask, cpu_online_mask); unsigned int irq = idata->irq; - if (!cpu_online(cpu)) - return -1; - if (irq_prepare_move(irq, cpu)) return -1; @@ -139,10 +136,7 @@ static int dmar_msi_set_affinity(struct irq_data *data, unsigned int irq = data->irq; struct irq_cfg *cfg = irq_cfg + irq; struct msi_msg msg; - int cpu = cpumask_first(mask); - - if (!cpu_online(cpu)) - return -1; + int cpu = cpumask_first_and(mask, cpu_online_mask); if (irq_prepare_move(irq, cpu)) return -1; diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 62cf4dd..85d0951 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -209,8 +209,8 @@ static int sn_set_affinity_irq(struct irq_data *data, nasid_t nasid; int slice; - nasid = cpuid_to_nasid(cpumask_first(mask)); - slice = cpuid_to_slice(cpumask_first(mask)); + nasid = cpuid_to_nasid(cpumask_first_and(mask, cpu_online_mask)); + slice = cpuid_to_slice(cpumask_first_and(mask, cpu_online_mask)); list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe, sn_irq_lh[irq], list) diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index 2b98b9e..afc58d2 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -166,7 +166,7 @@ static int sn_set_msi_irq_affinity(struct irq_data *data, struct sn_pcibus_provider *provider; unsigned int cpu, irq = data->irq; - cpu = cpumask_first(cpu_mask); + cpu = cpumask_first_and(cpu_mask, cpu_online_mask); sn_irq_info = sn_msi_info[irq].sn_irq_info; if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0) return -1; -- cgit v0.10.2 From 421d1563c6620423d23e394711e3f209e585c161 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2014 20:43:39 +0000 Subject: mips: Validate online cpus in irq_set_affinity() callbacks The [user space] interface does not filter out offline cpus. It merily guarantees that the mask contains at least one online cpu. So the selector in the irq chip implementation needs to make sure to pick only an online cpu because otherwise: Offline Core 1 Set affinity to 0xe (is valid due to online mask 0xd) cpumask_first will pick core 1, which is offline Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Link: http://lkml.kernel.org/r/20140304203100.744800502@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/mips/sibyte/bcm1480/irq.c b/arch/mips/sibyte/bcm1480/irq.c index 09d6e16..59cfe26 100644 --- a/arch/mips/sibyte/bcm1480/irq.c +++ b/arch/mips/sibyte/bcm1480/irq.c @@ -95,7 +95,7 @@ static int bcm1480_set_affinity(struct irq_data *d, const struct cpumask *mask, u64 cur_ints; unsigned long flags; - i = cpumask_first(mask); + i = cpumask_first_and(mask, cpu_online_mask); /* Convert logical CPU to physical CPU */ cpu = cpu_logical_map(i); diff --git a/arch/mips/sibyte/sb1250/irq.c b/arch/mips/sibyte/sb1250/irq.c index fca0cdb..6d8dba5 100644 --- a/arch/mips/sibyte/sb1250/irq.c +++ b/arch/mips/sibyte/sb1250/irq.c @@ -88,7 +88,7 @@ static int sb1250_set_affinity(struct irq_data *d, const struct cpumask *mask, u64 cur_ints; unsigned long flags; - i = cpumask_first(mask); + i = cpumask_first_and(mask, cpu_online_mask); /* Convert logical CPU to physical CPU */ cpu = cpu_logical_map(i); -- cgit v0.10.2 From 753fbd23f5e59ea9dc0cabe0a684d32100a4af02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2014 20:43:40 +0000 Subject: xen: Validate online cpus in set_affinity The user space interface does not filter out offline cpus. It merily verifies that the mask contains at least one online cpu. So the selector in the irq chip implementation needs to make sure to pick only an online cpu because otherwise: Offline Core 1 Set affinity to 0xe Selector will pick first set bit, i.e. core 1 Signed-off-by: Thomas Gleixner Reviewed-by: David Vrabel Cc: Peter Zijlstra Cc: Konrad Rzeszutek Wilk Cc: Xen Link: http://lkml.kernel.org/r/20140304203100.978031089@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 8b91c256..c3458f5 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1324,7 +1324,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu) static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, bool force) { - unsigned tcpu = cpumask_first(dest); + unsigned tcpu = cpumask_first_and(dest, cpu_online_mask); return rebind_irq_to_cpu(data->irq, tcpu); } -- cgit v0.10.2 From 1ed71e59bca79e866c4bebbe1efc0bc18245119d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 4 Mar 2014 20:43:40 +0000 Subject: parisc: Validate online cpus in irq_set_affinity() callbacks The [user space] interface does not filter out offline cpus. It merily guarantees that the mask contains at least one online cpu. So the selector in the irq chip implementation needs to make sure to pick only an online cpu because otherwise: Offline Core 1 Set affinity to 0xe (is valid due to online mask 0xd) cpumask_first will pick core 1, which is offline Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: James E.J. Bottomley Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Link: http://lkml.kernel.org/r/20140304203100.859489993@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 8ceac47..cfe056f 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -117,7 +117,7 @@ int cpu_check_affinity(struct irq_data *d, const struct cpumask *dest) return -EINVAL; /* whatever mask they set, we just allow one CPU */ - cpu_dest = first_cpu(*dest); + cpu_dest = cpumask_first_and(dest, cpu_online_mask); return cpu_dest; } -- cgit v0.10.2 From 1a75b8e64571a85d5e648cfdf4c40e0d9923abc5 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 11 Mar 2014 13:51:49 -0700 Subject: irqchip: Remove unused include The "irqchip.h" include here is not needed as the only thing in irqchip.h is IRQCHIP_DECLARE which this file doesn't use. Drop it. Reported-by: Jiri Kosina Signed-off-by: Stephen Boyd Link: http://lkml.kernel.org/r/531F7765.40207@codeaurora.org Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c index 3469141..cad3e24 100644 --- a/drivers/irqchip/irqchip.c +++ b/drivers/irqchip/irqchip.c @@ -12,8 +12,6 @@ #include #include -#include "irqchip.h" - /* * This special of_device_id is the sentinel at the end of the * of_device_id[] array of all irqchips. It is automatically placed at -- cgit v0.10.2 From 6f008e72cd111a119b5d8de8c5438d892aae99eb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Mar 2014 13:24:42 +0100 Subject: locking/mutex: Fix debug checks OK, so commit: 1d8fe7dc8078 ("locking/mutexes: Unlock the mutex without the wait_lock") generates this boot warning when CONFIG_DEBUG_MUTEXES=y: WARNING: CPU: 0 PID: 139 at /usr/src/linux-2.6/kernel/locking/mutex-debug.c:82 debug_mutex_unlock+0x155/0x180() DEBUG_LOCKS_WARN_ON(lock->owner != current) And that makes sense, because as soon as we release the lock a new owner can come in... One would think that !__mutex_slowpath_needs_to_unlock() implementations suffer the same, but for DEBUG we fall back to mutex-null.h which has an unconditional 1 for that. The mutex debug code requires the mutex to be unlocked after doing the debug checks, otherwise it can find inconsistent state. Reported-by: Ingo Molnar Signed-off-by: Peter Zijlstra Cc: jason.low2@hp.com Link: http://lkml.kernel.org/r/20140312122442.GB27965@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index faf6f5b..e1191c9 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -83,6 +83,12 @@ void debug_mutex_unlock(struct mutex *lock) DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); mutex_clear_owner(lock); + + /* + * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug + * mutexes so that we can do it here after we've verified state. + */ + atomic_set(&lock->count, 1); } void debug_mutex_init(struct mutex *lock, const char *name, diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 02c61a9..14fe72c 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -34,6 +34,13 @@ #ifdef CONFIG_DEBUG_MUTEXES # include "mutex-debug.h" # include +/* + * Must be 0 for the debug case so we do not do the unlock outside of the + * wait_lock region. debug_mutex_unlock() will do the actual unlock in this + * case. + */ +# undef __mutex_slowpath_needs_to_unlock +# define __mutex_slowpath_needs_to_unlock() 0 #else # include "mutex.h" # include -- cgit v0.10.2 From 0e6d6ec02f867fe8d1f785312b7343b21052322f Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 12 Mar 2014 10:41:32 +0100 Subject: drm/ttm: Work around performance regression with VM_PFNMAP A performance regression was introduced in TTM in linux 3.13 when we started using VM_PFNMAP for shared mappings. In theory this should've been faster due to less page book-keeping but it appears like VM_PFNMAP + x86 PAT + write-combine is a particularly cpu-hungry combination, as seen by largely increased cpu-usage on r200 GL video playback. Until we've sorted out why, revert to always use VM_MIXEDMAP. Reference: freedesktop.org bugzilla bug #75719 Reported-and-tested-by: Acked-by: Alex Deucher Signed-off-by: Thomas Hellstrom Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 801231c..0ce48e5 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -339,11 +339,13 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, vma->vm_private_data = bo; /* - * PFNMAP is faster than MIXEDMAP due to reduced page - * administration. So use MIXEDMAP only if private VMA, where - * we need to support COW. + * We'd like to use VM_PFNMAP on shared mappings, where + * (vma->vm_flags & VM_SHARED) != 0, for performance reasons, + * but for some reason VM_PFNMAP + x86 PAT + write-combine is very + * bad for performance. Until that has been sorted out, use + * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719 */ - vma->vm_flags |= (vma->vm_flags & VM_SHARED) ? VM_PFNMAP : VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; return 0; out_unref: @@ -359,7 +361,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) vma->vm_ops = &ttm_bo_vm_ops; vma->vm_private_data = ttm_bo_reference(bo); - vma->vm_flags |= (vma->vm_flags & VM_SHARED) ? VM_PFNMAP : VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND; return 0; } -- cgit v0.10.2 From c1bacbae8192dd2a9ebadd22d793b68054f6c6e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 8 Mar 2014 08:59:58 +0100 Subject: genirq: Provide irq_request/release_resources chip callbacks For certain irq types, e.g. gpios, it's necessary to request resources before starting up the irq. This might fail so we cannot use the irq_startup() callback because we might call the irq_set_type() callback before that which does not make sense when the resource is not available. Calling irq_startup() before irq_set_type() can lead to spurious interrupts which is not desired either. Signed-off-by: Thomas Gleixner Cc: Jean-Jacques Hiblot Cc: Grant Likely Cc: linux-arm-kernel@lists.infradead.org Reviewed-by: Linus Walleij Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1403080857160.18573@ionos.tec.linutronix.de Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq.h b/include/linux/irq.h index 7dc1003..e675971 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -303,6 +303,10 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_calc_mask: Optional function to set irq_data.mask for special cases * @irq_print_chip: optional to print special chip info in show_interrupts + * @irq_request_resources: optional to request resources before calling + * any other callback related to this irq + * @irq_release_resources: optional to release resources acquired with + * irq_request_resources * @flags: chip specific flags */ struct irq_chip { @@ -336,6 +340,8 @@ struct irq_chip { void (*irq_calc_mask)(struct irq_data *data); void (*irq_print_chip)(struct irq_data *data, struct seq_file *p); + int (*irq_request_resources)(struct irq_data *data); + void (*irq_release_resources)(struct irq_data *data); unsigned long flags; }; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d3bf660..5d35cbe 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -896,6 +896,23 @@ static void irq_setup_forced_threading(struct irqaction *new) } } +static int irq_request_resources(struct irq_desc *desc) +{ + struct irq_data *d = &desc->irq_data; + struct irq_chip *c = d->chip; + + return c->irq_request_resources ? c->irq_request_resources(d) : 0; +} + +static void irq_release_resources(struct irq_desc *desc) +{ + struct irq_data *d = &desc->irq_data; + struct irq_chip *c = d->chip; + + if (c->irq_release_resources) + c->irq_release_resources(d); +} + /* * Internal function to register an irqaction - typically used to * allocate special interrupts that are part of the architecture. @@ -1091,6 +1108,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!shared) { + ret = irq_request_resources(desc); + if (ret) { + pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n", + new->name, irq, desc->irq_data.chip->name); + goto out_mask; + } + init_waitqueue_head(&desc->wait_for_threads); /* Setup the type (level, edge polarity) if configured: */ @@ -1261,8 +1285,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) *action_ptr = action->next; /* If this was the last handler, shut down the IRQ line: */ - if (!desc->action) + if (!desc->action) { irq_shutdown(desc); + irq_release_resources(desc); + } #ifdef CONFIG_SMP /* make sure affinity_hint is cleaned up */ -- cgit v0.10.2 From fcb818231f81e22b2c3a76d7ef416237fa0c7609 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 11 Mar 2014 12:58:45 +0200 Subject: drm/i915: Add a workaround for HSW scanline counter weirdness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On HSW the scanline counter seems to behave differently depending on the output type. eDP on port A does what you would expect an the normal +1 fixup is sufficient to cover it. But on HDMI outputs we seem to need a +2 fixup. Just assume we always need the +2 fixup and accept the slight inaccuracy on eDP. This fixes a regression introduced in: commit 8072bfa6045a264d3913102a35fab125b06603a2 Author: Ville Syrjälä Date: Mon Oct 28 21:22:52 2013 +0200 drm/radeon: Move the early vblank IRQ fixup to radeon_get_crtc_scanoutpos() That commit removed the heuristic that tried to fix up the timestamps for vblank interrupts that fire a bit too early. Since then the vblank timestamp code would treat some vblank interrupts as spurious since the scanline counter would indicate that vblank_start wasn't reached yet. That in turn lead to incorrect vblank event sequence numbers being reported to userspace, which lead to unsteady framerate in applications such as XBMC which uses them for timing purposes. v2: Remember to call ilk_pipe_in_vblank_locked() on HSW too (Mika) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75725 Tested-by: bugzilla1@gmx.com Reviewed-by: Mika Kuoppala Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9fec711..d4c952d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -702,7 +702,28 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, int pipe, else position = __raw_i915_read32(dev_priv, PIPEDSL(pipe)) & DSL_LINEMASK_GEN3; - if (HAS_PCH_SPLIT(dev)) { + if (HAS_DDI(dev)) { + /* + * On HSW HDMI outputs there seems to be a 2 line + * difference, whereas eDP has the normal 1 line + * difference that earlier platforms have. External + * DP is unknown. For now just check for the 2 line + * difference case on all output types on HSW+. + * + * This might misinterpret the scanline counter being + * one line too far along on eDP, but that's less + * dangerous than the alternative since that would lead + * the vblank timestamp code astray when it sees a + * scanline count before vblank_start during a vblank + * interrupt. + */ + in_vbl = ilk_pipe_in_vblank_locked(dev, pipe); + if ((in_vbl && (position == vbl_start - 2 || + position == vbl_start - 1)) || + (!in_vbl && (position == vbl_end - 2 || + position == vbl_end - 1))) + position = (position + 2) % vtotal; + } else if (HAS_PCH_SPLIT(dev)) { /* * The scanline counter increments at the leading edge * of hsync, ie. it completely misses the active portion -- cgit v0.10.2 From 243026249324b2c958b67ab387fe6813a9836fe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 11 Mar 2014 12:58:46 +0200 Subject: drm/i915: Fix scanline counter fixup on BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The display interrupts changed on BDW, so the current ILK-HSW specific code in ilk_pipe_in_vblank_locked() doesn't work there. Add the required bits for BDW, and while at it, change the existing code to use nicer looking vblank status bit macros. Also remove the now stale __raw_i915_read16() definition which was left over from the failed gen2 ISR experiment. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=73962 Tested-by: Lu Hua Reviewed-by: Mika Kuoppala Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d4c952d..ec9b8a4 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -618,33 +618,25 @@ static u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe) /* raw reads, only for fast reads of display block, no need for forcewake etc. */ #define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__)) -#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__)) static bool ilk_pipe_in_vblank_locked(struct drm_device *dev, enum pipe pipe) { struct drm_i915_private *dev_priv = dev->dev_private; uint32_t status; - - if (INTEL_INFO(dev)->gen < 7) { - status = pipe == PIPE_A ? - DE_PIPEA_VBLANK : - DE_PIPEB_VBLANK; + int reg; + + if (INTEL_INFO(dev)->gen >= 8) { + status = GEN8_PIPE_VBLANK; + reg = GEN8_DE_PIPE_ISR(pipe); + } else if (INTEL_INFO(dev)->gen >= 7) { + status = DE_PIPE_VBLANK_IVB(pipe); + reg = DEISR; } else { - switch (pipe) { - default: - case PIPE_A: - status = DE_PIPEA_VBLANK_IVB; - break; - case PIPE_B: - status = DE_PIPEB_VBLANK_IVB; - break; - case PIPE_C: - status = DE_PIPEC_VBLANK_IVB; - break; - } + status = DE_PIPE_VBLANK(pipe); + reg = DEISR; } - return __raw_i915_read32(dev_priv, DEISR) & status; + return __raw_i915_read32(dev_priv, reg) & status; } static int i915_get_crtc_scanoutpos(struct drm_device *dev, int pipe, -- cgit v0.10.2 From 5c673b60a9b3b23486f4eda75c72e91d31d26a2b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 7 Mar 2014 20:34:46 +0100 Subject: drm/i915: Don't enable display error interrupts from the start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to enable interrupt processing before all the modeset state is set up. But that means we can fall over when we get a pipe underrun. This shouldn't happen as long as the bios works correctly but as usual this turns out to be wishful thinking. So disable error interrupts at irq install time and rely on the re-enabling code in the modeset functions to take care of this. Note that due to the SDE interrupt handling race we must uncondtionally enable all interrupt sources in SDEIER, hence no need to enable the SERR bit specifically. On gmch platforms we don't have an explicit enable/mask bit for fifo underruns. Fixing this up would require a bit of software tracking, hence is material for a separate patch. To make this possible we need to switch all gmch platforms to the new pipestat interrupt handling scheme Imre implemented for vlv, and then also add a safe form of sw state checking to __cpu_fifo_underrun_reporting_enabled a bit. v2: Also handle the ilk/snb cpu fifo underrun bits accordingly. Spotted by Ville. v3: Also handle the south interrupt underrun bits on ibx. Again spotted by Ville. Reported-by: Rob Clark Cc: Rob Clark Cc: Ville Syrjälä Cc: stable@vger.kernel.org Tested-by: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ec9b8a4..d554169 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2782,10 +2782,9 @@ static void ibx_irq_postinstall(struct drm_device *dev) return; if (HAS_PCH_IBX(dev)) { - mask = SDE_GMBUS | SDE_AUX_MASK | SDE_TRANSB_FIFO_UNDER | - SDE_TRANSA_FIFO_UNDER | SDE_POISON; + mask = SDE_GMBUS | SDE_AUX_MASK | SDE_POISON; } else { - mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT | SDE_ERROR_CPT; + mask = SDE_GMBUS_CPT | SDE_AUX_MASK_CPT; I915_WRITE(SERR_INT, I915_READ(SERR_INT)); } @@ -2845,20 +2844,19 @@ static int ironlake_irq_postinstall(struct drm_device *dev) display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE_IVB | DE_PCH_EVENT_IVB | DE_PLANEC_FLIP_DONE_IVB | DE_PLANEB_FLIP_DONE_IVB | - DE_PLANEA_FLIP_DONE_IVB | DE_AUX_CHANNEL_A_IVB | - DE_ERR_INT_IVB); + DE_PLANEA_FLIP_DONE_IVB | DE_AUX_CHANNEL_A_IVB); extra_mask = (DE_PIPEC_VBLANK_IVB | DE_PIPEB_VBLANK_IVB | - DE_PIPEA_VBLANK_IVB); + DE_PIPEA_VBLANK_IVB | DE_ERR_INT_IVB); I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT)); } else { display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT | DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE | DE_AUX_CHANNEL_A | - DE_PIPEB_FIFO_UNDERRUN | DE_PIPEA_FIFO_UNDERRUN | DE_PIPEB_CRC_DONE | DE_PIPEA_CRC_DONE | DE_POISON); - extra_mask = DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT; + extra_mask = DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT | + DE_PIPEB_FIFO_UNDERRUN | DE_PIPEA_FIFO_UNDERRUN; } dev_priv->irq_mask = ~display_mask; @@ -2974,9 +2972,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) struct drm_device *dev = dev_priv->dev; uint32_t de_pipe_masked = GEN8_PIPE_FLIP_DONE | GEN8_PIPE_CDCLK_CRC_DONE | - GEN8_PIPE_FIFO_UNDERRUN | GEN8_DE_PIPE_IRQ_FAULT_ERRORS; - uint32_t de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK; + uint32_t de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | + GEN8_PIPE_FIFO_UNDERRUN; int pipe; dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_masked; dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked; -- cgit v0.10.2 From 3cdeb713dc66057b50682048c151eae07b186c42 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 11 Mar 2014 14:22:19 -0600 Subject: PCI: Enable INTx in pci_reenable_device() only when MSI/MSI-X not enabled Andreas reported that after 1f42db786b14 ("PCI: Enable INTx if BIOS left them disabled"), pciehp surprise removal stopped working. This happens because pci_reenable_device() on the hotplug bridge (used in the pciehp_configure_device() path) clears the Interrupt Disable bit, which apparently breaks the bridge's MSI hotplug event reporting. Previously we cleared the Interrupt Disable bit in do_pci_enable_device(), which is used by both pci_enable_device() and pci_reenable_device(). But we use pci_reenable_device() after the driver may have enabled MSI or MSI-X, and we *set* Interrupt Disable as part of enabling MSI/MSI-X. This patch clears Interrupt Disable only when MSI/MSI-X has not been enabled. Fixes: 1f42db786b14 PCI: Enable INTx if BIOS left them disabled Link: https://bugzilla.kernel.org/show_bug.cgi?id=71691 Reported-and-tested-by: Andreas Noever Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org CC: Sarah Sharp diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6b05f61..fdbc294 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1192,6 +1192,9 @@ static int do_pci_enable_device(struct pci_dev *dev, int bars) return err; pci_fixup_device(pci_fixup_enable, dev); + if (dev->msi_enabled || dev->msix_enabled) + return 0; + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (pin) { pci_read_config_word(dev, PCI_COMMAND, &cmd); -- cgit v0.10.2 From ac93ac7403493f8707b7734de9f40d5cb5db9045 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 11 Mar 2014 14:23:12 -0600 Subject: PCI: Don't check resource_size() in pci_bus_alloc_resource() Paul reported that after f75b99d5a77d ("PCI: Enforce bus address limits in resource allocation") on a 32-bit kernel (CONFIG_PHYS_ADDR_T_64BIT not set), intel-gtt complained "can't ioremap flush page - no chipset flushing". In addition, other PCI resource allocations, e.g., for bridge windows, failed. This happens because we incorrectly skip bus resources of [mem 0x00000000-0xffffffff] because we think they are of size zero. When resource_size_t is 32 bits wide, resource_size() on [mem 0x00000000-0xffffffff] returns 0 because (r->end - r->start + 1) overflows. Therefore, we can't use "resource_size() == 0" to decide that allocation from this resource will fail. allocate_resource() should fail anyway if it can't satisfy the address constraints, so we should just depend on that. A [mem 0x00000000-0xffffffff] bus resource is obviously not really valid, but we do fall back to it as a default when we don't have information about host bridge apertures. Link: https://bugzilla.kernel.org/show_bug.cgi?id=71611 Fixes: f75b99d5a77d PCI: Enforce bus address limits in resource allocation Reported-and-tested-by: Paul Bolle Signed-off-by: Bjorn Helgaas diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 00660cc..3890166 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -162,8 +162,6 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, avail = *r; pci_clip_resource_to_region(bus, &avail, region); - if (!resource_size(&avail)) - continue; /* * "min" is typically PCIBIOS_MIN_IO or PCIBIOS_MIN_MEM to -- cgit v0.10.2 From 596f3142d2b7be307a1652d59e7b93adab918437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 11 Mar 2014 19:11:18 +0100 Subject: KVM: SVM: fix cr8 intercept window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always disable cr8 intercept in its handler, but only re-enable it if handling KVM_REQ_EVENT, so there can be a window where we do not intercept cr8 writes, which allows an interrupt to disrupt a higher priority task. Fix this by disabling intercepts in the same function that re-enables them when needed. This fixes BSOD in Windows 2008. Cc: Signed-off-by: Radim Krčmář Reviewed-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e81df8f..2de1bc0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3002,10 +3002,8 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(svm); - if (irqchip_in_kernel(svm->vcpu.kvm)) { - clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irqchip_in_kernel(svm->vcpu.kvm)) return r; - } if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return r; kvm_run->exit_reason = KVM_EXIT_SET_TPR; @@ -3567,6 +3565,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; + clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irr == -1) return; -- cgit v0.10.2 From 8b9d96666529a979acf4825391efcc7c8a3e9f12 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 12 Mar 2014 00:40:05 +0100 Subject: dm cache: fix truncation bug when copying a block to/from >2TB fast device During demotion or promotion to a cache's >2TB fast device we must not truncate the cache block's associated sector to 32bits. The 32bit temporary result of from_cblock() caused a 32bit multiplication when calculating the sector of the fast device in issue_copy_real(). Use an intermediate 64bit type to store the 32bit from_cblock() to allow for proper 64bit multiplication. Here is an example of how this bug manifests on an ext4 filesystem: EXT4-fs error (device dm-0): ext4_mb_generate_buddy:756: group 17136, 32768 clusters in bitmap, 30688 in gd; block bitmap corrupt. JBD2: Spotted dirty metadata buffer (dev = dm-0, blocknr = 0). There's a risk of filesystem corruption in case of system crash. Signed-off-by: Heinz Mauelshagen Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1af7014..354bbc1 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -979,12 +979,13 @@ static void issue_copy_real(struct dm_cache_migration *mg) int r; struct dm_io_region o_region, c_region; struct cache *cache = mg->cache; + sector_t cblock = from_cblock(mg->cblock); o_region.bdev = cache->origin_dev->bdev; o_region.count = cache->sectors_per_block; c_region.bdev = cache->cache_dev->bdev; - c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block; + c_region.sector = cblock * cache->sectors_per_block; c_region.count = cache->sectors_per_block; if (mg->writeback || mg->demote) { -- cgit v0.10.2 From e893fba90c09f9b57fb97daae204ea9cc2c52fa5 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 12 Mar 2014 16:13:39 +0100 Subject: dm cache: fix access beyond end of origin device In order to avoid wasting cache space a partial block at the end of the origin device is not cached. Unfortunately, the check for such a partial block at the end of the origin device was flawed. Fix accesses beyond the end of the origin device that occured due to attempted promotion of an undetected partial block by: - initializing the per bio data struct to allow cache_end_io to work properly - recognizing access to the partial block at the end of the origin device - avoiding out of bounds access to the discard bitset Otherwise, users can experience errors like the following: attempt to access beyond end of device dm-5: rw=0, want=20971520, limit=20971456 ... device-mapper: cache: promotion failed; couldn't copy block Signed-off-by: Heinz Mauelshagen Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 354bbc1..074b9c8 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2465,20 +2465,18 @@ static int cache_map(struct dm_target *ti, struct bio *bio) bool discarded_block; struct dm_bio_prison_cell *cell; struct policy_result lookup_result; - struct per_bio_data *pb; + struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); - if (from_oblock(block) > from_oblock(cache->origin_blocks)) { + if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* * This can only occur if the io goes to a partial block at * the end of the origin device. We don't cache these. * Just remap to the origin and carry on. */ - remap_to_origin_clear_discard(cache, bio, block); + remap_to_origin(cache, bio); return DM_MAPIO_REMAPPED; } - pb = init_per_bio_data(bio, pb_data_size); - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { defer_bio(cache, bio); return DM_MAPIO_SUBMITTED; -- cgit v0.10.2 From 9ef7506f7eff3fc42724269f62e30164c141661f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 12 Mar 2014 10:59:37 -0400 Subject: drm/ttm: don't oops if no invalidate_caches() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few of the simpler TTM drivers (cirrus, ast, mgag200) do not implement this function. Yet can end up somehow with an evicted bo: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) PGD 16e761067 PUD 16e6cf067 PMD 0 Oops: 0010 [#1] SMP Modules linked in: bnep bluetooth rfkill fuse ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables sg btrfs zlib_deflate raid6_pq xor dm_queue_length iTCO_wdt iTCO_vendor_support coretemp kvm dcdbas dm_service_time microcode serio_raw pcspkr lpc_ich mfd_core i7core_edac edac_core ses enclosure ipmi_si ipmi_msghandler shpchp acpi_power_meter mperf nfsd auth_rpcgss nfs_acl lockd uinput sunrpc dm_multipath xfs libcrc32c ata_generic pata_acpi sr_mod cdrom sd_mod usb_storage mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit lpfc drm_kms_helper ttm crc32c_intel ata_piix bfa drm ixgbe libata i2c_core mdio crc_t10dif ptp crct10dif_common pps_core scsi_transport_fc dca scsi_tgt megaraid_sas bnx2 dm_mirror dm_region_hash dm_log dm_mod CPU: 16 PID: 2572 Comm: X Not tainted 3.10.0-86.el7.x86_64 #1 Hardware name: Dell Inc. PowerEdge R810/0H235N, BIOS 0.3.0 11/14/2009 task: ffff8801799dabc0 ti: ffff88016c884000 task.ti: ffff88016c884000 RIP: 0010:[<0000000000000000>] [< (null)>] (null) RSP: 0018:ffff88016c885ad8 EFLAGS: 00010202 RAX: ffffffffa04e94c0 RBX: ffff880178937a20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000240004 RDI: ffff880178937a00 RBP: ffff88016c885b60 R08: 00000000000171a0 R09: ffff88007cf171a0 R10: ffffea0005842540 R11: ffffffff810487b9 R12: ffff880178937b30 R13: ffff880178937a00 R14: ffff88016c885b78 R15: ffff880179929400 FS: 00007f81ba2ef980(0000) GS:ffff88007cf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000016e763000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffffa0306fae ffff8801799295c0 0000000000260004 0000000000000001 ffff88016c885b60 ffffffffa0307669 00ff88007cf17738 ffff88017cf17700 ffff880178937a00 ffff880100000000 ffff880100000000 0000000079929400 Call Trace: [] ? ttm_bo_handle_move_mem+0x54e/0x5b0 [ttm] [] ? ttm_bo_mem_space+0x169/0x340 [ttm] [] ttm_bo_move_buffer+0x117/0x130 [ttm] [] ? perf_event_init_context+0x141/0x220 [] ttm_bo_validate+0xc1/0x130 [ttm] [] mgag200_bo_pin+0x87/0xc0 [mgag200] [] mga_crtc_cursor_set+0x474/0xbb0 [mgag200] [] ? __mem_cgroup_commit_charge+0x152/0x3b0 [] ? mutex_lock+0x12/0x2f [] drm_mode_cursor_common+0x123/0x170 [drm] [] drm_mode_cursor_ioctl+0x41/0x50 [drm] [] drm_ioctl+0x502/0x630 [drm] [] ? __do_page_fault+0x1f4/0x510 [] ? __restore_xstate_sig+0x218/0x4f0 [] do_vfs_ioctl+0x2e5/0x4d0 [] ? file_has_perm+0x8e/0xa0 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [< (null)>] (null) RSP CR2: 0000000000000000 Signed-off-by: Rob Clark Reviewed-by: Jérôme Glisse Reviewed-by: Thomas Hellstrom Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a066513..214b799 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -351,9 +351,11 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, moved: if (bo->evicted) { - ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement); - if (ret) - pr_err("Can not flush read caches\n"); + if (bdev->driver->invalidate_caches) { + ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement); + if (ret) + pr_err("Can not flush read caches\n"); + } bo->evicted = false; } -- cgit v0.10.2 From 836fbaf459f9c041826864021688e9bd131e722c Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 11 Mar 2014 12:45:32 +0000 Subject: xen-netback: use skb_is_gso in xenvif_start_xmit In 5bd076708 ("Xen-netback: Fix issue caused by using gso_type wrongly") we use skb_is_gso to determine if we need an extra slot to accommodate the SKB. There's similar error in interface.c. Change that to use skb_is_gso as well. Signed-off-by: Wei Liu Cc: Annie Li Cc: Ian Campbell Cc: Paul Durrant Acked-by: Ian Campbell Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 7669d49..301cc03 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -132,8 +132,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) /* If the skb is GSO then we'll also need an extra slot for the * metadata. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + if (skb_is_gso(skb)) min_slots_needed++; /* If the skb can't possibly fit in the remaining slots -- cgit v0.10.2 From 7848865914c6a63ead674f0f5604b77df7d3874f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2014 15:02:30 -0400 Subject: drm/radeon: fix runpm disabling on non-PX harder Make sure runtime pm is disabled on non-PX hardware. Should fix powerdown problems without displays attached. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 2aecd6d..66ed3ea 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -33,6 +33,13 @@ #include #include #include + +#if defined(CONFIG_VGA_SWITCHEROO) +bool radeon_is_px(void); +#else +static inline bool radeon_is_px(void) { return false; } +#endif + /** * radeon_driver_unload_kms - Main unload function for KMS. * @@ -130,7 +137,8 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - if (radeon_runtime_pm != 0) { + if ((radeon_runtime_pm == 1) || + ((radeon_runtime_pm == -1) && radeon_is_px())) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); pm_runtime_set_active(dev->dev); -- cgit v0.10.2 From 7b1bbe883b3ed962ca2be4daf321f318f5091340 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 15:15:58 -0400 Subject: drm/radeon/cik: properly set sdma ring status on disable When we disable the rings, set the status properly. If not other code pathes may try and use the rings which are not functional at this point. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 1ecb3f1..5d7a5fa 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -264,6 +264,8 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0); } + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; } /** -- cgit v0.10.2 From 07ae78c9798b79bad3d3adf983c94ba23fde54d4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 15:26:34 -0400 Subject: drm/radeon/cik: stop the sdma engines in the enable() function We always stop the rings when disabling the engines so just call the stop functions directly from the sdma enable function. This way the rings' status is set correctly on suspend so there are no problems on resume. Fixes resume failures that result in acceleration getting disabled. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 5d7a5fa..94626ea 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -293,6 +293,11 @@ void cik_sdma_enable(struct radeon_device *rdev, bool enable) u32 me_cntl, reg_offset; int i; + if (enable == false) { + cik_sdma_gfx_stop(rdev); + cik_sdma_rlc_stop(rdev); + } + for (i = 0; i < 2; i++) { if (i == 0) reg_offset = SDMA0_REGISTER_OFFSET; @@ -422,10 +427,6 @@ static int cik_sdma_load_microcode(struct radeon_device *rdev) if (!rdev->sdma_fw) return -EINVAL; - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); - /* halt the MEs */ cik_sdma_enable(rdev, false); @@ -494,9 +495,6 @@ int cik_sdma_resume(struct radeon_device *rdev) */ void cik_sdma_fini(struct radeon_device *rdev) { - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); /* halt the MEs */ cik_sdma_enable(rdev, false); radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); -- cgit v0.10.2 From 56cb456746a15c1025a178466492ca4c373b1a63 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:30 +0200 Subject: net/mlx4_core: Fix wrong dump of the vxlan offloads device capability Fix the value used to dump the vxlan offloads device capability to align with the MLX4_DEV_CAP_FLAG2_yyy definition. While on that, add dump to the IPoIB flow-steering device capability and fix small typo. The vxlan cap value wasn't fully handled when a conflict was resolved between MLX4_DEV_CAP_FLAG2_DMFS_IPOIB coming from the IB tree to MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS coming from net-next. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 8726e34..7e2995e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -129,13 +129,14 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", - [3] = "Device manage flow steering support", + [3] = "Device managed flow steering support", [4] = "Automatic MAC reassignment support", [5] = "Time stamping support", [6] = "VST (control vlan insertion/stripping) support", [7] = "FSM (MAC anti-spoofing) support", [8] = "Dynamic QP updates support", - [9] = "TCP/IP offloads/flow-steering for VXLAN support" + [9] = "Device managed flow steering IPoIB support", + [10] = "TCP/IP offloads/flow-steering for VXLAN support" }; int i; -- cgit v0.10.2 From 2a2083f7f3568c0192daa6ac0e6fa35d953f47bd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:31 +0200 Subject: net/mlx4_en: Handle vxlan steering rules for mac address changes When the device mac address is changed, we must deregister the vxlan steering rule associated with the previous mac, and register a new steering rule using the new mac. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fad4531..ba212611 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -742,6 +742,14 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, err = mlx4_en_uc_steer_add(priv, new_mac, &qpn, &entry->reg_id); + if (err) + return err; + if (priv->tunnel_reg_id) { + mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); + priv->tunnel_reg_id = 0; + } + err = mlx4_en_tunnel_steer_add(priv, new_mac, qpn, + &priv->tunnel_reg_id); return err; } } -- cgit v0.10.2 From 7855bff42ea9938a0853321256f4c8ce3628aa73 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:32 +0200 Subject: net/mlx4_core: Load the IB driver when the device supports IBoE When checking what protocol drivers to load, the IB driver should be requested also over Ethernet ports, if the device supports IBoE (RoCE). Fixes: b046ffe 'net/mlx4_core: Load higher level modules according to ports type' Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 30a08a6..936c153 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -751,7 +751,7 @@ static void mlx4_request_modules(struct mlx4_dev *dev) has_eth_port = true; } - if (has_ib_port) + if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) request_module_nowait(IB_DRV_NAME); if (has_eth_port) request_module_nowait(EN_DRV_NAME); -- cgit v0.10.2 From a93c12560498066765c58b032530798a26bd8f70 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Mar 2014 11:23:42 +0100 Subject: packet: doc: Spelling s/than/that/ Signed-off-by: Geert Uytterhoeven Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 1404674..6fea79e 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -453,7 +453,7 @@ TP_STATUS_COPY : This flag indicates that the frame (and associated enabled previously with setsockopt() and the PACKET_COPY_THRESH option. - The number of frames than can be buffered to + The number of frames that can be buffered to be read with recvfrom is limited like a normal socket. See the SO_RCVBUF option in the socket (7) man page. -- cgit v0.10.2 From 7e814a6c50a1669118ffa4961568fea3aa955faf Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 11 Mar 2014 12:06:24 +0100 Subject: MAINTAINERS: Add tools/net to NETWORKING [GENERAL] Make sure patches for these tools go to the netdev list as well. References: https://marc.info/?l=linux-kernel&m=139450284501328&w=2 Cc: David S. Miller Cc: Daniel Borkmann Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index e1297ff..e1be77c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6002,6 +6002,7 @@ F: include/linux/netdevice.h F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h +F: tools/net/ NETWORKING [IPv4/IPv6] M: "David S. Miller" -- cgit v0.10.2 From b2b3d8d952e4f8d6ac2ce80be96b937f29f6e42e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 16:20:44 -0400 Subject: drm/radeon/cik: properly set compute ring status on disable When we disable the rings, set the status properly. If not other code pathes may try and use the rings which are not functional at this point. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e22be84..bbb1784 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4134,8 +4134,11 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) { if (enable) WREG32(CP_MEC_CNTL, 0); - else + else { WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; + } udelay(50); } -- cgit v0.10.2 From dbb490b96584d4e958533fb637f08b557f505657 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Tue, 11 Mar 2014 11:58:27 +0000 Subject: net: socket: error on a negative msg_namelen When copying in a struct msghdr from the user, if the user has set the msg_namelen parameter to a negative value it gets clamped to a valid size due to a comparison between signed and unsigned values. Ensure the syscall errors when the user passes in a negative value. Signed-off-by: Matthew Leach Signed-off-by: David S. Miller diff --git a/net/socket.c b/net/socket.c index 879933a..32df584 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1985,6 +1985,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, { if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; -- cgit v0.10.2 From 2ed99e39cb9392312c100d9da591c20641c64d12 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Mar 2014 21:49:33 +0100 Subject: cpufreq: Skip current frequency initialization for ->setpolicy drivers After commit da60ce9f2fac (cpufreq: call cpufreq_driver->get() after calling ->init()) __cpufreq_add_dev() sometimes fails for CPUs handled by intel_pstate, because that driver may return 0 from its ->get() callback if it has not run long enough to collect enough samples on the given CPU. That didn't happen before commit da60ce9f2fac which added policy->cur initialization to __cpufreq_add_dev() to help reduce code duplication in other cpufreq drivers. However, the code added by commit da60ce9f2fac need not be executed for cpufreq drivers having the ->setpolicy callback defined, because the subsequent invocation of cpufreq_set_policy() will use that callback to initialize the policy anyway and it doesn't need policy->cur to be initialized upfront. The analogous code in cpufreq_update_policy() is also unnecessary for cpufreq drivers having ->setpolicy set and may be skipped for them as well. Since intel_pstate provides ->setpolicy, skipping the upfront policy->cur initialization for cpufreq drivers with that callback set will cover intel_pstate and the problem it's been having after commit da60ce9f2fac will be addressed. Fixes: da60ce9f2fac (cpufreq: call cpufreq_driver->get() after calling ->init()) References: https://bugzilla.kernel.org/show_bug.cgi?id=71931 Reported-and-tested-by: Patrik Lundquist Acked-by: Dirk Brandewie Cc: 3.13+ # 3.13+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cf485d9..199b52b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1129,7 +1129,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, per_cpu(cpufreq_cpu_data, j) = policy; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (cpufreq_driver->get) { + if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { policy->cur = cpufreq_driver->get(policy->cpu); if (!policy->cur) { pr_err("%s: ->get() failed\n", __func__); @@ -2143,7 +2143,7 @@ int cpufreq_update_policy(unsigned int cpu) * BIOS might change freq behind our back * -> ask driver for current freq and notify governors about a change */ - if (cpufreq_driver->get) { + if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { new_policy.cur = cpufreq_driver->get(cpu); if (!policy->cur) { pr_debug("Driver did not initialize current freq"); -- cgit v0.10.2 From c4e1acbb35e4a3838cdfc0e7f8237e844aff00b6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 13 Mar 2014 00:22:58 +0100 Subject: ACPI / init: Invoke early ACPI initialization later Commit 73f7d1ca3263 (ACPI / init: Run acpi_early_init() before timekeeping_init()) optimistically moved the early ACPI initialization before timekeeping_init(), but that didn't work, because it broke fast TSC calibration for Julian Wollrath on Thinkpad x121e (and most likely for others too). The reason is that acpi_early_init() enables the SCI and that interferes with the fast TSC calibration mechanism. Thus follow the original idea to execute acpi_early_init() before efi_enter_virtual_mode() to help the EFI people for now and we can revisit the other problem that commit 73f7d1ca3263 attempted to address in the future (if really necessary). Fixes: 73f7d1ca3263 (ACPI / init: Run acpi_early_init() before timekeeping_init()) Reported-by: Julian Wollrath Reviewed-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki diff --git a/init/main.c b/init/main.c index eb03090..9c7fd4c 100644 --- a/init/main.c +++ b/init/main.c @@ -561,7 +561,6 @@ asmlinkage void __init start_kernel(void) init_timers(); hrtimers_init(); softirq_init(); - acpi_early_init(); timekeeping_init(); time_init(); sched_clock_postinit(); @@ -613,6 +612,7 @@ asmlinkage void __init start_kernel(void) calibrate_delay(); pidmap_init(); anon_vma_init(); + acpi_early_init(); #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); -- cgit v0.10.2 From 4b0c82529b92cda4723cf1d09e1e2ee9b9ae96f1 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 12 Mar 2014 20:42:31 +0100 Subject: drm/vmwgfx: Fix a surface reference corner-case in legacy emulation mode If running on a gb-object capable device with a non-gb capable surface exporter (X server) and a gb capable surface referencing client (GL driver), the referencing client expects to find a shareable backing buffer attached to the surface at reference time. This may not be the case if the surface has not yet been validated. This would cause the surface reference IOCTL to return an error. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 82468d9..e7af580 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -830,6 +830,24 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, if (unlikely(ret != 0)) goto out_unlock; + /* + * A gb-aware client referencing a shared surface will + * expect a backup buffer to be present. + */ + if (dev_priv->has_mob && req->shareable) { + uint32_t backup_handle; + + ret = vmw_user_dmabuf_alloc(dev_priv, tfile, + res->backup_size, + true, + &backup_handle, + &res->backup); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } + } + tmp = vmw_resource_reference(&srf->res); ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime, req->shareable, VMW_RES_SURFACE, -- cgit v0.10.2 From a5b0ccb0b5d080c0decb4c9208d9bb6072defa50 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 13 Mar 2014 20:02:02 +1100 Subject: KVM: PPC: Book3S HV: Remove bogus duplicate code Commit 7b490411c37f ("KVM: PPC: Book3S HV: Add new state for transactional memory") incorrectly added some duplicate code to the guest exit path because I didn't manage to clean up after a rebase correctly. This removes the extraneous material. The presence of this extraneous code causes host crashes whenever a guest is run. Signed-off-by: Paul Mackerras Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e66d4ec..781e6bf 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1504,73 +1504,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 1: addi r8,r8,16 .endr - /* Save DEC */ - mfspr r5,SPRN_DEC - mftb r6 - extsw r5,r5 - add r5,r5,r6 - std r5,VCPU_DEC_EXPIRES(r9) - -BEGIN_FTR_SECTION - b 8f -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) - /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */ - mfmsr r8 - li r0, 1 - rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG - mtmsrd r8 - - /* Save POWER8-specific registers */ - mfspr r5, SPRN_IAMR - mfspr r6, SPRN_PSPB - mfspr r7, SPRN_FSCR - std r5, VCPU_IAMR(r9) - stw r6, VCPU_PSPB(r9) - std r7, VCPU_FSCR(r9) - mfspr r5, SPRN_IC - mfspr r6, SPRN_VTB - mfspr r7, SPRN_TAR - std r5, VCPU_IC(r9) - std r6, VCPU_VTB(r9) - std r7, VCPU_TAR(r9) -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM - mfspr r5, SPRN_TFHAR - mfspr r6, SPRN_TFIAR - mfspr r7, SPRN_TEXASR - std r5, VCPU_TFHAR(r9) - std r6, VCPU_TFIAR(r9) - std r7, VCPU_TEXASR(r9) -#endif - mfspr r8, SPRN_EBBHR - std r8, VCPU_EBBHR(r9) - mfspr r5, SPRN_EBBRR - mfspr r6, SPRN_BESCR - mfspr r7, SPRN_CSIGR - mfspr r8, SPRN_TACR - std r5, VCPU_EBBRR(r9) - std r6, VCPU_BESCR(r9) - std r7, VCPU_CSIGR(r9) - std r8, VCPU_TACR(r9) - mfspr r5, SPRN_TCSCR - mfspr r6, SPRN_ACOP - mfspr r7, SPRN_PID - mfspr r8, SPRN_WORT - std r5, VCPU_TCSCR(r9) - std r6, VCPU_ACOP(r9) - stw r7, VCPU_GUEST_PID(r9) - std r8, VCPU_WORT(r9) -8: - - /* Save and reset AMR and UAMOR before turning on the MMU */ -BEGIN_FTR_SECTION - mfspr r5,SPRN_AMR - mfspr r6,SPRN_UAMOR - std r5,VCPU_AMR(r9) - std r6,VCPU_UAMOR(r9) - li r6,0 - mtspr SPRN_AMR,r6 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) - /* Unset guest mode */ li r0, KVM_GUEST_MODE_NONE stb r0, HSTATE_IN_GUEST(r13) -- cgit v0.10.2 From e724f080f5dd03881bc6d378750c37f7374cae7e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 13 Mar 2014 20:02:48 +1100 Subject: KVM: PPC: Book3S HV: Fix register usage when loading/saving VRSAVE Commit 595e4f7e697e ("KVM: PPC: Book3S HV: Use load/store_fp_state functions in HV guest entry/exit") changed the register usage in kvmppc_save_fp() and kvmppc_load_fp() but omitted changing the instructions that load and save VRSAVE. The result is that the VRSAVE value was loaded from a constant address, and saved to a location past the end of the vcpu struct, causing host kernel memory corruption and various kinds of host kernel crashes. This fixes the problem by using register r31, which contains the vcpu pointer, instead of r3 and r4. Signed-off-by: Paul Mackerras Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 781e6bf..818dce3 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2136,7 +2136,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE - stw r6,VCPU_VRSAVE(r3) + stw r6,VCPU_VRSAVE(r31) mtlr r30 mtmsrd r5 isync @@ -2173,7 +2173,7 @@ BEGIN_FTR_SECTION bl .load_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif - lwz r7,VCPU_VRSAVE(r4) + lwz r7,VCPU_VRSAVE(r31) mtspr SPRN_VRSAVE,r7 mtlr r30 mr r4,r31 -- cgit v0.10.2 From de2db7432917a82b62d55bb59635586eeca6d1bd Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 12 Mar 2014 16:07:06 +0000 Subject: arm64: Make DMA coherent and strongly ordered mappings not executable pgprot_{dmacoherent,writecombine,noncached} don't need to generate executable mappings with side-effects like __sync_icache_dcache() being called when the mapping is in user space. Signed-off-by: Catalin Marinas Reported-by: Bharat Bhushan Tested-by: Laura Abbott Tested-by: Bharat Bhushan Cc: diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b524dcd..2d3cede 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -286,11 +286,11 @@ static inline int has_transparent_hugepage(void) * Mark the prot value as uncacheable and unbufferable. */ #define pgprot_noncached(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN) #define pgprot_writecombine(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_dmacoherent(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC)) + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, -- cgit v0.10.2 From 71fdb6bf61bf0692f004f9daf5650392c0cfe300 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 12 Mar 2014 16:28:09 +0000 Subject: arm64: Do not synchronise I and D caches for special ptes Special pte mappings are not intended to be executable and do not even have an associated struct page. This patch ensures that we do not call __sync_icache_dcache() on such ptes. Signed-off-by: Catalin Marinas Reported-by: Steve Capper Tested-by: Laura Abbott Tested-by: Bharat Bhushan Cc: diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 2d3cede..72c9ac3 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -199,7 +199,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { if (pte_valid_user(pte)) { - if (pte_exec(pte)) + if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); if (pte_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; -- cgit v0.10.2 From 87366d8cf7b3f6dc34633938aa8766e5a390ce33 Mon Sep 17 00:00:00 2001 From: Radha Mohan Chintakuntla Date: Fri, 7 Mar 2014 08:49:25 +0000 Subject: arm64: Add boot time configuration of Intermediate Physical Address size ARMv8 supports a range of physical address bit sizes. The PARange bits from ID_AA64MMFR0_EL1 register are read during boot-time and the intermediate physical address size bits are written in the translation control registers (TCR_EL1 and VTCR_EL2). There is no change in the VA bits and levels of translation. Signed-off-by: Radha Mohan Chintakuntla Reviewed-by: Will Deacon Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 0eb3986..21ef48d 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -106,7 +106,6 @@ /* VTCR_EL2 Registers bits */ #define VTCR_EL2_PS_MASK (7 << 16) -#define VTCR_EL2_PS_40B (2 << 16) #define VTCR_EL2_TG0_MASK (1 << 14) #define VTCR_EL2_TG0_4K (0 << 14) #define VTCR_EL2_TG0_64K (1 << 14) @@ -129,10 +128,9 @@ * 64kB pages (TG0 = 1) * 2 level page tables (SL = 1) */ -#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \ - VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ - VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ - VTCR_EL2_T0SZ_40B) +#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ + VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -142,10 +140,9 @@ * 4kB pages (TG0 = 0) * 3 level page tables (SL = 1) */ -#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \ - VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \ - VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \ - VTCR_EL2_T0SZ_40B) +#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ + VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index b1d2e26..f7af66b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -100,9 +100,9 @@ #define PTE_HYP PTE_USER /* - * 40-bit physical address supported. + * Highest possible physical address supported. */ -#define PHYS_MASK_SHIFT (40) +#define PHYS_MASK_SHIFT (48) #define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1) /* @@ -122,7 +122,6 @@ #define TCR_SHARED ((UL(3) << 12) | (UL(3) << 28)) #define TCR_TG0_64K (UL(1) << 14) #define TCR_TG1_64K (UL(1) << 30) -#define TCR_IPS_40BIT (UL(2) << 32) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 2b0244d..d968796 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -68,6 +68,12 @@ __do_hyp_init: msr tcr_el2, x4 ldr x4, =VTCR_EL2_FLAGS + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in + * VTCR_EL2. + */ + mrs x5, ID_AA64MMFR0_EL1 + bfi x4, x5, #16, #3 msr vtcr_el2, x4 mrs x4, mair_el1 diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index e0ef63c..e085ee6 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -209,8 +209,14 @@ ENTRY(__cpu_setup) * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for * both user and kernel. */ - ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | TCR_IPS_40BIT | \ + ldr x10, =TCR_TxSZ(VA_BITS) | TCR_FLAGS | \ TCR_ASID16 | TCR_TBI0 | (1 << 31) + /* + * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in + * TCR_EL1. + */ + mrs x9, ID_AA64MMFR0_EL1 + bfi x10, x9, #32, #3 #ifdef CONFIG_ARM64_64K_PAGES orr x10, x10, TCR_TG0_64K orr x10, x10, TCR_TG1_64K -- cgit v0.10.2 From 2ee0d7fd36a3f87bc5b29b1ec54ad6728deedb41 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Mon, 3 Feb 2014 19:18:27 +0100 Subject: ARM64: perf: add support for perf registers API This patch implements the functions required for the perf registers API, allowing the perf tool to interface kernel register dumps with libunwind in order to provide userspace backtracing. Compat mode is also supported. Only the general purpose user space registers are exported, i.e.: PERF_REG_ARM_X0, ... PERF_REG_ARM_X28, PERF_REG_ARM_FP, PERF_REG_ARM_LR, PERF_REG_ARM_SP, PERF_REG_ARM_PC and not the PERF_REG_ARM_V* registers. Signed-off-by: Jean Pihet Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 140cd1a..507ab6e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -39,6 +39,8 @@ config ARM64 select HAVE_MEMBLOCK select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select IRQ_DOMAIN select MODULES_USE_ELF_RELA select NO_BOOTMEM diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 5233966..35ff2f9 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -68,6 +68,7 @@ /* Architecturally defined mapping between AArch32 and AArch64 registers */ #define compat_usr(x) regs[(x)] +#define compat_fp regs[11] #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild index e4b78bd..942376d 100644 --- a/arch/arm64/include/uapi/asm/Kbuild +++ b/arch/arm64/include/uapi/asm/Kbuild @@ -9,6 +9,7 @@ header-y += byteorder.h header-y += fcntl.h header-y += hwcap.h header-y += kvm_para.h +header-y += perf_regs.h header-y += param.h header-y += ptrace.h header-y += setup.h diff --git a/arch/arm64/include/uapi/asm/perf_regs.h b/arch/arm64/include/uapi/asm/perf_regs.h new file mode 100644 index 0000000..172b831 --- /dev/null +++ b/arch/arm64/include/uapi/asm/perf_regs.h @@ -0,0 +1,40 @@ +#ifndef _ASM_ARM64_PERF_REGS_H +#define _ASM_ARM64_PERF_REGS_H + +enum perf_event_arm_regs { + PERF_REG_ARM64_X0, + PERF_REG_ARM64_X1, + PERF_REG_ARM64_X2, + PERF_REG_ARM64_X3, + PERF_REG_ARM64_X4, + PERF_REG_ARM64_X5, + PERF_REG_ARM64_X6, + PERF_REG_ARM64_X7, + PERF_REG_ARM64_X8, + PERF_REG_ARM64_X9, + PERF_REG_ARM64_X10, + PERF_REG_ARM64_X11, + PERF_REG_ARM64_X12, + PERF_REG_ARM64_X13, + PERF_REG_ARM64_X14, + PERF_REG_ARM64_X15, + PERF_REG_ARM64_X16, + PERF_REG_ARM64_X17, + PERF_REG_ARM64_X18, + PERF_REG_ARM64_X19, + PERF_REG_ARM64_X20, + PERF_REG_ARM64_X21, + PERF_REG_ARM64_X22, + PERF_REG_ARM64_X23, + PERF_REG_ARM64_X24, + PERF_REG_ARM64_X25, + PERF_REG_ARM64_X26, + PERF_REG_ARM64_X27, + PERF_REG_ARM64_X28, + PERF_REG_ARM64_X29, + PERF_REG_ARM64_LR, + PERF_REG_ARM64_SP, + PERF_REG_ARM64_PC, + PERF_REG_ARM64_MAX, +}; +#endif /* _ASM_ARM64_PERF_REGS_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index cfee827..7d811d9 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -15,8 +15,9 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o +arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o -arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o arm64-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c new file mode 100644 index 0000000..f2d6f0a --- /dev/null +++ b/arch/arm64/kernel/perf_regs.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX)) + return 0; + + /* + * Compat (i.e. 32 bit) mode: + * - PC has been set in the pt_regs struct in kernel_entry, + * - Handle SP and LR here. + */ + if (compat_user_mode(regs)) { + if ((u32)idx == PERF_REG_ARM64_SP) + return regs->compat_sp; + if ((u32)idx == PERF_REG_ARM64_LR) + return regs->compat_lr; + } + + return regs->regs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (is_compat_thread(task_thread_info(task))) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} -- cgit v0.10.2 From 23c7d70d55c6d963f225744cd1b996dee68c88d1 Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Mon, 3 Feb 2014 19:18:28 +0100 Subject: ARM64: perf: add support for frame pointer unwinding in compat mode When profiling a 32-bit application, user space callchain unwinding using the frame pointer is performed in compat mode. The code is taken over from the AARCH32 code and adapted to work on AARCH64. Signed-off-by: Jean Pihet Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 5b1cd79..e868c72 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1348,8 +1348,8 @@ early_initcall(init_hw_perf_events); * Callchain handling code. */ struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long lr; } __attribute__((packed)); /* @@ -1386,22 +1386,80 @@ user_backtrace(struct frame_tail __user *tail, return buftail.fp; } +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +compat_user_backtrace(struct compat_frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} + void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } perf_callchain_store(entry, regs->pc); - tail = (struct frame_tail __user *)regs->regs[29]; - while (entry->nr < PERF_MAX_STACK_DEPTH && - tail && !((unsigned long)tail & 0xf)) - tail = user_backtrace(tail, entry); + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + + while (entry->nr < PERF_MAX_STACK_DEPTH && + tail && !((unsigned long)tail & 0xf)) + tail = user_backtrace(tail, entry); + } else { + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) + tail = compat_user_backtrace(tail, entry); + } } /* @@ -1429,6 +1487,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; + walk_stackframe(&frame, callchain_trace, entry); } -- cgit v0.10.2 From 5f888a1d33c48900012e6b4c18296ce7c715dc6c Mon Sep 17 00:00:00 2001 From: Jean Pihet Date: Mon, 3 Feb 2014 19:18:29 +0100 Subject: ARM64: perf: support dwarf unwinding in compat mode Add support for unwinding using the dwarf information in compat mode. Using the correct user stack pointer allows perf to record the frames correctly in the native and compat modes. Note that although the dwarf frame unwinding works ok using libunwind in native mode (on ARMv7 & ARMv8), some changes are required to the libunwind code for the compat mode. Those changes are posted separately on the libunwind mailing list. Tested on ARMv8 platform with v8 and compat v7 binaries, the latter are statically built. Signed-off-by: Jean Pihet Acked-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index fda2704..e71f81f 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -228,7 +228,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -#define compat_user_stack_pointer() (current_pt_regs()->compat_sp) +#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs())) static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 35ff2f9..c7ba261 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -133,7 +133,7 @@ struct pt_regs { (!((regs)->pstate & PSR_F_BIT)) #define user_stack_pointer(regs) \ - ((regs)->sp) + (!compat_user_mode(regs)) ? ((regs)->sp) : ((regs)->compat_sp) /* * Are the current registers suitable for user mode? (used to maintain -- cgit v0.10.2 From 242c04bc4be959ae28618772e439c27e87a7d880 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 5 Mar 2014 05:34:32 +0000 Subject: arm64: debug: make local symbols static Make local symbols static, because these are used only in this file. Signed-off-by: Jingoo Han Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 13f87de..14ba23c 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -189,7 +189,7 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) /* EL1 Single Step Handler hooks */ static LIST_HEAD(step_hook); -DEFINE_RWLOCK(step_hook_lock); +static DEFINE_RWLOCK(step_hook_lock); void register_step_hook(struct step_hook *hook) { @@ -276,7 +276,7 @@ static int single_step_handler(unsigned long addr, unsigned int esr, * Use reader/writer locks instead of plain spinlock. */ static LIST_HEAD(break_hook); -DEFINE_RWLOCK(break_hook_lock); +static DEFINE_RWLOCK(break_hook_lock); void register_break_hook(struct break_hook *hook) { -- cgit v0.10.2 From 7184659bed3090248e382d98a49a3c1bcfe11174 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 5 Mar 2014 05:35:45 +0000 Subject: arm64: smp: make local symbol static Make smp_spin_table_cpu_postboot() static, because this function is used only in this file. Signed-off-by: Jingoo Han Signed-off-by: Catalin Marinas diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 44c2280..7a530d2 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -128,7 +128,7 @@ static int smp_spin_table_cpu_boot(unsigned int cpu) return secondary_holding_pen_release != INVALID_HWID ? -ENOSYS : 0; } -void smp_spin_table_cpu_postboot(void) +static void smp_spin_table_cpu_postboot(void) { /* * Let the primary processor know we're out of the pen. -- cgit v0.10.2 From 69bb2600c9f8ca450fede9633edf9c2513c9ee6f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 15:33:18 +0100 Subject: cputime: Fix nsecs_to_cputime() return type cast Even though nsec based cputime_t maps to u64, nsecs_to_cputime() must return a cputime_t value. We want to enforce this kind of cast in order to track down buggy manipulations of cputime_t such as direct access of its values under wrong assumptions on its backend type (nsecs, jiffies, etc...) by core code. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 2c9e62c..768294f 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -44,7 +44,8 @@ typedef u64 __nocast cputime64_t; /* * Convert cputime <-> nanoseconds */ -#define nsecs_to_cputime(__nsecs) ((__force u64)(__nsecs)) +#define nsecs_to_cputime(__nsecs) \ + (__force cputime_t)(__nsecs) /* -- cgit v0.10.2 From bfc3f0281e08066fa8111c3972cff6edc1049864 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 16:33:42 +0100 Subject: cputime: Default implementation of nsecs -> cputime conversion The architectures that override cputime_t (s390, ppc) don't provide any version of nsecs_to_cputime(). Indeed this cputime_t implementation by backend only happens when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y under which the core code doesn't make any use of nsecs_to_cputime(). At least for now. We are going to make a broader use of it so lets provide a default version with a per usecs granularity. It should be good enough for most usecases. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 5793e14..79911a2 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include static spinlock_t cpufreq_stats_lock; diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 88e35d8..5154513 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6f599c6..9d231e9 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #ifndef arch_irq_stat_cpu diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 7141b8d..33de567 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include static int uptime_proc_show(struct seq_file *m, void *v) { diff --git a/include/linux/cputime.h b/include/linux/cputime.h new file mode 100644 index 0000000..2842ebe --- /dev/null +++ b/include/linux/cputime.h @@ -0,0 +1,11 @@ +#ifndef __LINUX_CPUTIME_H +#define __LINUX_CPUTIME_H + +#include + +#ifndef nsecs_to_cputime +# define nsecs_to_cputime(__nsecs) \ + usecs_to_cputime((__nsecs) / NSEC_PER_USEC) +#endif + +#endif /* __LINUX_CPUTIME_H */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 51c72be..d7c6131 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include /* * 'kernel_stat.h' contains the definitions needed for doing diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84..1ac566c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -27,7 +27,7 @@ struct sched_param { #include #include -#include +#include #include #include -- cgit v0.10.2 From d8a9ce3f8ad2b546b9ebaf65de809da0793f11c5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 16:22:37 +0100 Subject: cputime: Bring cputime -> nsecs conversion We already have nsecs_to_cputime(). Now we need to be able to convert the other way around in order to fix a bug on steal time accounting. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/include/asm-generic/cputime_jiffies.h b/include/asm-generic/cputime_jiffies.h index 272ecba..d5cb78f5 100644 --- a/include/asm-generic/cputime_jiffies.h +++ b/include/asm-generic/cputime_jiffies.h @@ -15,8 +15,10 @@ typedef u64 __nocast cputime64_t; /* - * Convert nanoseconds to cputime + * Convert nanoseconds <-> cputime */ +#define cputime_to_nsecs(__ct) \ + jiffies_to_nsecs(cputime_to_jiffies(__ct)) #define nsecs_to_cputime64(__nsec) \ jiffies64_to_cputime64(nsecs_to_jiffies64(__nsec)) #define nsecs_to_cputime(__nsec) \ diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 768294f..4e81760 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -44,6 +44,8 @@ typedef u64 __nocast cputime64_t; /* * Convert cputime <-> nanoseconds */ +#define cputime_to_nsecs(__ct) \ + (__force u64)(__ct) #define nsecs_to_cputime(__nsecs) \ (__force cputime_t)(__nsecs) diff --git a/include/linux/cputime.h b/include/linux/cputime.h index 2842ebe..f2eb2ee 100644 --- a/include/linux/cputime.h +++ b/include/linux/cputime.h @@ -3,6 +3,11 @@ #include +#ifndef cputime_to_nsecs +# define cputime_to_nsecs(__ct) \ + (cputime_to_usecs(__ct) * NSEC_PER_USEC) +#endif + #ifndef nsecs_to_cputime # define nsecs_to_cputime(__nsecs) \ usecs_to_cputime((__nsecs) / NSEC_PER_USEC) -- cgit v0.10.2 From dee08a72deefac251267ed2717717596aa8b6818 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:02:22 +0100 Subject: cputime: Fix jiffies based cputime assumption on steal accounting The steal guest time accounting code assumes that cputime_t is based on jiffies. So when CONFIG_NO_HZ_FULL=y, which implies that cputime_t is based on nsecs, steal_account_process_tick() passes the delta in jiffies to account_steal_time() which then accounts it as if it's a value in nsecs. As a result, accounting 1 second of steal time (with HZ=100 that would be 100 jiffies) is spuriously accounted as 100 nsecs. As such /proc/stat may report 0 values of steal time even when two guests have run concurrently for a few seconds on the same host and same CPU. In order to fix this, lets convert the nsecs based steal delta to cputime instead of jiffies by using the right conversion API. Given that the steal time is stored in cputime_t and this type can have a smaller granularity than nsecs, we only account the rounded converted value and leave the remaining nsecs for the next deltas. Reported-by: Huiqingding Reported-by: Marcelo Tosatti Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 9994791..c91b097 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -258,16 +258,22 @@ static __always_inline bool steal_account_process_tick(void) { #ifdef CONFIG_PARAVIRT if (static_key_false(¶virt_steal_enabled)) { - u64 steal, st = 0; + u64 steal; + cputime_t steal_ct; steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; - st = steal_ticks(steal); - this_rq()->prev_steal_time += st * TICK_NSEC; + /* + * cputime_t may be less precise than nsecs (eg: if it's + * based on jiffies). Lets cast the result to cputime + * granularity and account the rest on the next rounds. + */ + steal_ct = nsecs_to_cputime(steal); + this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct); - account_steal_time(st); - return st; + account_steal_time(steal_ct); + return steal_ct; } #endif return false; -- cgit v0.10.2 From 300a9d887ea221f344962506f724e02101bacc08 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:05:57 +0100 Subject: sched: Remove needless round trip nsecs <-> tick conversion of steal time When update_rq_clock_task() accounts the pending steal time for a task, it converts the steal delta from nsecs to tick then from tick to nsecs. There is no apparent good reason for doing that though because both the task clock and the prev steal delta are u64 and store values in nsecs. So lets remove the needless conversion. Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131e..b14a188 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -823,19 +823,13 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING if (static_key_false((¶virt_steal_rq_enabled))) { - u64 st; - steal = paravirt_steal_clock(cpu_of(rq)); steal -= rq->prev_steal_time_rq; if (unlikely(steal > delta)) steal = delta; - st = steal_ticks(steal); - steal = st * TICK_NSEC; - rq->prev_steal_time_rq += steal; - delta -= steal; } #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c2119fd..5ec9910 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1214,16 +1214,6 @@ extern void update_idle_cpu_load(struct rq *this_rq); extern void init_task_runnable_average(struct task_struct *p); -#ifdef CONFIG_PARAVIRT -static inline u64 steal_ticks(u64 steal) -{ - if (unlikely(steal > NSEC_PER_SEC)) - return div_u64(steal, TICK_NSEC); - - return __iter_div_u64_rem(steal, TICK_NSEC, &steal); -} -#endif - static inline void inc_nr_running(struct rq *rq) { rq->nr_running++; -- cgit v0.10.2 From 073d8224d299528778e90773becd1e890953443c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 5 Mar 2014 17:24:12 +0100 Subject: arch: Remove stub cputime.h headers Many architectures have a stub cputime.h that only include the default cputime.h Lets remove the useless headers, we only need to mention that we want the default headers on the Kbuild files. Cc: Archs Cc: Ingo Molnar Cc: Marcelo Tosatti Cc: Peter Zijlstra Cc: Thomas Gleixner Acked-by: Rik van Riel Signed-off-by: Frederic Weisbecker diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 7736f42..96e54be 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/alpha/include/asm/cputime.h b/arch/alpha/include/asm/cputime.h deleted file mode 100644 index 19577fd..0000000 --- a/arch/alpha/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ALPHA_CPUTIME_H -#define __ALPHA_CPUTIME_H - -#include - -#endif /* __ALPHA_CPUTIME_H */ diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 056027f..afff510 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -5,6 +5,7 @@ header-y += arch-v32/ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += kvm_para.h diff --git a/arch/cris/include/asm/cputime.h b/arch/cris/include/asm/cputime.h deleted file mode 100644 index 4446a65..0000000 --- a/arch/cris/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __CRIS_CPUTIME_H -#define __CRIS_CPUTIME_H - -#include - -#endif /* __CRIS_CPUTIME_H */ diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index babb933..87b95eb 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/frv/include/asm/cputime.h b/arch/frv/include/asm/cputime.h deleted file mode 100644 index f6c373a..0000000 --- a/arch/frv/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_CPUTIME_H -#define _ASM_CPUTIME_H - -#include - -#endif /* _ASM_CPUTIME_H */ diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 5825a35..67779a7 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/m32r/include/asm/cputime.h b/arch/m32r/include/asm/cputime.h deleted file mode 100644 index 0a47550..0000000 --- a/arch/m32r/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __M32R_CPUTIME_H -#define __M32R_CPUTIME_H - -#include - -#endif /* __M32R_CPUTIME_H */ diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 1f590ab..c98ed95 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/microblaze/include/asm/cputime.h b/arch/microblaze/include/asm/cputime.h deleted file mode 100644 index 6d68ad7..0000000 --- a/arch/microblaze/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index cbc6b9b..654d5ba 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += exec.h generic-y += hash.h generic-y += mcs_spinlock.h diff --git a/arch/mn10300/include/asm/cputime.h b/arch/mn10300/include/asm/cputime.h deleted file mode 100644 index 6d68ad7..0000000 --- a/arch/mn10300/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index 4630cf2..2f947ab 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -4,6 +4,7 @@ header-y += generic-y += barrier.h generic-y += clkdev.h +generic-y += cputime.h generic-y += hash.h generic-y += mcs_spinlock.h generic-y += preempt.h diff --git a/arch/score/include/asm/cputime.h b/arch/score/include/asm/cputime.h deleted file mode 100644 index 1fced99..0000000 --- a/arch/score/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_SCORE_CPUTIME_H -#define _ASM_SCORE_CPUTIME_H - -#include - -#endif /* _ASM_SCORE_CPUTIME_H */ diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a8fee07..4acddc4 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -5,4 +5,5 @@ genhdr-y += unistd_64.h genhdr-y += unistd_x32.h generic-y += clkdev.h +generic-y += cputime.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/cputime.h b/arch/x86/include/asm/cputime.h deleted file mode 100644 index 6d68ad7..0000000 --- a/arch/x86/include/asm/cputime.h +++ /dev/null @@ -1 +0,0 @@ -#include -- cgit v0.10.2 From f4e53f9a4f7d01cef9f096b1eb3705329a7ce9c2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Mar 2014 10:18:53 +0100 Subject: MAINTAINERS: add networking selftests to NETWORKING Add it to NETWORKING [GENERAL] to make sure patches for selftests go to the netdev list as well. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index e1be77c..b359eb0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6003,6 +6003,7 @@ F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h F: tools/net/ +F: tools/testing/selftests/net/ NETWORKING [IPv4/IPv6] M: "David S. Miller" -- cgit v0.10.2 From 0a8d8c446b5429d15ff2d48f46e00d8a08552303 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 13 Mar 2014 10:44:34 +0100 Subject: vmxnet3: fix building without CONFIG_PCI_MSI Since commit d25f06ea466e "vmxnet3: fix netpoll race condition", the vmxnet3 driver fails to build when CONFIG_PCI_MSI is disabled, because it unconditionally references the vmxnet3_msix_rx() function. To fix this, use the same #ifdef in the caller that exists around the function definition. Signed-off-by: Arnd Bergmann Cc: Neil Horman Cc: Shreyas Bhatewara Cc: "VMware, Inc." Cc: "David S. Miller" Cc: stable@vger.kernel.org Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b7daa02..0fa3b44 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1761,13 +1761,16 @@ static void vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); - int i; switch (adapter->intr.type) { - case VMXNET3_IT_MSIX: +#ifdef CONFIG_PCI_MSI + case VMXNET3_IT_MSIX: { + int i; for (i = 0; i < adapter->num_rx_queues; i++) vmxnet3_msix_rx(0, &adapter->rx_queue[i]); break; + } +#endif case VMXNET3_IT_MSI: default: vmxnet3_intr(0, adapter->netdev); -- cgit v0.10.2 From de123268300fd33b7f7668fda3264059daffa6ef Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 13 Mar 2014 14:52:15 +0200 Subject: net/mlx4_en: Deregister multicast vxlan steering rules when going down When mlx4_en_stop_port() is called, we need to deregister also the tunnel steering rules that relate to multicast. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index ba212611..84a96f7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1800,6 +1800,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); + if (mclist->tunnel_reg_id) + mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id); } mlx4_en_clear_list(dev); list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { -- cgit v0.10.2 From 2dc3a8e0b6e10c77798a4f6f711ce66334eda1c4 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 6 Mar 2014 15:51:08 -0500 Subject: brcmfmac: fix skb leak in brcmf_sdio_txpkt_prep_sg error path. Commit 1eb4301867 (brcmfmac: fix txglomming scatter-gather packet transfers) added an allocation of an skb via brcmu_pkt_buf_get_skb() but forgot to free it on one of the error paths. Acked-by: Arend van Spriel Signed-off-by: Dave Jones Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c index 119ee6e..ddaa9ef 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_sdio.c @@ -1948,8 +1948,10 @@ static int brcmf_sdio_txpkt_prep_sg(struct brcmf_sdio *bus, if (pkt_pad == NULL) return -ENOMEM; ret = brcmf_sdio_txpkt_hdalign(bus, pkt_pad); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + kfree_skb(pkt_pad); return ret; + } memcpy(pkt_pad->data, pkt->data + pkt->len - tail_chop, tail_chop); -- cgit v0.10.2 From 105ff411c96c52c67261efbe245f0947d39ebce7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 9 Mar 2014 09:51:16 +0100 Subject: ath9k_hw: fix unreachable code in baseband hang detection code The commit "ath9k: reduce baseband hang detection false positive rate" added a delay in the loop checking the baseband state, however it was unreachable due to previous 'continue' statements. Reported-by: Dan Carpenter Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index 303ce27..9078a6c 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1548,6 +1548,7 @@ bool ath9k_hw_check_alive(struct ath_hw *ah) if (reg != last_val) return true; + udelay(1); last_val = reg; if ((reg & 0x7E7FFFEF) == 0x00702400) continue; @@ -1560,8 +1561,6 @@ bool ath9k_hw_check_alive(struct ath_hw *ah) default: return true; } - - udelay(1); } while (count-- > 0); return false; -- cgit v0.10.2 From 5998be879719384af2014b79697eed6e38ee2706 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Wed, 12 Mar 2014 10:37:55 +0100 Subject: ath9k: Fix sequence number assignment for non-data frames Since commit 558ff225de80ac95b132d3a115ddadcd64498b4f (ath9k: fix ps-poll responses under a-mpdu sessions) non-data frames would have gotten a sequence number from a TIDs sequence counter instead of using the global sequence counter. This can lead to instable connections. To fix this only select the correct TID if we are processing a data frame. Furthermore, prevent non-data frames to get a sequence number from a TID sequence counter by adding a check to ath_tx_setup_buffer. Cc: Felix Fietkau Signed-off-by: Helmut Schaa Acked-by: Felix Fietkau Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index f042a18..55897d5 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2063,7 +2063,7 @@ static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc, ATH_TXBUF_RESET(bf); - if (tid) { + if (tid && ieee80211_is_data_present(hdr->frame_control)) { fragno = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; seqno = tid->seq_next; hdr->seq_ctrl = cpu_to_le16(tid->seq_next << IEEE80211_SEQ_SEQ_SHIFT); @@ -2186,7 +2186,7 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, txq->stopped = true; } - if (txctl->an) + if (txctl->an && ieee80211_is_data_present(hdr->frame_control)) tid = ath_get_skb_tid(sc, txctl->an, skb); if (info->flags & IEEE80211_TX_CTL_PS_RESPONSE) { -- cgit v0.10.2 From 584221918925f0d3bbc8ff6de9cfdb703c06a29c Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 12 Mar 2014 15:14:04 +0100 Subject: Revert "rt2x00: rt2800lib: Update BBP register initialization for RT53xx" This reverts commit eac40d9631a7db43570df859fa8a9922e9623607. It cause random connection drops on RT5390 PCI adapters. On Mediatek there is different driver version available for RT53xx chip based on bus type (2.5.0.3 for PCI and 2.6.1.3 for USB). Hence possibly we should set registers differently based on bus type. But is also possible that new driver (i.e. 2.6.1.3) was not verified on RT53xx USB. Until we figure out how to initialize registers properly for RT53xx just revert commit eac40d9631a7db43570df859fa8a9922e9623607 since it cause regression. Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 7f8b5d1..41d4a81 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -5460,14 +5460,15 @@ static void rt2800_init_bbp_53xx(struct rt2x00_dev *rt2x00dev) rt2800_bbp_write(rt2x00dev, 68, 0x0b); - rt2800_bbp_write(rt2x00dev, 69, 0x0d); - rt2800_bbp_write(rt2x00dev, 70, 0x06); + rt2800_bbp_write(rt2x00dev, 69, 0x12); rt2800_bbp_write(rt2x00dev, 73, 0x13); rt2800_bbp_write(rt2x00dev, 75, 0x46); rt2800_bbp_write(rt2x00dev, 76, 0x28); rt2800_bbp_write(rt2x00dev, 77, 0x59); + rt2800_bbp_write(rt2x00dev, 70, 0x0a); + rt2800_bbp_write(rt2x00dev, 79, 0x13); rt2800_bbp_write(rt2x00dev, 80, 0x05); rt2800_bbp_write(rt2x00dev, 81, 0x33); @@ -5510,7 +5511,6 @@ static void rt2800_init_bbp_53xx(struct rt2x00_dev *rt2x00dev) if (rt2x00_rt(rt2x00dev, RT5392)) { rt2800_bbp_write(rt2x00dev, 134, 0xd0); rt2800_bbp_write(rt2x00dev, 135, 0xf6); - rt2800_bbp_write(rt2x00dev, 148, 0x84); } rt2800_disable_unused_dac_adc(rt2x00dev); -- cgit v0.10.2 From 6e07a1e0b53d77dbcb08f29d1cca07c6d33a926f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 12 Mar 2014 08:21:24 +0100 Subject: at86rf230: fix lockdep splats This patch fix a lockdep in the at86rf230 driver, otherwise we get: [ 30.206517] ================================= [ 30.211078] [ INFO: inconsistent lock state ] [ 30.215647] 3.14.0-20140108-1-00994-g32e9426 #163 Not tainted [ 30.221660] --------------------------------- [ 30.226222] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 30.232514] systemd-udevd/157 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 30.238439] (&(&lp->lock)->rlock){?.+...}, at: [] at86rf230_isr+0x18/0x44 [ 30.246621] {HARDIRQ-ON-W} state was registered at: [ 30.251728] [] __lock_acquire+0x7a4/0x18d8 [ 30.257135] [] lock_acquire+0x68/0x7c [ 30.262071] [] _raw_spin_lock+0x28/0x38 [ 30.267203] [] at86rf230_xmit+0x1c/0x144 [ 30.272412] [] mac802154_xmit_worker+0x88/0x148 [ 30.278271] [] process_one_work+0x274/0x404 [ 30.283761] [] worker_thread+0x228/0x374 [ 30.288971] [] kthread+0xd0/0xe4 [ 30.293455] [] ret_from_fork+0x14/0x2c [ 30.298493] irq event stamp: 8948 [ 30.301963] hardirqs last enabled at (8947): [] __kmalloc+0xb4/0x110 [ 30.309636] hardirqs last disabled at (8948): [] __irq_svc+0x34/0x5c [ 30.317215] softirqs last enabled at (8452): [] __do_softirq+0x1dc/0x264 [ 30.325243] softirqs last disabled at (8439): [] irq_exit+0x80/0xf4 We use the lp->lock inside the isr of at86rf230, that's why we need the irqsave spinlock calls. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index ab31544..a30258a 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -546,12 +546,12 @@ at86rf230_xmit(struct ieee802154_dev *dev, struct sk_buff *skb) int rc; unsigned long flags; - spin_lock(&lp->lock); + spin_lock_irqsave(&lp->lock, flags); if (lp->irq_busy) { - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); return -EBUSY; } - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); might_sleep(); @@ -725,10 +725,11 @@ static void at86rf230_irqwork_level(struct work_struct *work) static irqreturn_t at86rf230_isr(int irq, void *data) { struct at86rf230_local *lp = data; + unsigned long flags; - spin_lock(&lp->lock); + spin_lock_irqsave(&lp->lock, flags); lp->irq_busy = 1; - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); schedule_work(&lp->irqwork); -- cgit v0.10.2 From fb00bc2e6cd2046282ba4b03f4fe682aee70b2f8 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 12 Mar 2014 17:31:59 +0800 Subject: bonding: set correct vlan id for alb xmit path The commit d3ab3ffd1d728d7ee77340e7e7e2c7cfe6a4013e (bonding: use rlb_client_info->vlan_id instead of ->tag) remove the rlb_client_info->tag, but occur some issues, The vlan_get_tag() will return 0 for success and -EINVAL for error, so the client_info->vlan_id always be set to 0 if the vlan_get_tag return 0 for success, so the client_info would never get a correct vlan id. We should only set the vlan id to 0 when the vlan_get_tag return error. Fixes: d3ab3ffd1d7 (bonding: use rlb_client_info->vlan_id instead of ->tag) CC: Ding Tianhong CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Ding Tianhong Acked-by: Veaceslav Falico Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index a2c4747..e8f133e 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -730,7 +730,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon client_info->ntt = 0; } - if (!vlan_get_tag(skb, &client_info->vlan_id)) + if (vlan_get_tag(skb, &client_info->vlan_id)) client_info->vlan_id = 0; if (!client_info->assigned) { -- cgit v0.10.2 From 84fe61821e4ebab6322eeae3f3c27f77f0031978 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 12 Mar 2014 11:28:19 +0100 Subject: eth: fec: Fix lost promiscuous mode after reconnecting cable If the Freescale fec is in promiscuous mode and network cable is reconnected then the promiscuous mode get lost. The problem is caused by a too soon call of set_multicast_list to re-enable promisc mode. The FEC_R_CNTRL register changes are overwritten by fec_restart. This patch fixes this by moving the call behind the init of FEC_R_CNTRL register in fec_restart. Successful tested on a i.MX28 board. Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 479a7cb..03a3513 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -528,13 +528,6 @@ fec_restart(struct net_device *ndev, int duplex) /* Clear any outstanding interrupt. */ writel(0xffc00000, fep->hwp + FEC_IEVENT); - /* Setup multicast filter. */ - set_multicast_list(ndev); -#ifndef CONFIG_M5272 - writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); - writel(0, fep->hwp + FEC_HASH_TABLE_LOW); -#endif - /* Set maximum receive buffer size. */ writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE); @@ -655,6 +648,13 @@ fec_restart(struct net_device *ndev, int duplex) writel(rcntl, fep->hwp + FEC_R_CNTRL); + /* Setup multicast filter. */ + set_multicast_list(ndev); +#ifndef CONFIG_M5272 + writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); + writel(0, fep->hwp + FEC_HASH_TABLE_LOW); +#endif + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { /* enable ENET endian swap */ ecntl |= (1 << 8); -- cgit v0.10.2 From ecab67015ef6e3f3635551dcc9971cf363cc1cd5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 12 Mar 2014 22:13:19 +0100 Subject: ipv6: Avoid unnecessary temporary addresses being generated tmp_prefered_lft is an offset to ifp->tstamp, not now. Therefore age needs to be added to the condition. Age calculation in ipv6_create_tempaddr is different from the one in addrconf_verify and doesn't consider ADDRCONF_TIMER_FUZZ_MINUS. This can cause age in ipv6_create_tempaddr to be less than the one in addrconf_verify and therefore unnecessary temporary address to be generated. Use age calculation as in addrconf_modify to avoid this. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fdbfeca..344e972 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1103,8 +1103,11 @@ retry: * Lifetime is greater than REGEN_ADVANCE time units. In particular, * an implementation must not create a temporary address with a zero * Preferred Lifetime. + * Use age calculation as in addrconf_verify to avoid unnecessary + * temporary addresses being generated. */ - if (tmp_prefered_lft <= regen_advance) { + age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + if (tmp_prefered_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; -- cgit v0.10.2 From a4e90bed511220ff601d064c9e5d583e91308f65 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 13 Mar 2014 22:11:39 +0100 Subject: ACPI / sleep: Add extra checks for HW Reduced ACPI mode sleep states If the HW Reduced ACPI mode bit is set in the FADT, ACPICA uses the optional sleep control and sleep status registers for making the system enter sleep states (including S5), so it is not possible to use system sleep states or power it off using ACPI if the HW Reduced ACPI mode bit is set and those registers are not available. For this reason, add a new function, acpi_sleep_state_supported(), checking if the HW Reduced ACPI mode bit is set and whether or not system sleep states are usable in that case in addition to checking the return value of acpi_get_sleep_type_data() and make the ACPI sleep setup routines use that function to check the availability of system sleep states. Among other things, this prevents the kernel from attempting to use ACPI for powering off HW Reduced ACPI systems without the sleep control and sleep status registers, because ACPI power off doesn't have a chance to work on them. That allows alternative power off mechanisms that may actually work to be used on those systems. The affected machines include Dell Venue 8 Pro, Asus T100TA, Haswell Desktop SDP and Ivy Bridge EP Demo depot. References: https://bugzilla.kernel.org/show_bug.cgi?id=70931 Reported-by: Adam Williamson Tested-by: Aubrey Li Cc: 3.4+ # 3.4+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index b718806..c40fb2e 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -71,6 +71,17 @@ static int acpi_sleep_prepare(u32 acpi_state) return 0; } +static bool acpi_sleep_state_supported(u8 sleep_state) +{ + acpi_status status; + u8 type_a, type_b; + + status = acpi_get_sleep_type_data(sleep_state, &type_a, &type_b); + return ACPI_SUCCESS(status) && (!acpi_gbl_reduced_hardware + || (acpi_gbl_FADT.sleep_control.address + && acpi_gbl_FADT.sleep_status.address)); +} + #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; @@ -604,15 +615,9 @@ static void acpi_sleep_suspend_setup(void) { int i; - for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) { - acpi_status status; - u8 type_a, type_b; - - status = acpi_get_sleep_type_data(i, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { + for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) + if (acpi_sleep_state_supported(i)) sleep_states[i] = 1; - } - } suspend_set_ops(old_suspend_ordering ? &acpi_suspend_ops_old : &acpi_suspend_ops); @@ -740,11 +745,7 @@ static const struct platform_hibernation_ops acpi_hibernation_ops_old = { static void acpi_sleep_hibernate_setup(void) { - acpi_status status; - u8 type_a, type_b; - - status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b); - if (ACPI_FAILURE(status)) + if (!acpi_sleep_state_supported(ACPI_STATE_S4)) return; hibernation_set_ops(old_suspend_ordering ? @@ -793,8 +794,6 @@ static void acpi_power_off(void) int __init acpi_sleep_init(void) { - acpi_status status; - u8 type_a, type_b; char supported[ACPI_S_STATE_COUNT * 3 + 1]; char *pos = supported; int i; @@ -806,8 +805,7 @@ int __init acpi_sleep_init(void) acpi_sleep_suspend_setup(); acpi_sleep_hibernate_setup(); - status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { + if (acpi_sleep_state_supported(ACPI_STATE_S5)) { sleep_states[ACPI_STATE_S5] = 1; pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; -- cgit v0.10.2 From 62c19c9d29e65086e5ae76df371ed2e6b23f00cd Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 9 Feb 2014 19:47:40 +0100 Subject: i2c: Remove usage of orphaned symbol OF_I2C The symbol is an orphan, don't depend on it anymore. Signed-off-by: Richard Weinberger [wsa: enhanced commit message] Signed-off-by: Wolfram Sang Fixes: 687b81d083c0 (i2c: move OF helpers into the core) Cc: stable@kernel.org diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index f5ed031..de17c55 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -387,7 +387,7 @@ config I2C_CBUS_GPIO config I2C_CPM tristate "Freescale CPM1 or CPM2 (MPC8xx/826x)" - depends on (CPM1 || CPM2) && OF_I2C + depends on CPM1 || CPM2 help This supports the use of the I2C interface on Freescale processors with CPM1 or CPM2. -- cgit v0.10.2 From 3e920b532a0f49d05e4c0f7d1b46b0ab8f471e09 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Mar 2014 15:13:02 +0100 Subject: x86, boot: Correct max ramdisk size name The name in struct bootparam is ->initrd_addr_max and not ramdisk_max. Fix that. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1394633584-5509-2-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt index cb81741d..a75e3ad 100644 --- a/Documentation/x86/boot.txt +++ b/Documentation/x86/boot.txt @@ -182,7 +182,7 @@ Offset Proto Name Meaning 0226/1 2.02+(3 ext_loader_ver Extended boot loader version 0227/1 2.02+(3 ext_loader_type Extended boot loader ID 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line -022C/4 2.03+ ramdisk_max Highest legal initrd address +022C/4 2.03+ initrd_addr_max Highest legal initrd address 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not 0235/1 2.10+ min_alignment Minimum alignment, as a power of two @@ -534,7 +534,7 @@ Protocol: 2.02+ zero, the kernel will assume that your boot loader does not support the 2.02+ protocol. -Field name: ramdisk_max +Field name: initrd_addr_max Type: read Offset/size: 0x22c/4 Protocol: 2.03+ diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index ec3b8ba..46f5a22 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -350,7 +350,7 @@ cmd_line_ptr: .long 0 # (Header version 0x0202 or later) # can be located anywhere in # low memory 0x10000 or higher. -ramdisk_max: .long 0x7fffffff +initrd_addr_max: .long 0x7fffffff # (Header version 0x0203 or later) # The highest safe address for # the contents of an initrd -- cgit v0.10.2 From 5314feebabd92535a16295a8067b3fd79815cd16 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Mar 2014 15:13:03 +0100 Subject: x86, crash: Unify ifdef Merge two back-to-back CONFIG_X86_32 ifdefs into one. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1394633584-5509-3-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a57902e..507de80 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -57,9 +57,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) { #ifdef CONFIG_X86_32 struct pt_regs fixed_regs; -#endif -#ifdef CONFIG_X86_32 if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; -- cgit v0.10.2 From b82ad3d3944193e5f95319c30fd24e39d0d7b510 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 Mar 2014 15:13:04 +0100 Subject: x86, pageattr: Correct WBINVD spelling in comment It is WBINVD, for INValiDate and not "wbindv". Use caps for instruction names, while at it. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1394633584-5509-4-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index b3b19f4..ba957c9 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1377,10 +1377,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, cache = cache_attr(mask_set); /* - * On success we use clflush, when the CPU supports it to - * avoid the wbindv. If the CPU does not support it and in the + * On success we use CLFLUSH, when the CPU supports it to + * avoid the WBINVD. If the CPU does not support it and in the * error case we fall back to cpa_flush_all (which uses - * wbindv): + * WBINVD): */ if (!ret && cpu_has_clflush) { if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { -- cgit v0.10.2 From 22085a66c2fab6cf9b9393c056a3600a6b4735de Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 9 Mar 2014 18:05:23 +0100 Subject: x86: Add another set of MSR accessor functions We very often need to set or clear a bit in an MSR as a result of doing some sort of a hardware configuration. Add generic versions of that repeated functionality in order to save us a bunch of duplicated code in the early CPU vendor detection/config code. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1394384725-10796-2-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index e139b13..de36f22 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -214,6 +214,8 @@ do { \ struct msr *msrs_alloc(void); void msrs_free(struct msr *msrs); +int msr_set_bit(u32 msr, u8 bit); +int msr_clear_bit(u32 msr, u8 bit); #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 8f8eebd..db9db44 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -8,7 +8,7 @@ struct msr *msrs_alloc(void) msrs = alloc_percpu(struct msr); if (!msrs) { - pr_warning("%s: error allocating msrs\n", __func__); + pr_warn("%s: error allocating msrs\n", __func__); return NULL; } @@ -21,3 +21,90 @@ void msrs_free(struct msr *msrs) free_percpu(msrs); } EXPORT_SYMBOL(msrs_free); + +/** + * Read an MSR with error handling + * + * @msr: MSR to read + * @m: value to read into + * + * It returns read data only on success, otherwise it doesn't change the output + * argument @m. + * + */ +int msr_read(u32 msr, struct msr *m) +{ + int err; + u64 val; + + err = rdmsrl_safe(msr, &val); + if (!err) + m->q = val; + + return err; +} + +/** + * Write an MSR with error handling + * + * @msr: MSR to write + * @m: value to write + */ +int msr_write(u32 msr, struct msr *m) +{ + return wrmsrl_safe(msr, m->q); +} + +static inline int __flip_bit(u32 msr, u8 bit, bool set) +{ + struct msr m, m1; + int err = -EINVAL; + + if (bit > 63) + return err; + + err = msr_read(msr, &m); + if (err) + return err; + + m1 = m; + if (set) + m1.q |= BIT_64(bit); + else + m1.q &= ~BIT_64(bit); + + if (m1.q == m.q) + return 0; + + err = msr_write(msr, &m); + if (err) + return err; + + return 1; +} + +/** + * Set @bit in a MSR @msr. + * + * Retval: + * < 0: An error was encountered. + * = 0: Bit was already set. + * > 0: Hardware accepted the MSR write. + */ +int msr_set_bit(u32 msr, u8 bit) +{ + return __flip_bit(msr, bit, true); +} + +/** + * Clear @bit in a MSR @msr. + * + * Retval: + * < 0: An error was encountered. + * = 0: Bit was already cleared. + * > 0: Hardware accepted the MSR write. + */ +int msr_clear_bit(u32 msr, u8 bit) +{ + return __flip_bit(msr, bit, false); +} -- cgit v0.10.2 From 8f86a7373a1c8ee52d3cc64adf7f2ace13fd24ed Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 9 Mar 2014 18:05:24 +0100 Subject: x86, AMD: Convert to the new bit access MSR accessors ... and save us a bunch of code. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1394384725-10796-3-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c67ffa6..b85e43a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -233,9 +233,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) if (c->x86_model >= 6 && c->x86_model <= 10) { if (!cpu_has(c, X86_FEATURE_XMM)) { printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); - rdmsr(MSR_K7_HWCR, l, h); - l &= ~0x00008000; - wrmsr(MSR_K7_HWCR, l, h); + msr_clear_bit(MSR_K7_HWCR, 15); set_cpu_cap(c, X86_FEATURE_XMM); } } @@ -509,14 +507,8 @@ static void early_init_amd(struct cpuinfo_x86 *c) #endif /* F16h erratum 793, CVE-2013-6885 */ - if (c->x86 == 0x16 && c->x86_model <= 0xf) { - u64 val; - - rdmsrl(MSR_AMD64_LS_CFG, val); - if (!(val & BIT(15))) - wrmsrl(MSR_AMD64_LS_CFG, val | BIT(15)); - } - + if (c->x86 == 0x16 && c->x86_model <= 0xf) + msr_set_bit(MSR_AMD64_LS_CFG, 15); } static const int amd_erratum_383[]; @@ -536,11 +528,8 @@ static void init_amd(struct cpuinfo_x86 *c) * Errata 63 for SH-B3 steppings * Errata 122 for all steppings (F+ have it disabled by default) */ - if (c->x86 == 0xf) { - rdmsrl(MSR_K7_HWCR, value); - value |= 1 << 6; - wrmsrl(MSR_K7_HWCR, value); - } + if (c->x86 == 0xf) + msr_set_bit(MSR_K7_HWCR, 6); #endif early_init_amd(c); @@ -623,14 +612,11 @@ static void init_amd(struct cpuinfo_x86 *c) (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && !cpu_has(c, X86_FEATURE_TOPOEXT)) { - if (!rdmsrl_safe(0xc0011005, &value)) { - value |= 1ULL << 54; - wrmsrl_safe(0xc0011005, value); + if (msr_set_bit(0xc0011005, 54) > 0) { rdmsrl(0xc0011005, value); - if (value & (1ULL << 54)) { + if (value & BIT_64(54)) { set_cpu_cap(c, X86_FEATURE_TOPOEXT); - printk(KERN_INFO FW_INFO "CPU: Re-enabling " - "disabled Topology Extensions Support\n"); + pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); } } } @@ -709,19 +695,12 @@ static void init_amd(struct cpuinfo_x86 *c) * Disable GART TLB Walk Errors on Fam10h. We do this here * because this is always needed when GART is enabled, even in a * kernel which has no MCE support built in. - * BIOS should disable GartTlbWlk Errors themself. If - * it doesn't do it here as suggested by the BKDG. + * BIOS should disable GartTlbWlk Errors already. If + * it doesn't, do it here as suggested by the BKDG. * * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 */ - u64 mask; - int err; - - err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); - if (err == 0) { - mask |= (1 << 10); - wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); - } + msr_set_bit(MSR_AMD64_MCx_MASK(4), 10); /* * On family 10h BIOS may not have properly enabled WC+ support, @@ -733,10 +712,7 @@ static void init_amd(struct cpuinfo_x86 *c) * NOTE: we want to use the _safe accessors so as not to #GP kvm * guests on older kvm hosts. */ - - rdmsrl_safe(MSR_AMD64_BU_CFG2, &value); - value &= ~(1ULL << 24); - wrmsrl_safe(MSR_AMD64_BU_CFG2, value); + msr_clear_bit(MSR_AMD64_BU_CFG2, 24); if (cpu_has_amd_erratum(c, amd_erratum_383)) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); -- cgit v0.10.2 From c0a639ad0bc6b178b46996bd1f821a04643e2bde Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 9 Mar 2014 18:05:25 +0100 Subject: x86, Intel: Convert to the new bit access MSR accessors ... and save some lines of code. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1394384725-10796-4-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c19fc60..045e6db 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -368,14 +368,16 @@ #define THERM_LOG_THRESHOLD1 (1 << 9) /* MISC_ENABLE bits: architectural */ -#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) +#define MSR_BIT_FAST_STRING 0 +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_BIT_FAST_STRING) #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) #define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7) #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11) #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12) #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16) #define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18) -#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22) +#define MSR_BIT_LIMIT_CPUID 22 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_BIT_LIMIT_CPUID); #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23) #define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34) @@ -385,7 +387,8 @@ #define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4) #define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6) #define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8) -#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9) +#define MSR_BIT_PRF_DIS 9 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_BIT_PRF_DIS) #define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10) #define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10) #define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 5cd9bfa..44ca631 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -31,11 +31,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* Unmask CPUID levels if masked: */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - - if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { - misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); + if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_BIT_LIMIT_CPUID) > 0) { c->cpuid_level = cpuid_eax(0); get_cpu_cap(c); } @@ -129,16 +125,9 @@ static void early_init_intel(struct cpuinfo_x86 *c) * Ingo Molnar reported a Pentium D (model 6) and a Xeon * (model 2) with the same problem. */ - if (c->x86 == 15) { - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - - if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { - printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); - - misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; - wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - } - } + if (c->x86 == 15) + if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_BIT_FAST_STRING) > 0) + pr_info("kmemcheck: Disabling fast string operations\n"); #endif /* @@ -197,8 +186,6 @@ static void intel_smp_check(struct cpuinfo_x86 *c) static void intel_workarounds(struct cpuinfo_x86 *c) { - unsigned long lo, hi; - #ifdef CONFIG_X86_F00F_BUG /* * All current models of Pentium and Pentium with MMX technology CPUs @@ -229,12 +216,9 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Hardware prefetcher may cause stale data to be loaded into the cache. */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); - if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) { - printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); - printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); - lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; - wrmsr(MSR_IA32_MISC_ENABLE, lo, hi); + if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_BIT_PRF_DIS) > 0) { + pr_info("CPU: C0 stepping P4 Xeon detected.\n"); + pr_info("CPU: Disabling hardware prefetching (Errata 037)\n"); } } -- cgit v0.10.2 From 0b131be8d4125b32eb5b94e84f4f9dee6a0ef797 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 13 Mar 2014 15:40:52 -0700 Subject: x86, intel: Make MSR_IA32_MISC_ENABLE bit constants systematic Replace somewhat arbitrary constants for bits in MSR_IA32_MISC_ENABLE with verbose but systematic ones. Add _BIT defines for all the rest of them, too. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 045e6db..4924f4b 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -368,36 +368,58 @@ #define THERM_LOG_THRESHOLD1 (1 << 9) /* MISC_ENABLE bits: architectural */ -#define MSR_BIT_FAST_STRING 0 -#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_BIT_FAST_STRING) -#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) -#define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7) -#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11) -#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12) -#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16) -#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18) -#define MSR_BIT_LIMIT_CPUID 22 -#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_BIT_LIMIT_CPUID); -#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23) -#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34) +#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) +#define MSR_IA32_MISC_ENABLE_TCC_BIT 1 +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) +#define MSR_IA32_MISC_ENABLE_EMON_BIT 7 +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) +#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT); +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) /* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ -#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2) -#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3) -#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4) -#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6) -#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8) -#define MSR_BIT_PRF_DIS 9 -#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_BIT_PRF_DIS) -#define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10) -#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10) -#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13) -#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19) -#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20) -#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24) -#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37) -#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) -#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) +#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) +#define MSR_IA32_MISC_ENABLE_TM1_BIT 3 +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) +#define MSR_IA32_MISC_ENABLE_TM2_BIT 13 +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) #define MSR_IA32_TSC_DEADLINE 0x000006E0 diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 44ca631..34bbb55 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -31,7 +31,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) /* Unmask CPUID levels if masked: */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_BIT_LIMIT_CPUID) > 0) { + if (msr_clear_bit(MSR_IA32_MISC_ENABLE, + MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) { c->cpuid_level = cpuid_eax(0); get_cpu_cap(c); } @@ -126,7 +127,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) * (model 2) with the same problem. */ if (c->x86 == 15) - if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_BIT_FAST_STRING) > 0) + if (msr_clear_bit(MSR_IA32_MISC_ENABLE, + MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0) pr_info("kmemcheck: Disabling fast string operations\n"); #endif @@ -216,7 +218,9 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Hardware prefetcher may cause stale data to be loaded into the cache. */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { - if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_BIT_PRF_DIS) > 0) { + if (msr_set_bit(MSR_IA32_MISC_ENABLE, + MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) + > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); pr_info("CPU: Disabling hardware prefetching (Errata 037)\n"); } -- cgit v0.10.2 From 81827ed8d85e892311965dc9ec4120b2b2e745bd Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 13 Mar 2014 13:04:36 +0100 Subject: perf/x86/uncore: Fix missing end markers for SNB/IVB/HSW IMC PMU This patch fixes a bug with the SNB/IVB/HSW uncore mmeory controller support. The PCI Ids tables for the memory controller were missing end markers. That could cause random crashes on boot during or after PCI device registration. Signed-off-by: Stephane Erainan Cc: peterz@infradead.org Cc: zheng.z.yan@intel.com Cc: bp@alien8.de Cc: ak@linux.intel.com Link: http://lkml.kernel.org/r/20140313120436.GA14236@quad Signed-off-by: Ingo Molnar -- diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index dfd50ea..618d502 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -1989,6 +1989,7 @@ static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* end: all zeroes */ }, }; static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { @@ -1996,6 +1997,7 @@ static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* end: all zeroes */ }, }; static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { @@ -2003,6 +2005,7 @@ static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* end: all zeroes */ }, }; static struct pci_driver snb_uncore_pci_driver = { -- cgit v0.10.2 From 847d7970defb45540735b3fb4e88471c27cacd85 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Thu, 13 Mar 2014 19:43:01 +0800 Subject: x86/amd/numa: Fix northbridge quirk to assign correct NUMA node For systems with multiple servers and routed fabric, all northbridges get assigned to the first server. Fix this by also using the node reported from the PCI bus. For single-fabric systems, the northbriges are on PCI bus 0 by definition, which are on NUMA node 0 by definition, so this is invarient on most systems. Tested on fam10h and fam15h single and multi-fabric systems and candidate for stable. Signed-off-by: Daniel J Blueman Acked-by: Steffen Persvold Acked-by: Borislav Petkov Cc: Link: http://lkml.kernel.org/r/1394710981-3596-1-git-send-email-daniel@numascale.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 7c6acd4..ff898bb 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -529,7 +529,7 @@ static void quirk_amd_nb_node(struct pci_dev *dev) return; pci_read_config_dword(nb_ht, 0x60, &val); - node = val & 7; + node = pcibus_to_node(dev->bus) | (val & 7); /* * Some hardware may return an invalid node ID, * so check it first: -- cgit v0.10.2 From 443fc8a3e01cef95f2c7c26605457d49e63da6b1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 11 Mar 2014 10:49:20 +0100 Subject: s390/perf: make print_debug_cf() static Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 5d2dfa3..61595c1 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -121,7 +121,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs) : PERF_RECORD_MISC_KERNEL; } -void print_debug_cf(void) +static void print_debug_cf(void) { struct cpumf_ctr_info cf_info; int cpu = smp_processor_id(); -- cgit v0.10.2 From 818a330c4e1be9c39fa7ca9221e044907d92b4bb Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Mar 2014 12:01:08 +0100 Subject: s390/ptrace: add support for PTRACE_SINGLEBLOCK The PTRACE_SINGLEBLOCK option is used to get control whenever the inferior has executed a successful branch. The PER option to implement block stepping is successful-branching event, bit 32 in the PER-event mask. Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 9c82ceb..f4783c0 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -83,6 +83,7 @@ struct per_struct_kernel { * These are defined as per linux/ptrace.h, which see. */ #define arch_has_single_step() (1) +#define arch_has_block_step() (1) #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index d2e53d6..3ccd71b 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -92,6 +92,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 19 /* restore signal mask in do_signal() */ #define TIF_SINGLE_STEP 20 /* This task is single stepped */ +#define TIF_BLOCK_STEP 21 /* This task is block stepped */ #define _TIF_SYSCALL (1<. + */ +#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ + +/* * PT_PROT definition is loosely based on hppa bsd definition in * gdb/hppab-nat.c */ diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index f6be608..4ac8faf 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -85,7 +85,10 @@ void update_cr_regs(struct task_struct *task) /* merge TIF_SINGLE_STEP into user specified PER registers. */ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) { - new.control |= PER_EVENT_IFETCH; + if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) + new.control |= PER_EVENT_BRANCH; + else + new.control |= PER_EVENT_IFETCH; #ifdef CONFIG_64BIT new.control |= PER_CONTROL_SUSPENSION; new.control |= PER_EVENT_TRANSACTION_END; @@ -107,14 +110,22 @@ void update_cr_regs(struct task_struct *task) void user_enable_single_step(struct task_struct *task) { + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); set_tsk_thread_flag(task, TIF_SINGLE_STEP); } void user_disable_single_step(struct task_struct *task) { + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); clear_tsk_thread_flag(task, TIF_SINGLE_STEP); } +void user_enable_block_step(struct task_struct *task) +{ + set_tsk_thread_flag(task, TIF_SINGLE_STEP); + set_tsk_thread_flag(task, TIF_BLOCK_STEP); +} + /* * Called by kernel/ptrace.c when detaching.. * -- cgit v0.10.2 From a7e8b4b51917e5e14e28af4f49891bd153ffe5cd Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 7 Feb 2014 21:50:25 +0100 Subject: irqchip: sunxi: Change compatibles The Allwinner A10 compatibles were following a slightly different compatible patterns than the rest of the SoCs for historical reasons. Change the compatibles to match the other pattern in the irq controller driver for consistency. Signed-off-by: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Thomas Gleixner diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt index 32cec4b..b290ca1 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt @@ -2,7 +2,7 @@ Allwinner Sunxi Interrupt Controller Required properties: -- compatible : should be "allwinner,sun4i-ic" +- compatible : should be "allwinner,sun4i-a10-ic" - reg : Specifies base physical address and size of the registers. - interrupt-controller : Identifies the node as an interrupt controller - #interrupt-cells : Specifies the number of cells needed to encode an @@ -11,7 +11,7 @@ Required properties: Example: intc: interrupt-controller { - compatible = "allwinner,sun4i-ic"; + compatible = "allwinner,sun4i-a10-ic"; reg = <0x01c20400 0x400>; interrupt-controller; #interrupt-cells = <1>; diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 9fbff03..1599955 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -134,7 +134,7 @@ static int __init sun4i_of_init(struct device_node *node, return 0; } -IRQCHIP_DECLARE(allwinner_sun4i_ic, "allwinner,sun4i-ic", sun4i_of_init); +IRQCHIP_DECLARE(allwinner_sun4i_ic, "allwinner,sun4i-a10-ic", sun4i_of_init); static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs) { -- cgit v0.10.2 From 09504a7d76452c5891ff3bad7f8892376eb133f2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 7 Feb 2014 21:50:26 +0100 Subject: ARM: sunxi: dt: Convert to the new irq controller compatibles Switch the device tree to the new compatibles introduced in the irqchip drivers to have a common pattern accross all Allwinner SoCs. Signed-off-by: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Thomas Gleixner diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index d4d2763..491ce8f 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -331,7 +331,7 @@ }; intc: interrupt-controller@01c20400 { - compatible = "allwinner,sun4i-ic"; + compatible = "allwinner,sun4i-a10-ic"; reg = <0x01c20400 0x400>; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 79fd412..a005a565 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -294,7 +294,7 @@ }; intc: interrupt-controller@01c20400 { - compatible = "allwinner,sun4i-ic"; + compatible = "allwinner,sun4i-a10-ic"; reg = <0x01c20400 0x400>; interrupt-controller; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index c463fd7..49f2f1b 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -275,7 +275,7 @@ ranges; intc: interrupt-controller@01c20400 { - compatible = "allwinner,sun4i-ic"; + compatible = "allwinner,sun4i-a10-ic"; reg = <0x01c20400 0x400>; interrupt-controller; #interrupt-cells = <1>; -- cgit v0.10.2 From 4f6e4f71c9d39cf49e0cb1be5b7721db5fbe92ac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2014 15:32:47 +0100 Subject: genirq: Document IRQCHIP_ONESHOT_SAFE flag Add missing documentation of the flag. Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq.h b/include/linux/irq.h index e675971..67ace7a 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -355,6 +355,7 @@ struct irq_chip { * IRQCHIP_ONOFFLINE_ENABLED: Only call irq_on/off_line callbacks * when irq enabled * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip + * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), -- cgit v0.10.2 From 328a4978df833249b099c9875738d7b72042ffe1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Mar 2014 19:03:51 +0100 Subject: genirq: Add a new IRQCHIP_EOI_THREADED flag The flag is necessary for interrupt chips which require an ACK/EOI after the handler has run. In case of threaded handlers this needs to happen after the threaded handler has completed before the unmask of the interrupt. The flag is only unseful in combination with the handle_fasteoi_irq flow control handler. It can be combined with the flag IRQCHIP_EOI_IF_HANDLED, so the EOI is not issued when the interrupt is disabled or in progress. Tested-by: Hans de Goede Reviewed-by: Hans de Goede Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Cc: Maxime Ripard Link: http://lkml.kernel.org/r/1394733834-26839-2-git-send-email-hdegoede@redhat.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq.h b/include/linux/irq.h index 67ace7a..d278838 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -356,6 +356,7 @@ struct irq_chip { * when irq enabled * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask + * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -364,6 +365,7 @@ enum { IRQCHIP_ONOFFLINE_ENABLED = (1 << 3), IRQCHIP_SKIP_SET_WAKE = (1 << 4), IRQCHIP_ONESHOT_SAFE = (1 << 5), + IRQCHIP_EOI_THREADED = (1 << 6), }; /* This include will go away once we isolated irq_desc usage to core code */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index dc04c16..6397df2 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -281,6 +281,19 @@ void unmask_irq(struct irq_desc *desc) } } +void unmask_threaded_irq(struct irq_desc *desc) +{ + struct irq_chip *chip = desc->irq_data.chip; + + if (chip->flags & IRQCHIP_EOI_THREADED) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_unmask) { + chip->irq_unmask(&desc->irq_data); + irq_state_clr_masked(desc); + } +} + /* * handle_nested_irq - Handle a nested irq from a irq thread * @irq: the interrupt number @@ -435,6 +448,27 @@ static inline void preflow_handler(struct irq_desc *desc) static inline void preflow_handler(struct irq_desc *desc) { } #endif +static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) +{ + if (!(desc->istate & IRQS_ONESHOT)) { + chip->irq_eoi(&desc->irq_data); + return; + } + /* + * We need to unmask in the following cases: + * - Oneshot irq which did not wake the thread (caused by a + * spurious interrupt or a primary handler handling it + * completely). + */ + if (!irqd_irq_disabled(&desc->irq_data) && + irqd_irq_masked(&desc->irq_data) && !desc->threads_oneshot) { + chip->irq_eoi(&desc->irq_data); + unmask_irq(desc); + } else if (!(chip->flags & IRQCHIP_EOI_THREADED)) { + chip->irq_eoi(&desc->irq_data); + } +} + /** * handle_fasteoi_irq - irq handler for transparent controllers * @irq: the interrupt number @@ -448,6 +482,8 @@ static inline void preflow_handler(struct irq_desc *desc) { } void handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) { + struct irq_chip *chip = desc->irq_data.chip; + raw_spin_lock(&desc->lock); if (unlikely(irqd_irq_inprogress(&desc->irq_data))) @@ -473,18 +509,14 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) preflow_handler(desc); handle_irq_event(desc); - if (desc->istate & IRQS_ONESHOT) - cond_unmask_irq(desc); + cond_unmask_eoi_irq(desc, chip); -out_eoi: - desc->irq_data.chip->irq_eoi(&desc->irq_data); -out_unlock: raw_spin_unlock(&desc->lock); return; out: - if (!(desc->irq_data.chip->flags & IRQCHIP_EOI_IF_HANDLED)) - goto out_eoi; - goto out_unlock; + if (!(chip->flags & IRQCHIP_EOI_IF_HANDLED)) + chip->irq_eoi(&desc->irq_data); + raw_spin_unlock(&desc->lock); } /** diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 17b6717..ddf1ffe 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -74,6 +74,7 @@ extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); extern void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu); extern void mask_irq(struct irq_desc *desc); extern void unmask_irq(struct irq_desc *desc); +extern void unmask_threaded_irq(struct irq_desc *desc); extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index de1a8ed..2486a4c 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -748,7 +748,7 @@ again: if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) && irqd_irq_masked(&desc->irq_data)) - unmask_irq(desc); + unmask_threaded_irq(desc); out_unlock: raw_spin_unlock_irq(&desc->lock); -- cgit v0.10.2 From 56af0416b00f6edc7845a7b3f2ef179e385c8e15 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 13 Mar 2014 19:03:52 +0100 Subject: irqchip: sun4i: Fix irq 0 not working SUN4I_IRQ_VECTOR_REG containing 0 can mean one of 3 things: 1) no more irqs pending 2) irq 0 pending 3) spurious irq So if we immediately get a reading of 0, check the irq-pending reg to differentiate between 2 and 3. We only do this once to avoid the extra check in the common case of 1) hapening after having read the vector-reg once. Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Link: http://lkml.kernel.org/r/1394733834-26839-3-git-send-email-hdegoede@redhat.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 1599955..7ae85ec 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -140,10 +140,24 @@ static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs) { u32 irq, hwirq; + /* + * hwirq == 0 can mean one of 3 things: + * 1) no more irqs pending + * 2) irq 0 pending + * 3) spurious irq + * So if we immediately get a reading of 0, check the irq-pending reg + * to differentiate between 2 and 3. We only do this once to avoid + * the extra check in the common case of 1 hapening after having + * read the vector-reg once. + */ hwirq = readl(sun4i_irq_base + SUN4I_IRQ_VECTOR_REG) >> 2; - while (hwirq != 0) { + if (hwirq == 0 && + !(readl(sun4i_irq_base + SUN4I_IRQ_PENDING_REG(0)) & BIT(0))) + return; + + do { irq = irq_find_mapping(sun4i_irq_domain, hwirq); handle_IRQ(irq, regs); hwirq = readl(sun4i_irq_base + SUN4I_IRQ_VECTOR_REG) >> 2; - } + } while (hwirq != 0); } -- cgit v0.10.2 From 649ff46e5e29868e915354ed1e9bebcf0faec3ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 13 Mar 2014 19:03:53 +0100 Subject: irqchip: sun4i: Fix a comment about mask register initialization The comment was claiming that we were masking all irqs, while the code actually *un*masks all of them. Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Link: http://lkml.kernel.org/r/1394733834-26839-4-git-send-email-hdegoede@redhat.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 7ae85ec..60a28c69 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -109,7 +109,7 @@ static int __init sun4i_of_init(struct device_node *node, writel(0, sun4i_irq_base + SUN4I_IRQ_ENABLE_REG(1)); writel(0, sun4i_irq_base + SUN4I_IRQ_ENABLE_REG(2)); - /* Mask all the interrupts */ + /* Unmask all the interrupts, ENABLE_REG(x) is used for masking */ writel(0, sun4i_irq_base + SUN4I_IRQ_MASK_REG(0)); writel(0, sun4i_irq_base + SUN4I_IRQ_MASK_REG(1)); writel(0, sun4i_irq_base + SUN4I_IRQ_MASK_REG(2)); -- cgit v0.10.2 From e9df9e221665d40928e25a02c2700ac12eda7270 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 13 Mar 2014 19:03:54 +0100 Subject: irqchip: sun4i: Don't ack IRQs > 0, fix acking of IRQ 0 All IRQs except for IRQ 0 seem to not need acking, so drop acking for them. The ENMI needs to have the ack done *after* clearing the interrupt source, otherwise we will get a spurious interrupt for each real interrupt. So use the new IRQCHIP_EOI_THREADED flag for this in combination with handle_fasteoi_irq. This uses a separate irq_chip struct for IRQ 0, since we only want this behavior for IRQ 0. Signed-off-by: Hans de Goede Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Cc: Maxime Ripard Link: http://lkml.kernel.org/r/1394733834-26839-5-git-send-email-hdegoede@redhat.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 60a28c69..2029cc5 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -76,16 +76,29 @@ static void sun4i_irq_unmask(struct irq_data *irqd) static struct irq_chip sun4i_irq_chip = { .name = "sun4i_irq", - .irq_ack = sun4i_irq_ack, .irq_mask = sun4i_irq_mask, .irq_unmask = sun4i_irq_unmask, }; +/* IRQ 0 / the ENMI needs a late eoi call */ +static struct irq_chip sun4i_irq_chip_enmi = { + .name = "sun4i_irq", + .irq_eoi = sun4i_irq_ack, + .irq_mask = sun4i_irq_mask, + .irq_unmask = sun4i_irq_unmask, + .flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED, +}; + static int sun4i_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw) { - irq_set_chip_and_handler(virq, &sun4i_irq_chip, - handle_level_irq); + if (hw == 0) + irq_set_chip_and_handler(virq, &sun4i_irq_chip_enmi, + handle_fasteoi_irq); + else + irq_set_chip_and_handler(virq, &sun4i_irq_chip, + handle_level_irq); + set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); return 0; -- cgit v0.10.2 From 0fae799e869b9b1ece8c04c714d0529da0b0bade Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 13 Mar 2014 16:54:03 -0300 Subject: perf bench numa: Make no args mean 'run all tests' If we call just: perf bench numa mem it will present the same output as: perf bench numa mem -h i.e. ask for instructions about what to run. While that is kinda ok, using 'run all tests' as the default, i.e. making 'no parms' be equivalent to: perf bench numa mem -a Will allow: perf bench numa all to actually do what is asked: i.e. run all the 'bench' tests, instead of responding to that by asking what to do. That, in turn, allows: perf bench all to actually complete, for the same reasons. And after that, the tests that come after that, and that at some point hit a NULL deref, will run, allowing me to reproduce a recently reported problem. That when you have the needed numa libraries, which wasn't the case for the reporter, making me a bit confused after trying to reproduce his report. So make no parms mean -a. Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Patrick Palka Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-x7h0ghx4pef4n0brywg21krk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index d4c83c6..97d86d8 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1593,6 +1593,7 @@ static void init_params(struct params *p, const char *name, int argc, const char p->data_rand_walk = true; p->nr_loops = -1; p->init_random = true; + p->run_all = argc == 1; } static int run_bench_numa(const char *name, const char **argv) -- cgit v0.10.2 From 76884cb2f7da5282019bf5c90e8f804429150742 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 26 Feb 2014 14:47:21 +0000 Subject: ahci: st: Add support for ST's SATA IP Acked-by: Alexandre Torgue Signed-off-by: Lee Jones Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 2e6a6f7..46258ea 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -97,6 +97,15 @@ config SATA_AHCI_PLATFORM If unsure, say N. +config SATA_AHCI_ST + tristate "ST SATA support" + depends on SATA_AHCI_PLATFORM + select GENERIC_PHY + help + This option enables support for ST SATA controller. + + If unsure, say N. + config AHCI_IMX tristate "Freescale i.MX AHCI SATA support" depends on SATA_AHCI_PLATFORM && MFD_SYSCON diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 246050b..6bbd6da 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_ATA) += libata.o obj-$(CONFIG_SATA_AHCI) += ahci.o libahci.o obj-$(CONFIG_SATA_ACARD_AHCI) += acard-ahci.o libahci.o obj-$(CONFIG_SATA_AHCI_PLATFORM) += ahci_platform.o libahci.o +obj-$(CONFIG_SATA_AHCI_ST) += ahci_st.o obj-$(CONFIG_SATA_FSL) += sata_fsl.o obj-$(CONFIG_SATA_INIC162X) += sata_inic162x.o obj-$(CONFIG_SATA_SIL24) += sata_sil24.o diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c new file mode 100644 index 0000000..2f95133 --- /dev/null +++ b/drivers/ata/ahci_st.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2012 STMicroelectronics Limited + * + * Authors: Francesco Virlinzi + * Alexandre Torgue + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ahci.h" + +#define ST_AHCI_OOBR 0xbc +#define ST_AHCI_OOBR_WE BIT(31) +#define ST_AHCI_OOBR_CWMIN_SHIFT 24 +#define ST_AHCI_OOBR_CWMAX_SHIFT 16 +#define ST_AHCI_OOBR_CIMIN_SHIFT 8 +#define ST_AHCI_OOBR_CIMAX_SHIFT 0 + +struct st_ahci_drv_data { + struct platform_device *ahci; + struct phy *phy; + struct reset_control *pwr; + struct reset_control *sw_rst; + struct reset_control *pwr_rst; + struct ahci_host_priv *hpriv; +}; + +static void st_ahci_configure_oob(void __iomem *mmio) +{ + unsigned long old_val, new_val; + + new_val = (0x02 << ST_AHCI_OOBR_CWMIN_SHIFT) | + (0x04 << ST_AHCI_OOBR_CWMAX_SHIFT) | + (0x08 << ST_AHCI_OOBR_CIMIN_SHIFT) | + (0x0C << ST_AHCI_OOBR_CIMAX_SHIFT); + + old_val = readl(mmio + ST_AHCI_OOBR); + writel(old_val | ST_AHCI_OOBR_WE, mmio + ST_AHCI_OOBR); + writel(new_val | ST_AHCI_OOBR_WE, mmio + ST_AHCI_OOBR); + writel(new_val, mmio + ST_AHCI_OOBR); +} + +static int st_ahci_deassert_resets(struct device *dev) +{ + struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev); + int err; + + if (drv_data->pwr) { + err = reset_control_deassert(drv_data->pwr); + if (err) { + dev_err(dev, "unable to bring out of pwrdwn\n"); + return err; + } + } + + st_ahci_configure_oob(drv_data->hpriv->mmio); + + if (drv_data->sw_rst) { + err = reset_control_deassert(drv_data->sw_rst); + if (err) { + dev_err(dev, "unable to bring out of sw-rst\n"); + return err; + } + } + + if (drv_data->pwr_rst) { + err = reset_control_deassert(drv_data->pwr_rst); + if (err) { + dev_err(dev, "unable to bring out of pwr-rst\n"); + return err; + } + } + + return 0; +} + +static int st_ahci_probe_resets(struct platform_device *pdev) +{ + struct st_ahci_drv_data *drv_data = platform_get_drvdata(pdev); + + drv_data->pwr = devm_reset_control_get(&pdev->dev, "pwr-dwn"); + if (IS_ERR(drv_data->pwr)) { + dev_info(&pdev->dev, "power reset control not defined\n"); + drv_data->pwr = NULL; + } + + drv_data->sw_rst = devm_reset_control_get(&pdev->dev, "sw-rst"); + if (IS_ERR(drv_data->sw_rst)) { + dev_info(&pdev->dev, "soft reset control not defined\n"); + drv_data->sw_rst = NULL; + } + + drv_data->pwr_rst = devm_reset_control_get(&pdev->dev, "pwr-rst"); + if (IS_ERR(drv_data->pwr_rst)) { + dev_dbg(&pdev->dev, "power soft reset control not defined\n"); + drv_data->pwr_rst = NULL; + } + + return st_ahci_deassert_resets(&pdev->dev); +} + +static const struct ata_port_info st_ahci_port_info = { + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_platform_ops, +}; + +static int st_ahci_probe(struct platform_device *pdev) +{ + struct st_ahci_drv_data *drv_data; + struct ahci_host_priv *hpriv; + int err; + + drv_data = devm_kzalloc(&pdev->dev, sizeof(*drv_data), GFP_KERNEL); + if (!drv_data) + return -ENOMEM; + + platform_set_drvdata(pdev, drv_data); + + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + return PTR_ERR(hpriv); + + drv_data->hpriv = hpriv; + + err = st_ahci_probe_resets(pdev); + if (err) + return err; + + err = ahci_platform_enable_resources(hpriv); + if (err) + return err; + + err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0); + if (err) { + ahci_platform_disable_resources(hpriv); + return err; + } + + return 0; +} + +static int st_ahci_remove(struct platform_device *pdev) +{ + struct st_ahci_drv_data *drv_data = platform_get_drvdata(pdev); + struct ahci_host_priv *hpriv = drv_data->hpriv; + int err; + + if (drv_data->pwr) { + err = reset_control_assert(drv_data->pwr); + if (err) + dev_err(&pdev->dev, "unable to pwrdwn\n"); + } + + ahci_platform_disable_resources(hpriv); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int st_ahci_suspend(struct device *dev) +{ + struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = drv_data->hpriv; + int err; + + if (drv_data->pwr) { + err = reset_control_assert(drv_data->pwr); + if (err) { + dev_err(dev, "unable to pwrdwn"); + return err; + } + } + + ahci_platform_disable_resources(hpriv); + + return 0; +} + +static int st_ahci_resume(struct device *dev) +{ + struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = drv_data->hpriv; + int err; + + err = ahci_platform_enable_resources(hpriv); + if (err) + return err; + + err = st_ahci_deassert_resets(dev); + if (err) { + ahci_platform_disable_resources(hpriv); + return err; + } + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(st_ahci_pm_ops, st_ahci_suspend, st_ahci_resume); + +static struct of_device_id st_ahci_match[] = { + { .compatible = "st,ahci", }, + {}, +}; +MODULE_DEVICE_TABLE(of, st_ahci_match); + +static struct platform_driver st_ahci_driver = { + .driver = { + .name = "st_ahci", + .owner = THIS_MODULE, + .pm = &st_ahci_pm_ops, + .of_match_table = of_match_ptr(st_ahci_match), + }, + .probe = st_ahci_probe, + .remove = st_ahci_remove, +}; +module_platform_driver(st_ahci_driver); + +MODULE_AUTHOR("Alexandre Torgue "); +MODULE_AUTHOR("Francesco Virlinzi "); +MODULE_DESCRIPTION("STMicroelectronics Sata Ahci driver"); +MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From 4a2e51234038fb8b24963a2f32e8b55980085a23 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 12 Mar 2014 12:39:38 +0000 Subject: ahci: st: Standardise naming conventions Other devices have adopted similar naming conventions which have been accepted as the standard. This patch brings any mention of the the ST AHCI driver into line with them. Suggested-by: Bartlomiej Zolnierkiewicz Signed-off-by: Lee Jones Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 46258ea..88c2c12 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -97,12 +97,12 @@ config SATA_AHCI_PLATFORM If unsure, say N. -config SATA_AHCI_ST - tristate "ST SATA support" +config AHCI_ST + tristate "ST AHCI SATA support" depends on SATA_AHCI_PLATFORM select GENERIC_PHY help - This option enables support for ST SATA controller. + This option enables support for ST AHCI SATA controller. If unsure, say N. diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index 2f95133..f5af660 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -235,5 +235,5 @@ module_platform_driver(st_ahci_driver); MODULE_AUTHOR("Alexandre Torgue "); MODULE_AUTHOR("Francesco Virlinzi "); -MODULE_DESCRIPTION("STMicroelectronics Sata Ahci driver"); +MODULE_DESCRIPTION("STMicroelectronics SATA AHCI Driver"); MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From c51a848973da8b347191c0e317da3b912c2cdb6a Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 12 Mar 2014 12:39:39 +0000 Subject: ahci: st: Remove legacy dependencies on PHY Suggested-by: Bartlomiej Zolnierkiewicz Signed-off-by: Lee Jones Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 88c2c12..d2209e4 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -100,7 +100,6 @@ config SATA_AHCI_PLATFORM config AHCI_ST tristate "ST AHCI SATA support" depends on SATA_AHCI_PLATFORM - select GENERIC_PHY help This option enables support for ST AHCI SATA controller. diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index f5af660..a28532a 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -33,7 +32,6 @@ struct st_ahci_drv_data { struct platform_device *ahci; - struct phy *phy; struct reset_control *pwr; struct reset_control *sw_rst; struct reset_control *pwr_rst; -- cgit v0.10.2 From a82370842834875c81b482181af280353aa6be05 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 12 Mar 2014 12:39:40 +0000 Subject: ahci: st: Utilise ata_platform_remove_one() call ata_platform_remove_one() allows us to specify our own exit function via platform data then goes off and removes ATA Host and Port in preparation for device removal. Suggested-by: Bartlomiej Zolnierkiewicz Signed-off-by: Lee Jones Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index a28532a..3edec5d 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -87,6 +87,23 @@ static int st_ahci_deassert_resets(struct device *dev) return 0; } +static int st_ahci_exit(struct device *dev) +{ + struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = drv_data->hpriv; + int err; + + if (drv_data->pwr) { + err = reset_control_assert(drv_data->pwr); + if (err) + dev_err(&pdev->dev, "unable to pwrdwn\n"); + } + + ahci_platform_disable_resources(hpriv); + + return 0; +} + static int st_ahci_probe_resets(struct platform_device *pdev) { struct st_ahci_drv_data *drv_data = platform_get_drvdata(pdev); @@ -122,6 +139,7 @@ static const struct ata_port_info st_ahci_port_info = { static int st_ahci_probe(struct platform_device *pdev) { struct st_ahci_drv_data *drv_data; + struct ahci_platform_data *pdata; struct ahci_host_priv *hpriv; int err; @@ -131,6 +149,13 @@ static int st_ahci_probe(struct platform_device *pdev) platform_set_drvdata(pdev, drv_data); + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + pdata->exit = st_ahci_exit; + pdev->dev.platform_data = pdata; + hpriv = ahci_platform_get_resources(pdev); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); @@ -154,23 +179,6 @@ static int st_ahci_probe(struct platform_device *pdev) return 0; } -static int st_ahci_remove(struct platform_device *pdev) -{ - struct st_ahci_drv_data *drv_data = platform_get_drvdata(pdev); - struct ahci_host_priv *hpriv = drv_data->hpriv; - int err; - - if (drv_data->pwr) { - err = reset_control_assert(drv_data->pwr); - if (err) - dev_err(&pdev->dev, "unable to pwrdwn\n"); - } - - ahci_platform_disable_resources(hpriv); - - return 0; -} - #ifdef CONFIG_PM_SLEEP static int st_ahci_suspend(struct device *dev) { @@ -227,7 +235,7 @@ static struct platform_driver st_ahci_driver = { .of_match_table = of_match_ptr(st_ahci_match), }, .probe = st_ahci_probe, - .remove = st_ahci_remove, + .remove = ata_platform_remove_one, }; module_platform_driver(st_ahci_driver); -- cgit v0.10.2 From 48c54df1305c4287cf8d8a344de6b70b1d56e234 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 12 Mar 2014 12:39:41 +0000 Subject: ahci: st: Only build for ST-Micro h/w This device is designed specifically to run on ST-Microelectronics' hardware. To ensure no attempts are made to run on anything incompatible we add a dependency on ST architecture Suggested-by: Bartlomiej Zolnierkiewicz Signed-off-by: Lee Jones Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index d2209e4..4c338b2 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -100,6 +100,7 @@ config SATA_AHCI_PLATFORM config AHCI_ST tristate "ST AHCI SATA support" depends on SATA_AHCI_PLATFORM + depends on ARCH_STI help This option enables support for ST AHCI SATA controller. -- cgit v0.10.2 From 761a8c2765df17ac5296e2631a16ec08d1a0cb1c Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Wed, 12 Mar 2014 12:39:42 +0000 Subject: ahci: st: Invoke AHCI Platform Suspend/Resume This is where we disable IRQs on suspend and update the internal power state during suspend/resume. Suggested-by: Bartlomiej Zolnierkiewicz Signed-off-by: Lee Jones Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index 3edec5d..17191b9 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -186,6 +186,10 @@ static int st_ahci_suspend(struct device *dev) struct ahci_host_priv *hpriv = drv_data->hpriv; int err; + ret = ahci_platform_suspend_host(dev); + if (ret) + return ret; + if (drv_data->pwr) { err = reset_control_assert(drv_data->pwr); if (err) { @@ -215,7 +219,7 @@ static int st_ahci_resume(struct device *dev) return err; } - return 0; + return ahci_platform_resume_host(dev); } #endif -- cgit v0.10.2 From a043971141f163f9845324a2f83502d15011485d Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sat, 14 Dec 2013 20:31:55 -0800 Subject: perf bench: Add futex-hash microbenchmark Introduce futexes to perf-bench and add a program that stresses and measures the kernel's implementation of the hash table. This is a multi-threaded program that simply measures the amount of failed futex wait calls - we only want to deal with the hashing overhead, so a negative return of futex_wait_setup() is enough to do the trick. An example run: $ perf bench futex hash -t 32 Run summary [PID 10989]: 32 threads, each operating on 1024 [private] futexes for 10 secs. [thread 0] futexes: 0x19d9b10 ... 0x19dab0c [ 418713 ops/sec ] [thread 1] futexes: 0x19daca0 ... 0x19dbc9c [ 469913 ops/sec ] [thread 2] futexes: 0x19dbe30 ... 0x19dce2c [ 479744 ops/sec ] ... [thread 31] futexes: 0x19fbb80 ... 0x19fcb7c [ 464179 ops/sec ] Averaged 454310 operations/sec (+- 0.84%), total secs = 10 Signed-off-by: Davidlohr Bueso Acked-by: Darren Hart Cc: Aswin Chandramouleeswaran Cc: Darren Hart Cc: Ingo Molnar Cc: Jason Low Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/1387081917-9102-2-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1f7ec48..54ae547 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -426,6 +426,7 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o +BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 0fdc852..34edb5c 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -31,6 +31,7 @@ extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused); extern int bench_mem_memset(int argc, const char **argv, const char *prefix); +extern int bench_futex_hash(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c new file mode 100644 index 0000000..a84206e --- /dev/null +++ b/tools/perf/bench/futex-hash.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing. + * + * This program is particularly useful for measuring the kernel's futex hash + * table/function implementation. In order for it to make sense, use with as + * many threads and futexes as possible. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include +#include +#include +#include + +static unsigned int nthreads = 0; +static unsigned int nsecs = 10; +/* amount of futexes per thread */ +static unsigned int nfutexes = 1024; +static bool fshared = false, done = false, silent = false; + +struct timeval start, end, runtime; +static pthread_mutex_t thread_lock; +static unsigned int threads_starting; +static struct stats throughput_stats; +static pthread_cond_t thread_parent, thread_worker; + +struct worker { + int tid; + u_int32_t *futex; + pthread_t thread; + unsigned long ops; +}; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), + OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_END() +}; + +static const char * const bench_futex_hash_usage[] = { + "perf bench futex hash ", + NULL +}; + +static void *workerfn(void *arg) +{ + int ret; + unsigned int i; + struct worker *w = (struct worker *) arg; + + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + do { + for (i = 0; i < nfutexes; i++, w->ops++) { + /* + * We want the futex calls to fail in order to stress + * the hashing of uaddr and not measure other steps, + * such as internal waitqueue handling, thus enlarging + * the critical region protected by hb->lock. + */ + ret = futex_wait(&w->futex[i], 1234, NULL, + fshared ? 0 : FUTEX_PRIVATE_FLAG); + if (!silent && + (!ret || errno != EAGAIN || errno != EWOULDBLOCK)) + warn("Non-expected futex return call"); + } + } while (!done); + + return NULL; +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + /* inform all threads that we're done for the day */ + done = true; + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); +} + +static void print_summary(void) +{ + unsigned long avg = avg_stats(&throughput_stats); + double stddev = stddev_stats(&throughput_stats); + + printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", + !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), + (int) runtime.tv_sec); +} + +int bench_futex_hash(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + cpu_set_t cpu; + struct sigaction act; + unsigned int i, ncpus; + pthread_attr_t thread_attr; + struct worker *worker = NULL; + + argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); + if (argc) { + usage_with_options(bench_futex_hash_usage, options); + exit(EXIT_FAILURE); + } + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) /* default to the number of CPUs */ + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + goto errmem; + + printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", + getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs); + + init_stats(&throughput_stats); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + threads_starting = nthreads; + pthread_attr_init(&thread_attr); + gettimeofday(&start, NULL); + for (i = 0; i < nthreads; i++) { + worker[i].tid = i; + worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); + if (!worker[i].futex) + goto errmem; + + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); + if (ret) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, + (void *)(struct worker *) &worker[i]); + if (ret) + err(EXIT_FAILURE, "pthread_create"); + + } + pthread_attr_destroy(&thread_attr); + + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + sleep(nsecs); + toggle_done(0, NULL, NULL); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i].thread, NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + + for (i = 0; i < nthreads; i++) { + unsigned long t = worker[i].ops/runtime.tv_sec; + update_stats(&throughput_stats, t); + if (!silent) { + if (nfutexes == 1) + printf("[thread %2d] futex: %p [ %ld ops/sec ]\n", + worker[i].tid, &worker[i].futex[0], t); + else + printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n", + worker[i].tid, &worker[i].futex[0], + &worker[i].futex[nfutexes-1], t); + } + + free(worker[i].futex); + } + + print_summary(); + + free(worker); + return ret; +errmem: + err(EXIT_FAILURE, "calloc"); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h new file mode 100644 index 0000000..7d0bda5 --- /dev/null +++ b/tools/perf/bench/futex.h @@ -0,0 +1,48 @@ +/* + * Glibc independent futex library for testing kernel functionality. + * Shamelessly stolen from Darren Hart + * http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/ + */ + +#ifndef _FUTEX_H +#define _FUTEX_H + +#include +#include +#include +#include + +/** + * futex() - SYS_futex syscall wrapper + * @uaddr: address of first futex + * @op: futex op code + * @val: typically expected value of uaddr, but varies by op + * @timeout: typically an absolute struct timespec (except where noted + * otherwise). Overloaded by some ops + * @uaddr2: address of second futex for some ops\ + * @val3: varies by op + * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG + * + * futex() is used by all the following futex op wrappers. It can also be + * used for misuse and abuse testing. Generally, the specific op wrappers + * should be used instead. It is a macro instead of an static inline function as + * some of the types over overloaded (timeout is used for nr_requeue for + * example). + * + * These argument descriptions are the defaults for all + * like-named arguments in the following wrappers except where noted below. + */ +#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ + syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) + +/** + * futex_wait() - block on uaddr with optional timeout + * @timeout: relative timeout + */ +static inline int +futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); +} + +#endif /* _FUTEX_H */ diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index e47f90c..a8b0138 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -12,6 +12,7 @@ * sched ... scheduler and IPC performance * mem ... memory access performance * numa ... NUMA scheduling and MM performance + * futex ... Futex performance */ #include "perf.h" #include "util/util.h" @@ -54,6 +55,12 @@ static struct bench mem_benchmarks[] = { { NULL, NULL, NULL } }; +static struct bench futex_benchmarks[] = { + { "hash", "Benchmark for futex hash table", bench_futex_hash }, + { "all", "Test all futex benchmarks", NULL }, + { NULL, NULL, NULL } +}; + struct collection { const char *name; const char *summary; @@ -61,11 +68,12 @@ struct collection { }; static struct collection collections[] = { - { "sched", "Scheduler and IPC benchmarks", sched_benchmarks }, + { "sched", "Scheduler and IPC benchmarks", sched_benchmarks }, { "mem", "Memory access benchmarks", mem_benchmarks }, #ifdef HAVE_LIBNUMA_SUPPORT { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks }, #endif + {"futex", "Futex stressing benchmarks", futex_benchmarks }, { "all", "All benchmarks", NULL }, { NULL, NULL, NULL } }; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 6898ad0..e18a8b5 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -12,6 +12,9 @@ #ifndef __NR_perf_event_open # define __NR_perf_event_open 336 #endif +#ifndef __NR_futex +# define __NR_futex 240 +#endif #endif #if defined(__x86_64__) @@ -23,6 +26,9 @@ #ifndef __NR_perf_event_open # define __NR_perf_event_open 298 #endif +#ifndef __NR_futex +# define __NR_futex 202 +#endif #endif #ifdef __powerpc__ -- cgit v0.10.2 From 27db78307481dbba68c5f3563c6cb694b25521d9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sat, 14 Dec 2013 20:31:56 -0800 Subject: perf bench: Add futex-wake microbenchmark Block a bunch of threads on a futex and wake them up, N at a time. This program is particularly useful to measure the latency of nthread wakeups in non-error situations: all waiters are queued and all wake calls wakeup one or more tasks. An example run: $ perf bench futex wake -t 512 -r 100 Run summary [PID 27823]: blocking on 512 threads (at futex 0x7e10d4), waking up 1 at a time. [Run 1]: Wokeup 512 of 512 threads in 6.0080 ms [Run 2]: Wokeup 512 of 512 threads in 5.2280 ms [Run 3]: Wokeup 512 of 512 threads in 4.8300 ms ... [Run 100]: Wokeup 512 of 512 threads in 5.0100 ms Wokeup 512 of 512 threads in 5.0109 ms (+-2.25%) Signed-off-by: Davidlohr Bueso Acked-by: Darren Hart Cc: Aswin Chandramouleeswaran Cc: Darren Hart Cc: Ingo Molnar Cc: Jason Low Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/1387081917-9102-3-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 54ae547..6fa5d8b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -427,6 +427,7 @@ endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o +BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 34edb5c..6ac3f1d 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -32,6 +32,7 @@ extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused); extern int bench_mem_memset(int argc, const char **argv, const char *prefix); extern int bench_futex_hash(int argc, const char **argv, const char *prefix); +extern int bench_futex_wake(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c new file mode 100644 index 0000000..d096169 --- /dev/null +++ b/tools/perf/bench/futex-wake.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time. + * + * This program is particularly useful to measure the latency of nthread wakeups + * in non-error situations: all waiters are queued and all wake calls wakeup + * one or more tasks, and thus the waitqueue is never empty. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include +#include +#include +#include + +/* all threads will block on the same futex */ +static u_int32_t futex1 = 0; + +/* + * How many wakeups to do at a time. + * Default to 1 in order to make the kernel work more. + */ +static unsigned int nwakes = 1; + +/* + * There can be significant variance from run to run, + * the more repeats, the more exact the overall avg and + * the better idea of the futex latency. + */ +static unsigned int repeat = 10; + +pthread_t *worker; +static bool done = 0, silent = 0; +static pthread_mutex_t thread_lock; +static pthread_cond_t thread_parent, thread_worker; +static struct stats waketime_stats, wakeup_stats; +static unsigned int ncpus, threads_starting, nthreads = 0; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"), + OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_END() +}; + +static const char * const bench_futex_wake_usage[] = { + "perf bench futex wake ", + NULL +}; + +static void *workerfn(void *arg __maybe_unused) +{ + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG); + return NULL; +} + +static void print_summary(void) +{ + double waketime_avg = avg_stats(&waketime_stats); + double waketime_stddev = stddev_stats(&waketime_stats); + unsigned int wakeup_avg = avg_stats(&wakeup_stats); + + printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n", + wakeup_avg, + nthreads, + waketime_avg/1e3, + rel_stddev_stats(waketime_stddev, waketime_avg)); +} + +static void block_threads(pthread_t *w, + pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + /* create and block all threads */ + for (i = 0; i < nthreads; i++) { + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + err(EXIT_FAILURE, "pthread_create"); + } +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + done = true; +} + +int bench_futex_wake(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i, j; + struct sigaction act; + pthread_attr_t thread_attr; + + argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0); + if (argc) { + usage_with_options(bench_futex_wake_usage, options); + exit(EXIT_FAILURE); + } + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + printf("Run summary [PID %d]: blocking on %d threads (at futex %p), " + "waking up %d at a time.\n\n", + getpid(), nthreads, &futex1, nwakes); + + init_stats(&wakeup_stats); + init_stats(&waketime_stats); + pthread_attr_init(&thread_attr); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + for (j = 0; j < repeat && !done; j++) { + unsigned int nwoken = 0; + struct timeval start, end, runtime; + + /* create, launch & block all threads */ + block_threads(worker, thread_attr); + + /* make sure all threads are already blocked */ + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + usleep(100000); + + /* Ok, all threads are patiently blocked, start waking folks up */ + gettimeofday(&start, NULL); + while (nwoken != nthreads) + nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG); + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); + + update_stats(&wakeup_stats, nwoken); + update_stats(&waketime_stats, runtime.tv_usec); + + if (!silent) { + printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n", + j + 1, nwoken, nthreads, runtime.tv_usec/1e3); + } + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i], NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + pthread_attr_destroy(&thread_attr); + + print_summary(); + + free(worker); + return ret; +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 7d0bda5..6ac4509 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -45,4 +45,14 @@ futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflag return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); } +/** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks + */ +static inline int +futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) +{ + return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); +} + #endif /* _FUTEX_H */ diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index a8b0138..743a30a 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -57,6 +57,7 @@ static struct bench mem_benchmarks[] = { static struct bench futex_benchmarks[] = { { "hash", "Benchmark for futex hash table", bench_futex_hash }, + { "wake", "Benchmark for futex wake calls", bench_futex_wake }, { "all", "Test all futex benchmarks", NULL }, { NULL, NULL, NULL } }; -- cgit v0.10.2 From 0fb298cf95c0d8119557b7d4657724a146e0622e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sat, 14 Dec 2013 20:31:57 -0800 Subject: perf bench: Add futex-requeue microbenchmark Block a bunch of threads on a futex and requeue them on another, N at a time. This program is particularly useful to measure the latency of nthread requeues without waking up any tasks -- thus mimicking a regular futex_wait. An example run: $ perf bench futex requeue -r 100 -t 64 Run summary [PID 151011]: Requeuing 64 threads (from 0x7d15c4 to 0x7d15c8), 1 at a time. [Run 1]: Requeued 64 of 64 threads in 0.0400 ms [Run 2]: Requeued 64 of 64 threads in 0.0390 ms [Run 3]: Requeued 64 of 64 threads in 0.0400 ms ... [Run 100]: Requeued 64 of 64 threads in 0.0390 ms Requeued 64 of 64 threads in 0.0399 ms (+-0.37%) Signed-off-by: Davidlohr Bueso Acked-by: Darren Hart Cc: Aswin Chandramouleeswaran Cc: Darren Hart Cc: Ingo Molnar Cc: Jason Low Cc: Peter Zijlstra Cc: Scott J Norton Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/1387081917-9102-4-git-send-email-davidlohr@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6fa5d8b..50d875d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -428,6 +428,7 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o +BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 6ac3f1d..eba46709 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -33,6 +33,7 @@ extern int bench_mem_memcpy(int argc, const char **argv, extern int bench_mem_memset(int argc, const char **argv, const char *prefix); extern int bench_futex_hash(int argc, const char **argv, const char *prefix); extern int bench_futex_wake(int argc, const char **argv, const char *prefix); +extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c new file mode 100644 index 0000000..a1625587 --- /dev/null +++ b/tools/perf/bench/futex-requeue.c @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso + * + * futex-requeue: Block a bunch of threads on futex1 and requeue them + * on futex2, N at a time. + * + * This program is particularly useful to measure the latency of nthread + * requeues without waking up any tasks -- thus mimicking a regular futex_wait. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include +#include +#include +#include + +static u_int32_t futex1 = 0, futex2 = 0; + +/* + * How many tasks to requeue at a time. + * Default to 1 in order to make the kernel work more. + */ +static unsigned int nrequeue = 1; + +/* + * There can be significant variance from run to run, + * the more repeats, the more exact the overall avg and + * the better idea of the futex latency. + */ +static unsigned int repeat = 10; + +static pthread_t *worker; +static bool done = 0, silent = 0; +static pthread_mutex_t thread_lock; +static pthread_cond_t thread_parent, thread_worker; +static struct stats requeuetime_stats, requeued_stats; +static unsigned int ncpus, threads_starting, nthreads = 0; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"), + OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_END() +}; + +static const char * const bench_futex_requeue_usage[] = { + "perf bench futex requeue ", + NULL +}; + +static void print_summary(void) +{ + double requeuetime_avg = avg_stats(&requeuetime_stats); + double requeuetime_stddev = stddev_stats(&requeuetime_stats); + unsigned int requeued_avg = avg_stats(&requeued_stats); + + printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n", + requeued_avg, + nthreads, + requeuetime_avg/1e3, + rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); +} + +static void *workerfn(void *arg __maybe_unused) +{ + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG); + return NULL; +} + +static void block_threads(pthread_t *w, + pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + /* create and block all threads */ + for (i = 0; i < nthreads; i++) { + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + err(EXIT_FAILURE, "pthread_create"); + } +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + done = true; +} + +int bench_futex_requeue(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i, j; + struct sigaction act; + pthread_attr_t thread_attr; + + argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0); + if (argc) + goto err; + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), " + "%d at a time.\n\n", + getpid(), nthreads, &futex1, &futex2, nrequeue); + + init_stats(&requeued_stats); + init_stats(&requeuetime_stats); + pthread_attr_init(&thread_attr); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + for (j = 0; j < repeat && !done; j++) { + unsigned int nrequeued = 0; + struct timeval start, end, runtime; + + /* create, launch & block all threads */ + block_threads(worker, thread_attr); + + /* make sure all threads are already blocked */ + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + usleep(100000); + + /* Ok, all threads are patiently blocked, start requeueing */ + gettimeofday(&start, NULL); + for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) + /* + * Do not wakeup any tasks blocked on futex1, allowing + * us to really measure futex_wait functionality. + */ + futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue, + FUTEX_PRIVATE_FLAG); + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); + + update_stats(&requeued_stats, nrequeued); + update_stats(&requeuetime_stats, runtime.tv_usec); + + if (!silent) { + printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", + j + 1, nrequeued, nthreads, runtime.tv_usec/1e3); + } + + /* everybody should be blocked on futex2, wake'em up */ + nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG); + if (nthreads != nrequeued) + warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i], NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + pthread_attr_destroy(&thread_attr); + + print_summary(); + + free(worker); + return ret; +err: + usage_with_options(bench_futex_requeue_usage, options); + exit(EXIT_FAILURE); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 6ac4509..71f2844 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -55,4 +55,17 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); } +/** +* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 +* @nr_wake: wake up to this many tasks +* @nr_requeue: requeue up to this many tasks +*/ +static inline int +futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + #endif /* _FUTEX_H */ diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 743a30a..f600b74 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -58,6 +58,7 @@ static struct bench mem_benchmarks[] = { static struct bench futex_benchmarks[] = { { "hash", "Benchmark for futex hash table", bench_futex_hash }, { "wake", "Benchmark for futex wake calls", bench_futex_wake }, + { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, { "all", "Test all futex benchmarks", NULL }, { NULL, NULL, NULL } }; -- cgit v0.10.2 From b63940970492f5f187d546e191bc2c6831491fe3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 28 Feb 2014 06:02:14 -0800 Subject: perf mem: Clarify load-latency in documentation Clarify in the documentation that 'perf mem report' reports use-latency, not load/store-latency on Intel systems. This often causes confusion with users. Signed-off-by: Andi Kleen Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1393596135-4227-2-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 888d511..1d78a40 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -18,6 +18,10 @@ from it, into perf.data. Perf record options are accepted and are passed through "perf mem -t report" displays the result. It invokes perf report with the right set of options to display a memory access profile. +Note that on Intel systems the memory latency reported is the use-latency, +not the pure load (or store latency). Use latency includes any pipeline +queueing delays in addition to the memory subsystem latency. + OPTIONS ------- ...:: -- cgit v0.10.2 From 5b4398209d646635a4d84c46a5c7193fbce1a07c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 28 Feb 2014 06:02:15 -0800 Subject: perf probe: Clarify x86 register naming for perf probe Clarify how to specify x86 registers in perf probe. I recently ran into this problem and had to figure it out from the source. Signed-off-by: Andi Kleen Acked-by: Masami Hiramatsu Cc: Masami Hiramatsu Link: http://lkml.kernel.org/r/1393596135-4227-3-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index b715cb7..1513935 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -136,6 +136,8 @@ Each probe argument follows below syntax. 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. +On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. + LINE SYNTAX ----------- Line range is described by following syntax. -- cgit v0.10.2 From bfd66cc71a3f831ba7c2116d79416cfb8883f6cf Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 26 Feb 2014 10:45:26 -0500 Subject: perf tools: Fix synthesizing mmaps for threads Currently if a process creates a bunch of threads using pthread_create and then perf is run in system_wide mode, the mmaps for those threads are not captured with a synthesized mmap event. The reason is those threads are not visible when walking the /proc/ directory looking for /proc//maps files. Instead they are discovered using the /proc//tasks file (which the synthesized comm event uses). This causes problems when a program is trying to map a data address to a tid. Because the tid has no maps, the event is dropped. Changing the program to look up using the pid instead of the tid, finds the correct maps but creates ugly hacks in the program to carry the correct tid around. Fix this by moving the walking of the /proc//tasks up a level (out of the comm function) based on Arnaldo's suggestion. Tweaked things a bit to special case the 'full' bit and 'guest' check. Signed-off-by: Don Zickus Cc: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1393429527-167840-2-git-send-email-dzickus@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index b0f3ca8..55eebe9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -94,14 +94,10 @@ static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len) static pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, - int full, perf_event__handler_t process, struct machine *machine) { - char filename[PATH_MAX]; size_t size; - DIR *tasks; - struct dirent dirent, *next; pid_t tgid; memset(&event->comm, 0, sizeof(event->comm)); @@ -124,53 +120,11 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, event->comm.header.size = (sizeof(event->comm) - (sizeof(event->comm.comm) - size) + machine->id_hdr_size); - if (!full) { - event->comm.tid = pid; - - if (process(tool, event, &synth_sample, machine) != 0) - return -1; + event->comm.tid = pid; - goto out; - } - - if (machine__is_default_guest(machine)) - return 0; - - snprintf(filename, sizeof(filename), "%s/proc/%d/task", - machine->root_dir, pid); - - tasks = opendir(filename); - if (tasks == NULL) { - pr_debug("couldn't open %s\n", filename); - return 0; - } - - while (!readdir_r(tasks, &dirent, &next) && next) { - char *end; - pid = strtol(dirent.d_name, &end, 10); - if (*end) - continue; - - /* already have tgid; jut want to update the comm */ - (void) perf_event__get_comm_tgid(pid, event->comm.comm, - sizeof(event->comm.comm)); - - size = strlen(event->comm.comm) + 1; - size = PERF_ALIGN(size, sizeof(u64)); - memset(event->comm.comm + size, 0, machine->id_hdr_size); - event->comm.header.size = (sizeof(event->comm) - - (sizeof(event->comm.comm) - size) + - machine->id_hdr_size); - - event->comm.tid = pid; - - if (process(tool, event, &synth_sample, machine) != 0) { - tgid = -1; - break; - } - } + if (process(tool, event, &synth_sample, machine) != 0) + return -1; - closedir(tasks); out: return tgid; } @@ -329,12 +283,59 @@ static int __event__synthesize_thread(union perf_event *comm_event, struct perf_tool *tool, struct machine *machine, bool mmap_data) { - pid_t tgid = perf_event__synthesize_comm(tool, comm_event, pid, full, - process, machine); - if (tgid == -1) - return -1; - return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data); + char filename[PATH_MAX]; + DIR *tasks; + struct dirent dirent, *next; + pid_t tgid; + + /* special case: only send one comm event using passed in pid */ + if (!full) { + tgid = perf_event__synthesize_comm(tool, comm_event, pid, + process, machine); + + if (tgid == -1) + return -1; + + return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine, mmap_data); + } + + if (machine__is_default_guest(machine)) + return 0; + + snprintf(filename, sizeof(filename), "%s/proc/%d/task", + machine->root_dir, pid); + + tasks = opendir(filename); + if (tasks == NULL) { + pr_debug("couldn't open %s\n", filename); + return 0; + } + + while (!readdir_r(tasks, &dirent, &next) && next) { + char *end; + int rc = 0; + pid_t _pid; + + _pid = strtol(dirent.d_name, &end, 10); + if (*end) + continue; + + tgid = perf_event__synthesize_comm(tool, comm_event, _pid, + process, machine); + if (tgid == -1) + return -1; + + /* process the thread's maps too */ + rc = perf_event__synthesize_mmap_events(tool, mmap_event, _pid, tgid, + process, machine, mmap_data); + + if (rc) + return rc; + } + + closedir(tasks); + return 0; } int perf_event__synthesize_thread_map(struct perf_tool *tool, -- cgit v0.10.2 From 840aa78782f02d5d442fae9af425118ed0ce8cba Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 14 Mar 2014 15:41:31 +0100 Subject: ata: Fix SC1200 dependencies The SC1200 is a SoC based on the Geode GX1 32-bit x86 processor, so its drivers are only needed on this architecture, except for build testing purpose. Signed-off-by: Jean Delvare Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 4c338b2..e4ac819 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -683,7 +683,7 @@ config PATA_RDC config PATA_SC1200 tristate "SC1200 PATA support" - depends on PCI + depends on PCI && (X86_32 || COMPILE_TEST) help This option enables support for the NatSemi/AMD SC1200 SoC companion chip used with the Geode processor family. -- cgit v0.10.2 From 6b5625b2af30de6ff73402ad070dd21592681821 Mon Sep 17 00:00:00 2001 From: Simon Wood Date: Thu, 13 Mar 2014 12:42:03 -0600 Subject: HID: hid-lg4ff: Support new version of G27 It has been reported that there is a new hardware version of the G27 in the 'wild'. This patch add's this new revision so that it can be sent the command to switch to native mode. Reported-by: "Ivan Baldo" Tested-by: "evilcow" Signed-off-by: Simon Wood Signed-off-by: Jiri Kosina diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index befe0e3..24883b4 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -43,6 +43,7 @@ #define G25_REV_MIN 0x22 #define G27_REV_MAJ 0x12 #define G27_REV_MIN 0x38 +#define G27_2_REV_MIN 0x39 #define to_hid_device(pdev) container_of(pdev, struct hid_device, dev) @@ -130,6 +131,7 @@ static const struct lg4ff_usb_revision lg4ff_revs[] = { {DFP_REV_MAJ, DFP_REV_MIN, &native_dfp}, /* Driving Force Pro */ {G25_REV_MAJ, G25_REV_MIN, &native_g25}, /* G25 */ {G27_REV_MAJ, G27_REV_MIN, &native_g27}, /* G27 */ + {G27_REV_MAJ, G27_2_REV_MIN, &native_g27}, /* G27 v2 */ }; /* Recalculates X axis value accordingly to currently selected range */ -- cgit v0.10.2 From 20e64fa42965a6ec6f0ae4c5d4fda0ccf6c94ff6 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Fri, 14 Mar 2014 13:46:08 +0800 Subject: libata: zpodd: should depend on PM_RUNTIME ZPODD is built on top of runtime PM functionality, it doesn't make sense to have it in a kernel that doesn't have CONFIG_PM_RUNTIME set. Signed-off-by: Aaron Lu Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index e4ac819..0bec5ac 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -60,7 +60,7 @@ config ATA_ACPI config SATA_ZPODD bool "SATA Zero Power Optical Disc Drive (ZPODD) support" - depends on ATA_ACPI + depends on ATA_ACPI && PM_RUNTIME default n help This option adds support for SATA Zero Power Optical Disc -- cgit v0.10.2 From c75da205e02dda3b79ca057e558f97f3d61c855d Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Fri, 14 Mar 2014 13:46:09 +0800 Subject: libata: acpi: avoid passing NULL to ACPI evaluation method If ACPI handle for an ATA device is NULL, we shouldn't call ata_dev_get_GTF as that function will use handle to do some ACPI evaluation. Signed-off-by: Aaron Lu Signed-off-by: Tejun Heo diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 9e69a53..b4f7cc2 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -835,6 +835,7 @@ void ata_acpi_on_resume(struct ata_port *ap) ata_for_each_dev(dev, &ap->link, ALL) { ata_acpi_clear_gtf(dev); if (ata_dev_enabled(dev) && + ata_dev_acpi_handle(dev) && ata_dev_get_GTF(dev, NULL) >= 0) dev->flags |= ATA_DFLAG_ACPI_PENDING; } -- cgit v0.10.2 From d920203695029558ef16925182f89cf6302b50b0 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Fri, 14 Mar 2014 13:46:10 +0800 Subject: libata: zpodd: eliminate odd_can_poweroff Now that we can directly get the ACPI device conterpart of the physical ATA transport device, the odd_can_poweroff can be eliminated. Signed-off-by: Aaron Lu Signed-off-by: Tejun Heo diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index 88949c6..f3a65a3 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -85,21 +85,6 @@ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) return ODD_MECH_TYPE_UNSUPPORTED; } -static bool odd_can_poweroff(struct ata_device *ata_dev) -{ - acpi_handle handle; - struct acpi_device *acpi_dev; - - handle = ata_dev_acpi_handle(ata_dev); - if (!handle) - return false; - - if (acpi_bus_get_device(handle, &acpi_dev)) - return false; - - return acpi_device_can_poweroff(acpi_dev); -} - /* Test if ODD is zero power ready by sense code */ static bool zpready(struct ata_device *dev) { @@ -267,13 +252,11 @@ static void ata_acpi_remove_pm_notifier(struct ata_device *dev) void zpodd_init(struct ata_device *dev) { + struct acpi_device *adev = ACPI_COMPANION(&dev->tdev); enum odd_mech_type mech_type; struct zpodd *zpodd; - if (dev->zpodd) - return; - - if (!odd_can_poweroff(dev)) + if (dev->zpodd || !adev || !acpi_device_can_poweroff(adev)) return; mech_type = zpodd_get_mech_type(dev); -- cgit v0.10.2 From 6eeefccdcfc2cc9697562e740bfe6c35fddd4e1c Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 12 Mar 2014 18:40:51 -0400 Subject: perf bench: Fix NULL pointer dereference in "perf bench all" The for_each_bench() macro must check that the "benchmarks" field of a collection is not NULL before dereferencing it because the "all" collection in particular has a NULL "benchmarks" field (signifying that it has no benchmarks to iterate over). This fixes this NULL pointer dereference when running "perf bench all": [root@ssdandy ~]# perf bench all # Running mem/memset benchmark... # Copying 1MB Bytes ... 2.453675 GB/Sec 12.056327 GB/Sec (with prefault) Segmentation fault (core dumped) [root@ssdandy ~]# Signed-off-by: Patrick Palka Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1394664051-6037-1-git-send-email-patrick@parcs.ath.cx Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index e47f90c..8a987d2 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -76,7 +76,7 @@ static struct collection collections[] = { /* Iterate over all benchmarks within a collection: */ #define for_each_bench(coll, bench) \ - for (bench = coll->benchmarks; bench->name; bench++) + for (bench = coll->benchmarks; bench && bench->name; bench++) static void dump_benchmarks(struct collection *coll) { -- cgit v0.10.2 From f5868f05dc976ecd849d2a8115fab80301c127cd Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 9 Feb 2014 14:32:25 +0100 Subject: MIPS: Replace CONFIG_MIPS64 and CONFIG_MIPS32_R2 Commit 597ce1723e0f ("MIPS: Support for 64-bit FP with O32 binaries") introduced references to two undefined Kconfig macros. CONFIG_MIPS32_R2 should clearly be replaced with CONFIG_CPU_MIPS32_R2. And CONFIG_MIPS64 should be replaced with CONFIG_64BIT. Signed-off-by: Paul Bolle Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/6522/ Tested-by: Aaro Koskinen Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 3220c93..69a9a22 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -106,7 +106,7 @@ .endm .macro fpu_save_double thread status tmp -#if defined(CONFIG_MIPS64) || defined(CONFIG_CPU_MIPS32_R2) +#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) sll \tmp, \status, 5 bgez \tmp, 10f fpu_save_16odd \thread @@ -159,7 +159,7 @@ .endm .macro fpu_restore_double thread status tmp -#if defined(CONFIG_MIPS64) || defined(CONFIG_CPU_MIPS32_R2) +#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) sll \tmp, \status, 5 bgez \tmp, 10f # 16 register mode? diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 6b97495..58e50cb 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -57,7 +57,7 @@ static inline int __enable_fpu(enum fpu_mode mode) return 0; case FPU_64BIT: -#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_MIPS64)) +#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_64BIT)) /* we only have a 32-bit FPU */ return SIGFPE; #endif diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 253b2fb..841ffc2 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -35,9 +35,9 @@ LEAF(_save_fp_context) cfc1 t1, fcr31 -#if defined(CONFIG_64BIT) || defined(CONFIG_MIPS32_R2) +#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) .set push -#ifdef CONFIG_MIPS32_R2 +#ifdef CONFIG_CPU_MIPS32_R2 .set mips64r2 mfc0 t0, CP0_STATUS sll t0, t0, 5 @@ -148,9 +148,9 @@ LEAF(_save_fp_context32) LEAF(_restore_fp_context) EX lw t0, SC_FPC_CSR(a0) -#if defined(CONFIG_64BIT) || defined(CONFIG_MIPS32_R2) +#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) .set push -#ifdef CONFIG_MIPS32_R2 +#ifdef CONFIG_CPU_MIPS32_R2 .set mips64r2 mfc0 t0, CP0_STATUS sll t0, t0, 5 -- cgit v0.10.2 From b616365e6dafb5d7c1cf2a8b6e8cb376a3f4a387 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Fri, 7 Feb 2014 22:31:33 +0800 Subject: MIPS: FPU: Fix conflict of register usage In _restore_fp_context/_restore_fp_context32, t0 is used for both CP0_Status and CP1_FCSR. This is a mistake and cause FP exeception on boot, so fix it. Signed-off-by: Huacai Chen Tested-by: Aaro Koskinen Tested-by: Andreas Barth Cc: John Crispin Cc: Steven J. Hill Cc: Aurelien Jarno Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Patchwork: https://patchwork.linux-mips.org/patch/6507/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 841ffc2..73b0ddf 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -146,7 +146,7 @@ LEAF(_save_fp_context32) * - cp1 status/control register */ LEAF(_restore_fp_context) - EX lw t0, SC_FPC_CSR(a0) + EX lw t1, SC_FPC_CSR(a0) #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2) .set push @@ -191,7 +191,7 @@ LEAF(_restore_fp_context) EX ldc1 $f26, SC_FPREGS+208(a0) EX ldc1 $f28, SC_FPREGS+224(a0) EX ldc1 $f30, SC_FPREGS+240(a0) - ctc1 t0, fcr31 + ctc1 t1, fcr31 jr ra li v0, 0 # success END(_restore_fp_context) @@ -199,7 +199,7 @@ LEAF(_restore_fp_context) #ifdef CONFIG_MIPS32_COMPAT LEAF(_restore_fp_context32) /* Restore an o32 sigcontext. */ - EX lw t0, SC32_FPC_CSR(a0) + EX lw t1, SC32_FPC_CSR(a0) mfc0 t0, CP0_STATUS sll t0, t0, 5 @@ -239,7 +239,7 @@ LEAF(_restore_fp_context32) EX ldc1 $f26, SC32_FPREGS+208(a0) EX ldc1 $f28, SC32_FPREGS+224(a0) EX ldc1 $f30, SC32_FPREGS+240(a0) - ctc1 t0, fcr31 + ctc1 t1, fcr31 jr ra li v0, 0 # success END(_restore_fp_context32) -- cgit v0.10.2 From 3be1a5c4f75989cf457f13f38ff0913dff6d4996 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Mar 2014 01:10:04 +0000 Subject: arm64: enable generic CPU feature modalias matching for this architecture This enables support for the generic CPU feature modalias implementation that wires up optional CPU features to udev based module autoprobing. A file is provided that maps CPU feature numbers to elf_hwcap bits, which is the standard way on arm64 to advertise optional CPU features both internally and to user space. Signed-off-by: Ard Biesheuvel [catalin.marinas@arm.com: removed unnecessary "!!"] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 507ab6e..8cabe36 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -16,6 +16,7 @@ config ARM64 select DCACHE_WORD_ACCESS select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CPU_AUTOPROBE select GENERIC_IOMAP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h new file mode 100644 index 0000000..cd4ac05 --- /dev/null +++ b/arch/arm64/include/asm/cpufeature.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ASM_CPUFEATURE_H +#define __ASM_CPUFEATURE_H + +#include + +/* + * In the arm64 world (as in the ARM world), elf_hwcap is used both internally + * in the kernel and for user space to keep track of which optional features + * are supported by the current system. So let's map feature 'x' to HWCAP_x. + * Note that HWCAP_x constants are bit fields so we need to take the log. + */ + +#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) +#define cpu_feature(x) ilog2(HWCAP_ ## x) + +static inline bool cpu_have_feature(unsigned int num) +{ + return elf_hwcap & (1UL << num); +} + +#endif -- cgit v0.10.2 From e172800e5d3162f97d332b3745e3743ce150ec48 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Mar 2014 17:47:04 +0000 Subject: asm-generic: rwsem: de-PPCify rwsem.h asm-generic/rwsem.h used to live under arch/powerpc. During its liberation to common code, a few references to its former home where preserved, in particular the definition of RWSEM_ACTIVE_MASK is predicated on CONFIG_PPC64. This patch updates the ifdefs and comments to architecturally neutral versions. Acked-by: Arnd Bergmann Acked-by: Richard Kuo Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index bb1e2cd..d48bf5a 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -1,5 +1,5 @@ -#ifndef _ASM_POWERPC_RWSEM_H -#define _ASM_POWERPC_RWSEM_H +#ifndef _ASM_GENERIC_RWSEM_H +#define _ASM_GENERIC_RWSEM_H #ifndef _LINUX_RWSEM_H #error "Please don't include directly, use instead." @@ -8,7 +8,7 @@ #ifdef __KERNEL__ /* - * R/W semaphores for PPC using the stuff in lib/rwsem.c. + * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. * Adapted largely from include/asm-i386/rwsem.h * by Paul Mackerras . */ @@ -16,7 +16,7 @@ /* * the semaphore definition */ -#ifdef CONFIG_PPC64 +#ifdef CONFIG_64BIT # define RWSEM_ACTIVE_MASK 0xffffffffL #else # define RWSEM_ACTIVE_MASK 0x0000ffffL @@ -129,4 +129,4 @@ static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) } #endif /* __KERNEL__ */ -#endif /* _ASM_POWERPC_RWSEM_H */ +#endif /* _ASM_GENERIC_RWSEM_H */ -- cgit v0.10.2 From c209f79940ac0c75ae8d2f503a2b9d86255e266c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Mar 2014 17:47:05 +0000 Subject: arm64: rwsem: use asm-generic rwsem implementation asm-generic offers an atomic-add based rwsem implementation, which can avoid the need for heavier, spinlock-based synchronisation on the fast path. This patch makes use of the optimised implementation for arm64 CPUs. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8cabe36..4723fc1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -77,7 +77,7 @@ config LOCKDEP_SUPPORT config TRACE_IRQFLAGS_SUPPORT def_bool y -config RWSEM_GENERIC_SPINLOCK +config RWSEM_XCHGADD_ALGORITHM def_bool y config GENERIC_HWEIGHT diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 71c53ec..41cde70 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -29,6 +29,7 @@ generic-y += pci.h generic-y += poll.h generic-y += posix_types.h generic-y += resource.h +generic-y += rwsem.h generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h -- cgit v0.10.2 From 9c7e535fcc1725fc2e2d4f0d9dd14137f0243e23 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Tue, 25 Feb 2014 10:02:13 +0000 Subject: arm64: mm: Route pmd thp functions through pte equivalents Rather than have separate hugetlb and transparent huge page pmd manipulation functions, re-wire our thp functions to simply call the pte equivalents. This allows THP to take advantage of the new PTE_WRITE logic introduced in: c2c93e5 arm64: mm: Introduce PTE_WRITE To represent splitting THPs we use the PTE_SPECIAL bit as this is not used for pmds. Signed-off-by: Steve Capper Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 72c9ac3..ae10350 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -227,36 +227,36 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_PTE_SPECIAL -/* - * Software PMD bits for THP - */ +static inline pte_t pmd_pte(pmd_t pmd) +{ + return __pte(pmd_val(pmd)); +} -#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) -#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 57) +static inline pmd_t pte_pmd(pte_t pte) +{ + return __pmd(pte_val(pte)); +} /* * THP definitions. */ -#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) - -#define __HAVE_ARCH_PMD_WRITE -#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) -#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#define pmd_trans_splitting(pmd) pte_special(pmd_pte(pmd)) #endif -#define PMD_BIT_FUNC(fn,op) \ -static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } +#define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) +#define pmd_mksplitting(pmd) pte_pmd(pte_mkspecial(pmd_pte(pmd))) +#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) +#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) +#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) +#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) +#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) &= ~PMD_TYPE_MASK)) -PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); -PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); -PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); -PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); -PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); -PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); -PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); +#define __HAVE_ARCH_PMD_WRITE +#define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) @@ -266,15 +266,6 @@ PMD_BIT_FUNC(mknotpresent, &= ~PMD_TYPE_MASK); #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK)) -static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) -{ - const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN | - PMD_SECT_RDONLY | PMD_SECT_PROT_NONE | - PMD_SECT_VALID; - pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); - return pmd; -} - #define set_pmd_at(mm, addr, pmdp, pmd) set_pmd(pmdp, pmd) static inline int has_transparent_hugepage(void) @@ -383,6 +374,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) return pte; } +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); +} + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -- cgit v0.10.2 From 33081b34681742add8d8c1e49fc93045415e5a18 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 14 Mar 2014 19:20:58 +0100 Subject: ata: ahci_st: build fixes * The config option for ahci_st driver was renamed from CONFIG_SATA_AHCI_ST to CONFIG_AHCI_ST but Makefile was not updated. Fix it (also while at it move the ahci_st driver entry below ahci_imx and ahci_sunxi ones). * Fix a few build issues in the ahci_st driver itself. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 6bbd6da..e20148c 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_ATA) += libata.o obj-$(CONFIG_SATA_AHCI) += ahci.o libahci.o obj-$(CONFIG_SATA_ACARD_AHCI) += acard-ahci.o libahci.o obj-$(CONFIG_SATA_AHCI_PLATFORM) += ahci_platform.o libahci.o -obj-$(CONFIG_SATA_AHCI_ST) += ahci_st.o obj-$(CONFIG_SATA_FSL) += sata_fsl.o obj-$(CONFIG_SATA_INIC162X) += sata_inic162x.o obj-$(CONFIG_SATA_SIL24) += sata_sil24.o @@ -13,6 +12,7 @@ obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o obj-$(CONFIG_AHCI_IMX) += ahci_imx.o obj-$(CONFIG_AHCI_SUNXI) += ahci_sunxi.o +obj-$(CONFIG_AHCI_ST) += ahci_st.o # SFF w/ custom DMA obj-$(CONFIG_PDC_ADMA) += pdc_adma.o diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index 17191b9..e1aa544 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -87,7 +87,7 @@ static int st_ahci_deassert_resets(struct device *dev) return 0; } -static int st_ahci_exit(struct device *dev) +static void st_ahci_exit(struct device *dev) { struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = drv_data->hpriv; @@ -96,12 +96,10 @@ static int st_ahci_exit(struct device *dev) if (drv_data->pwr) { err = reset_control_assert(drv_data->pwr); if (err) - dev_err(&pdev->dev, "unable to pwrdwn\n"); + dev_err(dev, "unable to pwrdwn\n"); } ahci_platform_disable_resources(hpriv); - - return 0; } static int st_ahci_probe_resets(struct platform_device *pdev) @@ -186,9 +184,9 @@ static int st_ahci_suspend(struct device *dev) struct ahci_host_priv *hpriv = drv_data->hpriv; int err; - ret = ahci_platform_suspend_host(dev); - if (ret) - return ret; + err = ahci_platform_suspend_host(dev); + if (err) + return err; if (drv_data->pwr) { err = reset_control_assert(drv_data->pwr); -- cgit v0.10.2 From b032378b4c3ffba86d2c78699b385ae646397938 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 14 Mar 2014 19:21:59 +0100 Subject: ata: ahci_st: remove deprecated struct ahci_platform_data usage struct ahci_platform_data is deprecated (please see comments in for details). Convert ahci_st driver to use custom ->host_stop method instead. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index e1aa544..6332222 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -87,10 +87,11 @@ static int st_ahci_deassert_resets(struct device *dev) return 0; } -static void st_ahci_exit(struct device *dev) +static void st_ahci_host_stop(struct ata_host *host) { + struct ahci_host_priv *hpriv = host->private_data; + struct device *dev = host->dev; struct st_ahci_drv_data *drv_data = dev_get_drvdata(dev); - struct ahci_host_priv *hpriv = drv_data->hpriv; int err; if (drv_data->pwr) { @@ -127,17 +128,21 @@ static int st_ahci_probe_resets(struct platform_device *pdev) return st_ahci_deassert_resets(&pdev->dev); } +static struct ata_port_operations st_ahci_port_ops = { + .inherits = &ahci_platform_ops, + .host_stop = st_ahci_host_stop, +}; + static const struct ata_port_info st_ahci_port_info = { .flags = AHCI_FLAG_COMMON, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, - .port_ops = &ahci_platform_ops, + .port_ops = &st_ahci_port_ops, }; static int st_ahci_probe(struct platform_device *pdev) { struct st_ahci_drv_data *drv_data; - struct ahci_platform_data *pdata; struct ahci_host_priv *hpriv; int err; @@ -147,13 +152,6 @@ static int st_ahci_probe(struct platform_device *pdev) platform_set_drvdata(pdev, drv_data); - pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return -ENOMEM; - - pdata->exit = st_ahci_exit; - pdev->dev.platform_data = pdata; - hpriv = ahci_platform_get_resources(pdev); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); -- cgit v0.10.2 From 13e8e78bdd148f615da1e5d1edcf0b58b7c2b684 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 14 Mar 2014 18:33:13 +0100 Subject: ata: pata_imx: fix devm_ioremap_resource() return value checking devm_ioremap_resource() returns a pointer to the remapped memory or an ERR_PTR() encoded error code on failure. Fix the check inside pata_imx_probe() accordingly. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c index 121c748..1617693 100644 --- a/drivers/ata/pata_imx.c +++ b/drivers/ata/pata_imx.c @@ -131,8 +131,8 @@ static int pata_imx_probe(struct platform_device *pdev) io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); priv->host_regs = devm_ioremap_resource(&pdev->dev, io_res); - if (!priv->host_regs) { - ret = -EBUSY; + if (IS_ERR(priv->host_regs)) { + ret = PTR_ERR(priv->host_regs); goto err; } -- cgit v0.10.2 From 5434b203156ef245b7847128c446c5b54f12a6d4 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 14 Mar 2014 18:22:09 +0100 Subject: ata: ahci_platform: fix devm_ioremap_resource() return value checking devm_ioremap_resource() returns a pointer to the remapped memory or an ERR_PTR() encoded error code on failure. Fix the check inside ahci_platform_get_resources() accordingly. Also while at it remove a needless line break. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index db24d2a..70fbf66 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -199,8 +199,7 @@ static void ahci_platform_put_resources(struct device *dev, void *res) * RETURNS: * The allocated ahci_host_priv on success, otherwise an ERR_PTR value */ -struct ahci_host_priv *ahci_platform_get_resources( - struct platform_device *pdev) +struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct ahci_host_priv *hpriv; @@ -219,8 +218,9 @@ struct ahci_host_priv *ahci_platform_get_resources( hpriv->mmio = devm_ioremap_resource(dev, platform_get_resource(pdev, IORESOURCE_MEM, 0)); - if (!hpriv->mmio) { + if (IS_ERR(hpriv->mmio)) { dev_err(dev, "no mmio space\n"); + rc = PTR_ERR(hpriv->mmio); goto err_out; } -- cgit v0.10.2 From 9b0d2fb86d4737b2cda39bc9c9a8e368cec38960 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Mar 2014 10:14:02 +0900 Subject: perf ui/stdio: Fix invalid output on event group report When some of group member has 0 overhead, it printed previous percentage instead of 0.00%. It's because passing integer 0 as a percent rather than double 0.0 so the remaining bits came from garbage. The TUI and GTK don't have this problem since they pass 0.0. Before: # Samples: 845 of event 'anon group { cycles, cache-references, cache-misses }' # Event count (approx.): 174775051 # # Overhead Samples # ........................ .................................... # 20.32% 8.58% 73.51% 45 30 138 6.87% 6.87% 6.87% 21 0 0 5.29% 0.31% 0.31% 10 1 0 1.89% 1.89% 1.89% 6 0 0 1.76% 1.76% 1.76% 2 0 0 After: # Overhead Samples # ........................ .................................... # 20.32% 8.58% 73.51% 45 30 138 6.87% 0.00% 0.00% 21 0 0 5.29% 0.31% 0.00% 10 1 0 1.89% 0.00% 0.00% 6 0 0 1.76% 0.00% 0.00% 2 0 0 Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1393809254-4480-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 78f4c92..6094562 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -52,8 +52,15 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, * zero-fill group members in the middle which * have no sample */ - ret += print_fn(hpp->buf + ret, hpp->size - ret, - fmt, 0); + if (fmt_percent) { + ret += print_fn(hpp->buf + ret, + hpp->size - ret, + fmt, 0.0); + } else { + ret += print_fn(hpp->buf + ret, + hpp->size - ret, + fmt, 0ULL); + } } if (fmt_percent) @@ -72,8 +79,13 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, /* * zero-fill group members at last which have no sample */ - ret += print_fn(hpp->buf + ret, hpp->size - ret, - fmt, 0); + if (fmt_percent) { + ret += print_fn(hpp->buf + ret, hpp->size - ret, + fmt, 0.0); + } else { + ret += print_fn(hpp->buf + ret, hpp->size - ret, + fmt, 0ULL); + } } } return ret; -- cgit v0.10.2 From 4a62109fe94f68a57b239c1516f97497a4d15c14 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Mar 2014 10:14:03 +0900 Subject: perf ui/gtk: Reuse generic __hpp__fmt() code The __hpp__color_fmt used in the gtk code can be replace by the generic code with small change in print_fn callback. This is a preparation to upcoming changes and no functional changes intended. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1393809254-4480-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 5b95c44..3dab00e 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -8,16 +8,22 @@ #define MAX_COLUMNS 32 -static int __percent_color_snprintf(char *buf, size_t size, double percent) +static int __percent_color_snprintf(char *buf, size_t size, const char *fmt, ...) { int ret = 0; + va_list args; + double percent; const char *markup; + va_start(args, fmt); + percent = va_arg(args, double); + va_end(args); + markup = perf_gtk__get_percent_color(percent); if (markup) ret += scnprintf(buf, size, markup); - ret += scnprintf(buf + ret, size - ret, " %6.2f%%", percent); + ret += scnprintf(buf + ret, size - ret, fmt, percent); if (markup) ret += scnprintf(buf + ret, size - ret, ""); @@ -25,66 +31,6 @@ static int __percent_color_snprintf(char *buf, size_t size, double percent) return ret; } - -static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he, - u64 (*get_field)(struct hist_entry *)) -{ - int ret; - double percent = 0.0; - struct hists *hists = he->hists; - struct perf_evsel *evsel = hists_to_evsel(hists); - - if (hists->stats.total_period) - percent = 100.0 * get_field(he) / hists->stats.total_period; - - ret = __percent_color_snprintf(hpp->buf, hpp->size, percent); - - if (perf_evsel__is_group_event(evsel)) { - int prev_idx, idx_delta; - struct hist_entry *pair; - int nr_members = evsel->nr_members; - - prev_idx = perf_evsel__group_idx(evsel); - - list_for_each_entry(pair, &he->pairs.head, pairs.node) { - u64 period = get_field(pair); - u64 total = pair->hists->stats.total_period; - - evsel = hists_to_evsel(pair->hists); - idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1; - - while (idx_delta--) { - /* - * zero-fill group members in the middle which - * have no sample - */ - ret += __percent_color_snprintf(hpp->buf + ret, - hpp->size - ret, - 0.0); - } - - percent = 100.0 * period / total; - ret += __percent_color_snprintf(hpp->buf + ret, - hpp->size - ret, - percent); - - prev_idx = perf_evsel__group_idx(evsel); - } - - idx_delta = nr_members - prev_idx - 1; - - while (idx_delta--) { - /* - * zero-fill group members at last which have no sample - */ - ret += __percent_color_snprintf(hpp->buf + ret, - hpp->size - ret, - 0.0); - } - } - return ret; -} - #define __HPP_COLOR_PERCENT_FN(_type, _field) \ static u64 he_get_##_field(struct hist_entry *he) \ { \ @@ -95,7 +41,8 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, \ struct hist_entry *he) \ { \ - return __hpp__color_fmt(hpp, he, he_get_##_field); \ + return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%", \ + __percent_color_snprintf, true); \ } __HPP_COLOR_PERCENT_FN(overhead, period) diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 6094562..0853534 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -8,12 +8,9 @@ /* hist period print (hpp) functions */ -typedef int (*hpp_snprint_fn)(char *buf, size_t size, const char *fmt, ...); - -static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, - u64 (*get_field)(struct hist_entry *), - const char *fmt, hpp_snprint_fn print_fn, - bool fmt_percent) +int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, + u64 (*get_field)(struct hist_entry *), + const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent) { int ret; struct hists *hists = he->hists; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a59743f..97f924e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -166,6 +166,12 @@ void perf_hpp__init(void); void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_enable(unsigned col); +typedef int (*hpp_snprint_fn)(char *buf, size_t size, const char *fmt, ...); + +int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, + u64 (*get_field)(struct hist_entry *), + const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent); + static inline size_t perf_hpp__use_color(void) { return !symbol_conf.field_sep; -- cgit v0.10.2 From a0088adcd651b8eb1a9ca9c7e6ebe1c2c5fb6273 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Mar 2014 10:14:04 +0900 Subject: perf ui/hists: Pass struct hpp to print functions Instead of the pointer to buffer and its size so that it can also get private argument passed along with hpp. This is a preparation of further change. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1393809254-4480-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 3dab00e..430fd55 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -8,12 +8,14 @@ #define MAX_COLUMNS 32 -static int __percent_color_snprintf(char *buf, size_t size, const char *fmt, ...) +static int __percent_color_snprintf(struct perf_hpp *hpp, const char *fmt, ...) { int ret = 0; va_list args; double percent; const char *markup; + char *buf = hpp->buf; + size_t size = hpp->size; va_start(args, fmt); percent = va_arg(args, double); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 0853534..0c427e5 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -8,6 +8,13 @@ /* hist period print (hpp) functions */ +#define hpp__call_print_fn(hpp, fn, fmt, ...) \ +({ \ + int __ret = fn(hpp, fmt, ##__VA_ARGS__); \ + advance_hpp(hpp, __ret); \ + __ret; \ +}) + int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, u64 (*get_field)(struct hist_entry *), const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent) @@ -15,6 +22,8 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, int ret; struct hists *hists = he->hists; struct perf_evsel *evsel = hists_to_evsel(hists); + char *buf = hpp->buf; + size_t size = hpp->size; if (fmt_percent) { double percent = 0.0; @@ -23,9 +32,9 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, percent = 100.0 * get_field(he) / hists->stats.total_period; - ret = print_fn(hpp->buf, hpp->size, fmt, percent); + ret = hpp__call_print_fn(hpp, print_fn, fmt, percent); } else - ret = print_fn(hpp->buf, hpp->size, fmt, get_field(he)); + ret = hpp__call_print_fn(hpp, print_fn, fmt, get_field(he)); if (perf_evsel__is_group_event(evsel)) { int prev_idx, idx_delta; @@ -50,22 +59,21 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, * have no sample */ if (fmt_percent) { - ret += print_fn(hpp->buf + ret, - hpp->size - ret, - fmt, 0.0); + ret += hpp__call_print_fn(hpp, print_fn, + fmt, 0.0); } else { - ret += print_fn(hpp->buf + ret, - hpp->size - ret, - fmt, 0ULL); + ret += hpp__call_print_fn(hpp, print_fn, + fmt, 0ULL); } } - if (fmt_percent) - ret += print_fn(hpp->buf + ret, hpp->size - ret, - fmt, 100.0 * period / total); - else - ret += print_fn(hpp->buf + ret, hpp->size - ret, - fmt, period); + if (fmt_percent) { + ret += hpp__call_print_fn(hpp, print_fn, fmt, + 100.0 * period / total); + } else { + ret += hpp__call_print_fn(hpp, print_fn, fmt, + period); + } prev_idx = perf_evsel__group_idx(evsel); } @@ -77,14 +85,22 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, * zero-fill group members at last which have no sample */ if (fmt_percent) { - ret += print_fn(hpp->buf + ret, hpp->size - ret, - fmt, 0.0); + ret += hpp__call_print_fn(hpp, print_fn, + fmt, 0.0); } else { - ret += print_fn(hpp->buf + ret, hpp->size - ret, - fmt, 0ULL); + ret += hpp__call_print_fn(hpp, print_fn, + fmt, 0ULL); } } } + + /* + * Restore original buf and size as it's where caller expects + * the result will be saved. + */ + hpp->buf = buf; + hpp->size = size; + return ret; } @@ -116,6 +132,34 @@ static int hpp__width_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ return len; \ } +static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...) +{ + va_list args; + ssize_t ssize = hpp->size; + double percent; + int ret; + + va_start(args, fmt); + percent = va_arg(args, double); + ret = value_color_snprintf(hpp->buf, hpp->size, fmt, percent); + va_end(args); + + return (ret >= ssize) ? (ssize - 1) : ret; +} + +static int hpp_entry_scnprintf(struct perf_hpp *hpp, const char *fmt, ...) +{ + va_list args; + ssize_t ssize = hpp->size; + int ret; + + va_start(args, fmt); + ret = vsnprintf(hpp->buf, hpp->size, fmt, args); + va_end(args); + + return (ret >= ssize) ? (ssize - 1) : ret; +} + #define __HPP_COLOR_PERCENT_FN(_type, _field) \ static u64 he_get_##_field(struct hist_entry *he) \ { \ @@ -126,7 +170,7 @@ static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ struct perf_hpp *hpp, struct hist_entry *he) \ { \ return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%", \ - percent_color_snprintf, true); \ + hpp_color_scnprintf, true); \ } #define __HPP_ENTRY_PERCENT_FN(_type, _field) \ @@ -135,7 +179,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \ { \ const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \ return __hpp__fmt(hpp, he, he_get_##_field, fmt, \ - scnprintf, true); \ + hpp_entry_scnprintf, true); \ } #define __HPP_ENTRY_RAW_FN(_type, _field) \ @@ -148,7 +192,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \ struct perf_hpp *hpp, struct hist_entry *he) \ { \ const char *fmt = symbol_conf.field_sep ? " %"PRIu64 : " %11"PRIu64; \ - return __hpp__fmt(hpp, he, he_get_raw_##_field, fmt, scnprintf, false); \ + return __hpp__fmt(hpp, he, he_get_raw_##_field, fmt, \ + hpp_entry_scnprintf, false); \ } #define HPP_PERCENT_FNS(_type, _str, _field, _min_width, _unit_width) \ diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 831fbb7..9bad892 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -306,12 +306,6 @@ static size_t hist_entry__callchain_fprintf(struct hist_entry *he, return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); } -static inline void advance_hpp(struct perf_hpp *hpp, int inc) -{ - hpp->buf += inc; - hpp->size -= inc; -} - static int hist_entry__period_snprintf(struct perf_hpp *hpp, struct hist_entry *he) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 97f924e..d51ed98 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -166,12 +166,18 @@ void perf_hpp__init(void); void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_enable(unsigned col); -typedef int (*hpp_snprint_fn)(char *buf, size_t size, const char *fmt, ...); +typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...); int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, u64 (*get_field)(struct hist_entry *), const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent); +static inline void advance_hpp(struct perf_hpp *hpp, int inc) +{ + hpp->buf += inc; + hpp->size -= inc; +} + static inline size_t perf_hpp__use_color(void) { return !symbol_conf.field_sep; -- cgit v0.10.2 From 2f6d9009af1df0f7cba1fdfe012a089babd8c747 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 3 Mar 2014 10:14:05 +0900 Subject: perf ui/tui: Reuse generic __hpp__fmt() code The __hpp__color_fmt used in the TUI code can be replace by the generic code with small change in print_fn callback. And it also needs to move callback function to the generic __hpp__fmt(). No functional changes intended. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1393809254-4480-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index b720b92..7ec871a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -587,95 +587,52 @@ struct hpp_arg { bool current_entry; }; -static int __hpp__color_callchain(struct hpp_arg *arg) +static int __hpp__overhead_callback(struct perf_hpp *hpp, bool front) { - if (!symbol_conf.use_callchain) - return 0; - - slsmg_printf("%c ", arg->folded_sign); - return 2; -} - -static int __hpp__color_fmt(struct perf_hpp *hpp, struct hist_entry *he, - u64 (*get_field)(struct hist_entry *), - int (*callchain_cb)(struct hpp_arg *)) -{ - int ret = 0; - double percent = 0.0; - struct hists *hists = he->hists; struct hpp_arg *arg = hpp->ptr; - if (hists->stats.total_period) - percent = 100.0 * get_field(he) / hists->stats.total_period; - - ui_browser__set_percent_color(arg->b, percent, arg->current_entry); - - if (callchain_cb) - ret += callchain_cb(arg); - - ret += scnprintf(hpp->buf, hpp->size, "%6.2f%%", percent); - slsmg_printf("%s", hpp->buf); - - if (symbol_conf.event_group) { - int prev_idx, idx_delta; - struct perf_evsel *evsel = hists_to_evsel(hists); - struct hist_entry *pair; - int nr_members = evsel->nr_members; - - if (nr_members <= 1) - goto out; + if (arg->current_entry && arg->b->navkeypressed) + ui_browser__set_color(arg->b, HE_COLORSET_SELECTED); + else + ui_browser__set_color(arg->b, HE_COLORSET_NORMAL); - prev_idx = perf_evsel__group_idx(evsel); + if (front) { + if (!symbol_conf.use_callchain) + return 0; - list_for_each_entry(pair, &he->pairs.head, pairs.node) { - u64 period = get_field(pair); - u64 total = pair->hists->stats.total_period; + slsmg_printf("%c ", arg->folded_sign); + return 2; + } - if (!total) - continue; + return 0; +} - evsel = hists_to_evsel(pair->hists); - idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1; +static int __hpp__color_callback(struct perf_hpp *hpp, bool front __maybe_unused) +{ + struct hpp_arg *arg = hpp->ptr; - while (idx_delta--) { - /* - * zero-fill group members in the middle which - * have no sample - */ - ui_browser__set_percent_color(arg->b, 0.0, - arg->current_entry); - ret += scnprintf(hpp->buf, hpp->size, - " %6.2f%%", 0.0); - slsmg_printf("%s", hpp->buf); - } + if (!arg->current_entry || !arg->b->navkeypressed) + ui_browser__set_color(arg->b, HE_COLORSET_NORMAL); + return 0; +} - percent = 100.0 * period / total; - ui_browser__set_percent_color(arg->b, percent, - arg->current_entry); - ret += scnprintf(hpp->buf, hpp->size, - " %6.2f%%", percent); - slsmg_printf("%s", hpp->buf); +static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) +{ + struct hpp_arg *arg = hpp->ptr; + int ret; + va_list args; + double percent; - prev_idx = perf_evsel__group_idx(evsel); - } + va_start(args, fmt); + percent = va_arg(args, double); + va_end(args); - idx_delta = nr_members - prev_idx - 1; + ui_browser__set_percent_color(arg->b, percent, arg->current_entry); - while (idx_delta--) { - /* - * zero-fill group members at last which have no sample - */ - ui_browser__set_percent_color(arg->b, 0.0, - arg->current_entry); - ret += scnprintf(hpp->buf, hpp->size, - " %6.2f%%", 0.0); - slsmg_printf("%s", hpp->buf); - } - } -out: - if (!arg->current_entry || !arg->b->navkeypressed) - ui_browser__set_color(arg->b, HE_COLORSET_NORMAL); + ret = scnprintf(hpp->buf, hpp->size, fmt, percent); + slsmg_printf("%s", hpp->buf); + advance_hpp(hpp, ret); return ret; } @@ -690,14 +647,15 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ struct perf_hpp *hpp, \ struct hist_entry *he) \ { \ - return __hpp__color_fmt(hpp, he, __hpp_get_##_field, _cb); \ + return __hpp__fmt(hpp, he, __hpp_get_##_field, _cb, " %6.2f%%", \ + __hpp__slsmg_color_printf, true); \ } -__HPP_COLOR_PERCENT_FN(overhead, period, __hpp__color_callchain) -__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys, NULL) -__HPP_COLOR_PERCENT_FN(overhead_us, period_us, NULL) -__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys, NULL) -__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, NULL) +__HPP_COLOR_PERCENT_FN(overhead, period, __hpp__overhead_callback) +__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys, __hpp__color_callback) +__HPP_COLOR_PERCENT_FN(overhead_us, period_us, __hpp__color_callback) +__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys, __hpp__color_callback) +__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us, __hpp__color_callback) #undef __HPP_COLOR_PERCENT_FN diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 430fd55..7912dab 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -43,7 +43,7 @@ static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, \ struct hist_entry *he) \ { \ - return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%", \ + return __hpp__fmt(hpp, he, he_get_##_field, NULL, " %6.2f%%", \ __percent_color_snprintf, true); \ } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 0c427e5..ac39313 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -16,15 +16,20 @@ }) int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, - u64 (*get_field)(struct hist_entry *), + hpp_field_fn get_field, hpp_callback_fn callback, const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent) { - int ret; + int ret = 0; struct hists *hists = he->hists; struct perf_evsel *evsel = hists_to_evsel(hists); char *buf = hpp->buf; size_t size = hpp->size; + if (callback) { + ret = callback(hpp, true); + advance_hpp(hpp, ret); + } + if (fmt_percent) { double percent = 0.0; @@ -32,9 +37,9 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, percent = 100.0 * get_field(he) / hists->stats.total_period; - ret = hpp__call_print_fn(hpp, print_fn, fmt, percent); + ret += hpp__call_print_fn(hpp, print_fn, fmt, percent); } else - ret = hpp__call_print_fn(hpp, print_fn, fmt, get_field(he)); + ret += hpp__call_print_fn(hpp, print_fn, fmt, get_field(he)); if (perf_evsel__is_group_event(evsel)) { int prev_idx, idx_delta; @@ -94,6 +99,13 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, } } + if (callback) { + int __ret = callback(hpp, false); + + advance_hpp(hpp, __ret); + ret += __ret; + } + /* * Restore original buf and size as it's where caller expects * the result will be saved. @@ -169,7 +181,7 @@ static u64 he_get_##_field(struct hist_entry *he) \ static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ struct perf_hpp *hpp, struct hist_entry *he) \ { \ - return __hpp__fmt(hpp, he, he_get_##_field, " %6.2f%%", \ + return __hpp__fmt(hpp, he, he_get_##_field, NULL, " %6.2f%%", \ hpp_color_scnprintf, true); \ } @@ -178,7 +190,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \ struct perf_hpp *hpp, struct hist_entry *he) \ { \ const char *fmt = symbol_conf.field_sep ? " %.2f" : " %6.2f%%"; \ - return __hpp__fmt(hpp, he, he_get_##_field, fmt, \ + return __hpp__fmt(hpp, he, he_get_##_field, NULL, fmt, \ hpp_entry_scnprintf, true); \ } @@ -192,7 +204,7 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \ struct perf_hpp *hpp, struct hist_entry *he) \ { \ const char *fmt = symbol_conf.field_sep ? " %"PRIu64 : " %11"PRIu64; \ - return __hpp__fmt(hpp, he, he_get_raw_##_field, fmt, \ + return __hpp__fmt(hpp, he, he_get_raw_##_field, NULL, fmt, \ hpp_entry_scnprintf, false); \ } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d51ed98..9e1cada 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -166,10 +166,12 @@ void perf_hpp__init(void); void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_enable(unsigned col); +typedef u64 (*hpp_field_fn)(struct hist_entry *he); +typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front); typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...); int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, - u64 (*get_field)(struct hist_entry *), + hpp_field_fn get_field, hpp_callback_fn callback, const char *fmt, hpp_snprint_fn print_fn, bool fmt_percent); static inline void advance_hpp(struct perf_hpp *hpp, int inc) -- cgit v0.10.2 From 0ea590ae8198547d5898c72b04fa9d8f23bd0b8f Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 25 Feb 2014 22:43:46 -0500 Subject: perf session: Change header.misc dump from decimal to hex When printing the raw dump of a data file, the header.misc is printed as a decimal. Unfortunately, that field is a bit mask, so it is hard to interpret as a decimal. Print in hex, so the user can easily see what bits are set and more importantly what type of info it is conveying. V2: add 0x in front per Jiri Olsa Signed-off-by: Don Zickus Cc: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1393386227-149412-3-git-send-email-dzickus@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1d555d6..55960f2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -794,7 +794,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (!dump_trace) return; - printf("(IP, %d): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n", + printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n", event->header.misc, sample->pid, sample->tid, sample->ip, sample->period, sample->addr); -- cgit v0.10.2 From 52a3cb8cfca16db73cf825cb94325cf54da8304f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 11 Mar 2014 16:16:49 -0300 Subject: perf symbols: Introduce thread__find_cpumode_addr_location Its one level up thread__find_addr_location, where it will look in different domains for a sample: user, kernel, hypervisor, etc. Will soon be used by a patchkit by Andi Kleen. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-so6nxkh7xj48bc5kq4jpj991@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 813e94e..a679953 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1184,39 +1184,22 @@ static bool symbol__match_regex(struct symbol *sym, regex_t *regex) return 0; } -static const u8 cpumodes[] = { - PERF_RECORD_MISC_USER, - PERF_RECORD_MISC_KERNEL, - PERF_RECORD_MISC_GUEST_USER, - PERF_RECORD_MISC_GUEST_KERNEL -}; -#define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) - static void ip__resolve_ams(struct machine *machine, struct thread *thread, struct addr_map_symbol *ams, u64 ip) { struct addr_location al; - size_t i; - u8 m; memset(&al, 0, sizeof(al)); + /* + * We cannot use the header.misc hint to determine whether a + * branch stack address is user, kernel, guest, hypervisor. + * Branches may straddle the kernel/user/hypervisor boundaries. + * Thus, we have to try consecutively until we find a match + * or else, the symbol is unknown + */ + thread__find_cpumode_addr_location(thread, machine, MAP__FUNCTION, ip, &al); - for (i = 0; i < NCPUMODES; i++) { - m = cpumodes[i]; - /* - * We cannot use the header.misc hint to determine whether a - * branch stack address is user, kernel, guest, hypervisor. - * Branches may straddle the kernel/user/hypervisor boundaries. - * Thus, we have to try consecutively until we find a match - * or else, the symbol is unknown - */ - thread__find_addr_location(thread, machine, m, MAP__FUNCTION, - ip, &al); - if (al.map) - goto found; - } -found: ams->addr = ip; ams->al_addr = al.addr; ams->sym = al.sym; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0358882..3ce0498 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -142,3 +142,24 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) return 0; } + +void thread__find_cpumode_addr_location(struct thread *thread, + struct machine *machine, + enum map_type type, u64 addr, + struct addr_location *al) +{ + size_t i; + const u8 const cpumodes[] = { + PERF_RECORD_MISC_USER, + PERF_RECORD_MISC_KERNEL, + PERF_RECORD_MISC_GUEST_USER, + PERF_RECORD_MISC_GUEST_KERNEL + }; + + for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { + thread__find_addr_location(thread, machine, cpumodes[i], type, + addr, al); + if (al->map) + break; + } +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 5b856bf..9a07074 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -58,6 +58,11 @@ void thread__find_addr_location(struct thread *thread, struct machine *machine, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); +void thread__find_cpumode_addr_location(struct thread *thread, + struct machine *machine, + enum map_type type, u64 addr, + struct addr_location *al); + static inline void *thread__priv(struct thread *thread) { return thread->priv; -- cgit v0.10.2 From 94a0793ddf7fa9890006a8dc203b985e7b120785 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 10 Mar 2014 16:43:52 +0900 Subject: perf ui hists: Pass evsel to hpp->header/width functions explicitly Those functions need evsel to investigate event group and it's passed via hpp->ptr. However as it can be missed easily so it's better to pass it via an argument IMHO. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1394437440-11609-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index a77e312..204fffe 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -952,8 +952,8 @@ static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp, dfmt->header_width, buf); } -static int hpp__header(struct perf_hpp_fmt *fmt, - struct perf_hpp *hpp) +static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct perf_evsel *evsel __maybe_unused) { struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); @@ -963,7 +963,8 @@ static int hpp__header(struct perf_hpp_fmt *fmt, } static int hpp__width(struct perf_hpp_fmt *fmt, - struct perf_hpp *hpp __maybe_unused) + struct perf_hpp *hpp __maybe_unused, + struct perf_evsel *evsel __maybe_unused) { struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 7912dab..e395ef9 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -165,7 +165,6 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, struct perf_hpp hpp = { .buf = s, .size = sizeof(s), - .ptr = hists_to_evsel(hists), }; nr_cols = 0; @@ -192,7 +191,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; perf_hpp__for_each_format(fmt) { - fmt->header(fmt, &hpp); + fmt->header(fmt, &hpp, hists_to_evsel(hists)); gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), -1, ltrim(s), diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index ac39313..0f403b8 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -118,29 +118,27 @@ int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, #define __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ static int hpp__header_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ - struct perf_hpp *hpp) \ + struct perf_hpp *hpp, \ + struct perf_evsel *evsel) \ { \ int len = _min_width; \ \ - if (symbol_conf.event_group) { \ - struct perf_evsel *evsel = hpp->ptr; \ - \ + if (symbol_conf.event_group) \ len = max(len, evsel->nr_members * _unit_width); \ - } \ + \ return scnprintf(hpp->buf, hpp->size, "%*s", len, _str); \ } #define __HPP_WIDTH_FN(_type, _min_width, _unit_width) \ static int hpp__width_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ - struct perf_hpp *hpp __maybe_unused) \ + struct perf_hpp *hpp __maybe_unused, \ + struct perf_evsel *evsel) \ { \ int len = _min_width; \ \ - if (symbol_conf.event_group) { \ - struct perf_evsel *evsel = hpp->ptr; \ - \ + if (symbol_conf.event_group) \ len = max(len, evsel->nr_members * _unit_width); \ - } \ + \ return len; \ } @@ -329,15 +327,13 @@ unsigned int hists__sort_list_width(struct hists *hists) struct perf_hpp_fmt *fmt; struct sort_entry *se; int i = 0, ret = 0; - struct perf_hpp dummy_hpp = { - .ptr = hists_to_evsel(hists), - }; + struct perf_hpp dummy_hpp; perf_hpp__for_each_format(fmt) { if (i) ret += 2; - ret += fmt->width(fmt, &dummy_hpp); + ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); } list_for_each_entry(se, &hist_entry__sort_list, list) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9bad892..d59893e 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -379,7 +379,6 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, struct perf_hpp dummy_hpp = { .buf = bf, .size = sizeof(bf), - .ptr = hists_to_evsel(hists), }; bool first = true; size_t linesz; @@ -398,7 +397,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, else first = false; - fmt->header(fmt, &dummy_hpp); + fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); fprintf(fp, "%s", bf); } @@ -443,7 +442,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, else first = false; - width = fmt->width(fmt, &dummy_hpp); + width = fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); for (i = 0; i < width; i++) fprintf(fp, "."); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 9e1cada..0c76bf9 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -132,8 +132,10 @@ struct perf_hpp { }; struct perf_hpp_fmt { - int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp); - int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp); + int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct perf_evsel *evsel); + int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct perf_evsel *evsel); int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he); int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, -- cgit v0.10.2 From 09a71b97cce70551356b13b668aa1d7d6da84457 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 3 Mar 2014 20:26:36 -0500 Subject: perf kvm: introduce --list-cmds for use by scripts Introduce $ perf kvm --list-cmds to dump a raw list of commands for use by the completion script. In order to do this, introduce parse_options_subcommand() for handling subcommands as a special case in the parse-options machinery. Signed-off-by: Ramkumar Ramachandra Acked-by: David Ahern Acked-by: Jiri Olsa Cc: David Ahern Link: http://lkml.kernel.org/r/1393896396-10427-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index a735051..21c164b 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1691,17 +1691,15 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused) OPT_END() }; - - const char * const kvm_usage[] = { - "perf kvm [] {top|record|report|diff|buildid-list|stat}", - NULL - }; + const char *const kvm_subcommands[] = { "top", "record", "report", "diff", + "buildid-list", "stat", NULL }; + const char *kvm_usage[] = { NULL, NULL }; perf_host = 0; perf_guest = 1; - argc = parse_options(argc, argv, kvm_options, kvm_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + argc = parse_options_subcommand(argc, argv, kvm_options, kvm_subcommands, kvm_usage, + PARSE_OPT_STOP_AT_NON_OPTION); if (!argc) usage_with_options(kvm_usage, kvm_options); diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 496e2ab..ae3a576 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -123,7 +123,7 @@ __perf_main () __perfcomp_colon "$evts" "$cur" # List subcommands for 'perf kvm' elif [[ $prev == "kvm" ]]; then - subcmds="top record report diff buildid-list stat" + subcmds=$($cmd $prev --list-cmds) __perfcomp_colon "$subcmds" "$cur" # List long option names elif [[ $cur == --* ]]; then diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index d22e3f80..bf48092 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -407,7 +407,9 @@ int parse_options_step(struct parse_opt_ctx_t *ctx, if (internal_help && !strcmp(arg + 2, "help")) return usage_with_options_internal(usagestr, options, 0); if (!strcmp(arg + 2, "list-opts")) - return PARSE_OPT_LIST; + return PARSE_OPT_LIST_OPTS; + if (!strcmp(arg + 2, "list-cmds")) + return PARSE_OPT_LIST_SUBCMDS; switch (parse_long_opt(ctx, arg + 2, options)) { case -1: return parse_options_usage(usagestr, options, arg + 2, 0); @@ -433,25 +435,45 @@ int parse_options_end(struct parse_opt_ctx_t *ctx) return ctx->cpidx + ctx->argc; } -int parse_options(int argc, const char **argv, const struct option *options, - const char * const usagestr[], int flags) +int parse_options_subcommand(int argc, const char **argv, const struct option *options, + const char *const subcommands[], const char *usagestr[], int flags) { struct parse_opt_ctx_t ctx; perf_header__set_cmdline(argc, argv); + /* build usage string if it's not provided */ + if (subcommands && !usagestr[0]) { + struct strbuf buf = STRBUF_INIT; + + strbuf_addf(&buf, "perf %s [] {", argv[0]); + for (int i = 0; subcommands[i]; i++) { + if (i) + strbuf_addstr(&buf, "|"); + strbuf_addstr(&buf, subcommands[i]); + } + strbuf_addstr(&buf, "}"); + + usagestr[0] = strdup(buf.buf); + strbuf_release(&buf); + } + parse_options_start(&ctx, argc, argv, flags); switch (parse_options_step(&ctx, options, usagestr)) { case PARSE_OPT_HELP: exit(129); case PARSE_OPT_DONE: break; - case PARSE_OPT_LIST: + case PARSE_OPT_LIST_OPTS: while (options->type != OPTION_END) { printf("--%s ", options->long_name); options++; } exit(130); + case PARSE_OPT_LIST_SUBCMDS: + for (int i = 0; subcommands[i]; i++) + printf("%s ", subcommands[i]); + exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { error("unknown option `%s'", ctx.argv[0] + 2); @@ -464,6 +486,13 @@ int parse_options(int argc, const char **argv, const struct option *options, return parse_options_end(&ctx); } +int parse_options(int argc, const char **argv, const struct option *options, + const char * const usagestr[], int flags) +{ + return parse_options_subcommand(argc, argv, options, NULL, + (const char **) usagestr, flags); +} + #define USAGE_OPTS_WIDTH 24 #define USAGE_GAP 2 diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index cbf0149..d8dac8a 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -140,6 +140,11 @@ extern int parse_options(int argc, const char **argv, const struct option *options, const char * const usagestr[], int flags); +extern int parse_options_subcommand(int argc, const char **argv, + const struct option *options, + const char *const subcommands[], + const char *usagestr[], int flags); + extern NORETURN void usage_with_options(const char * const *usagestr, const struct option *options); @@ -148,7 +153,8 @@ extern NORETURN void usage_with_options(const char * const *usagestr, enum { PARSE_OPT_HELP = -1, PARSE_OPT_DONE, - PARSE_OPT_LIST, + PARSE_OPT_LIST_OPTS, + PARSE_OPT_LIST_SUBCMDS, PARSE_OPT_UNKNOWN, }; -- cgit v0.10.2 From 363b785f3805a2632eb09a8b430842461c21a640 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 14 Mar 2014 10:43:44 -0400 Subject: perf tools: Speed up thread map generation When trying to capture perf data on a system running spejbb2013, perf hung for about 15 minutes. This is because it took that long to gather about 10,000 thread maps and process them. I don't think a user wants to wait that long. Instead, recognize that thread maps are roughly equivalent to pid maps and just quickly copy those instead. To do this, I synthesize 'fork' events, this eventually calls thread__fork() and copies the maps over. The overhead goes from 15 minutes down to about a few seconds. -- V2: based on Jiri's comments, moved malloc up a level and made sure the memory was freed Signed-off-by: Don Zickus Acked-by: Jiri Olsa Cc: Jiri Olsa Cc: Joe Mario Link: http://lkml.kernel.org/r/1394808224-113774-1-git-send-email-dzickus@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 55eebe9..3e580be 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -129,6 +129,28 @@ out: return tgid; } +static int perf_event__synthesize_fork(struct perf_tool *tool, + union perf_event *event, pid_t pid, + pid_t tgid, perf_event__handler_t process, + struct machine *machine) +{ + memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); + + /* this is really a clone event but we use fork to synthesize it */ + event->fork.ppid = tgid; + event->fork.ptid = tgid; + event->fork.pid = tgid; + event->fork.tid = pid; + event->fork.header.type = PERF_RECORD_FORK; + + event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); + + if (process(tool, event, &synth_sample, machine) != 0) + return -1; + + return 0; +} + int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, @@ -278,6 +300,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *mmap_event, + union perf_event *fork_event, pid_t pid, int full, perf_event__handler_t process, struct perf_tool *tool, @@ -326,9 +349,15 @@ static int __event__synthesize_thread(union perf_event *comm_event, if (tgid == -1) return -1; - /* process the thread's maps too */ - rc = perf_event__synthesize_mmap_events(tool, mmap_event, _pid, tgid, - process, machine, mmap_data); + if (_pid == pid) { + /* process the parent's maps too */ + rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine, mmap_data); + } else { + /* only fork the tid's map, to save time */ + rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid, + process, machine); + } if (rc) return rc; @@ -344,7 +373,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, struct machine *machine, bool mmap_data) { - union perf_event *comm_event, *mmap_event; + union perf_event *comm_event, *mmap_event, *fork_event; int err = -1, thread, j; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); @@ -355,9 +384,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, if (mmap_event == NULL) goto out_free_comm; + fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); + if (fork_event == NULL) + goto out_free_mmap; + err = 0; for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, mmap_event, + fork_event, threads->map[thread], 0, process, tool, machine, mmap_data)) { @@ -383,6 +417,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, /* if not, generate events for it */ if (need_leader && __event__synthesize_thread(comm_event, mmap_event, + fork_event, comm_event->comm.pid, 0, process, tool, machine, mmap_data)) { @@ -391,6 +426,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, } } } + free(fork_event); +out_free_mmap: free(mmap_event); out_free_comm: free(comm_event); @@ -405,7 +442,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, DIR *proc; char proc_path[PATH_MAX]; struct dirent dirent, *next; - union perf_event *comm_event, *mmap_event; + union perf_event *comm_event, *mmap_event, *fork_event; int err = -1; comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); @@ -416,6 +453,10 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (mmap_event == NULL) goto out_free_comm; + fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size); + if (fork_event == NULL) + goto out_free_mmap; + if (machine__is_default_guest(machine)) return 0; @@ -423,7 +464,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, proc = opendir(proc_path); if (proc == NULL) - goto out_free_mmap; + goto out_free_fork; while (!readdir_r(proc, &dirent, &next) && next) { char *end; @@ -435,12 +476,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool, * We may race with exiting thread, so don't stop just because * one thread couldn't be synthesized. */ - __event__synthesize_thread(comm_event, mmap_event, pid, 1, - process, tool, machine, mmap_data); + __event__synthesize_thread(comm_event, mmap_event, fork_event, pid, + 1, process, tool, machine, mmap_data); } err = 0; closedir(proc); +out_free_fork: + free(fork_event); out_free_mmap: free(mmap_event); out_free_comm: -- cgit v0.10.2 From d75e6097ef1f7669deb500fbbdf53cfe524f1b53 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 14 Mar 2014 15:00:03 +0100 Subject: perf machine: Factor machine__find_thread to take tid argument Forcing the code to always search thread by pid/tid pair. The PID value will be needed in future to determine the process thread leader for map groups sharing. Signed-off-by: Jiri Olsa Acked-by: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1394805606-25883-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index f16ea28..c059ee8 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -128,7 +128,7 @@ int test__dwarf_unwind(void) if (verbose > 1) machine__fprintf(machine, stderr); - thread = machine__find_thread(machine, getpid()); + thread = machine__find_thread(machine, getpid(), getpid()); if (!thread) { pr_err("Could not get thread\n"); goto out; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index a679953..5cecd98 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -327,9 +327,10 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, return __machine__findnew_thread(machine, pid, tid, true); } -struct thread *machine__find_thread(struct machine *machine, pid_t tid) +struct thread *machine__find_thread(struct machine *machine, pid_t pid, + pid_t tid) { - return __machine__findnew_thread(machine, 0, tid, false); + return __machine__findnew_thread(machine, pid, tid, false); } int machine__process_comm_event(struct machine *machine, union perf_event *event, @@ -1114,7 +1115,9 @@ static void machine__remove_thread(struct machine *machine, struct thread *th) int machine__process_fork_event(struct machine *machine, union perf_event *event, struct perf_sample *sample) { - struct thread *thread = machine__find_thread(machine, event->fork.tid); + struct thread *thread = machine__find_thread(machine, + event->fork.pid, + event->fork.tid); struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); @@ -1140,7 +1143,9 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event int machine__process_exit_event(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { - struct thread *thread = machine__find_thread(machine, event->fork.tid); + struct thread *thread = machine__find_thread(machine, + event->fork.pid, + event->fork.tid); if (dump_trace) perf_event__fprintf_task(event, stdout); diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 2e6c248..c8c74a1 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -41,7 +41,8 @@ struct map *machine__kernel_map(struct machine *machine, enum map_type type) return machine->vmlinux_maps[type]; } -struct thread *machine__find_thread(struct machine *machine, pid_t tid); +struct thread *machine__find_thread(struct machine *machine, pid_t pid, + pid_t tid); int machine__process_comm_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); -- cgit v0.10.2 From 7f02c463057fc527f52066742b84d9d89b22e83d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Feb 2014 18:42:57 +0000 Subject: MIPS: Octeon: Fix fall through on bar type OCTEON_DMA_BAR_TYPE_SMALL Bar type OCTEON_DMA_BAR_TYPE_SMALL assigns lo and hi addresses and then falls through to OCTEON_DMA_BAR_TYPE_BIG that re-assignes lo and hi addresses with totally different values. Add a break so we don't fall through. Signed-off-by: Colin Ian King Acked-by: David Daney Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/6529/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/pci/msi-octeon.c b/arch/mips/pci/msi-octeon.c index d37be36..2b91b0e 100644 --- a/arch/mips/pci/msi-octeon.c +++ b/arch/mips/pci/msi-octeon.c @@ -150,6 +150,7 @@ msi_irq_allocated: msg.address_lo = ((128ul << 20) + CVMX_PCI_MSI_RCV) & 0xffffffff; msg.address_hi = ((128ul << 20) + CVMX_PCI_MSI_RCV) >> 32; + break; case OCTEON_DMA_BAR_TYPE_BIG: /* When using big bar, Bar 0 is based at 0 */ msg.address_lo = (0 + CVMX_PCI_MSI_RCV) & 0xffffffff; -- cgit v0.10.2 From fcad3e6b576e78fce9ffca124a15de86b64453a0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 13 Mar 2014 10:11:45 -0700 Subject: MAINTAINERS: Add linux.nics@intel.com to INTEL ETHERNET DRIVERS If this is added to the driver files, then maybe it's appropriate to add to MAINTAINERS as well. Signed-off-by: Joe Perches Acked-by: Jeff Kirsher Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index b3fdb0f..12a6b29 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4545,6 +4545,7 @@ M: Greg Rose M: Alex Duyck M: John Ronciak M: Mitch Williams +M: Linux NICS L: e1000-devel@lists.sourceforge.net W: http://www.intel.com/support/feedback.htm W: http://e1000.sourceforge.net/ -- cgit v0.10.2 From 32fc3fd41ae44a41ffb8fd5ec4f4764822104655 Mon Sep 17 00:00:00 2001 From: Sebastian Hesselbarth Date: Fri, 14 Mar 2014 10:07:44 +0100 Subject: net: phy: fix uninitalized ethtool_wolinfo in phy_suspend Callers of phy_ethtool_get_wol are supposed to provide a properly cleared struct ethtool_wolinfo. Therefore, fix phy_suspend to clear it before passing it to phy_ethtool_get_wol. Signed-off-by: Sebastian Hesselbarth Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 4b970f7..2f6989b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -683,10 +683,9 @@ EXPORT_SYMBOL(phy_detach); int phy_suspend(struct phy_device *phydev) { struct phy_driver *phydrv = to_phy_driver(phydev->dev.driver); - struct ethtool_wolinfo wol; + struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; /* If the device has WOL enabled, we cannot suspend the PHY */ - wol.cmd = ETHTOOL_GWOL; phy_ethtool_get_wol(phydev, &wol); if (wol.wolopts) return -EBUSY; -- cgit v0.10.2 From 4f87dac386cc43d5525da7a939d4b4e7edbea22c Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Mon, 10 Mar 2014 14:46:07 +0100 Subject: ipc: Fix 2 bugs in msgrcv() MSG_COPY implementation While testing and documenting the msgrcv() MSG_COPY flag that Stanislav Kinsbursky added in commit 4a674f34ba04 ("ipc: introduce message queue copy feature" => kernel 3.8), I discovered a couple of bugs in the implementation. The two bugs concern MSG_COPY interactions with other msgrcv() flags, namely: (A) MSG_COPY + MSG_EXCEPT (B) MSG_COPY + !IPC_NOWAIT The bugs are distinct (and the fix for the first one is obvious), however my fix for both is a single-line patch, which is why I'm combining them in a single mail, rather than writing two mails+patches. ===== (A) MSG_COPY + MSG_EXCEPT ===== With the addition of the MSG_COPY flag, there are now two msgrcv() flags--MSG_COPY and MSG_EXCEPT--that modify the meaning of the 'msgtyp' argument in unrelated ways. Specifying both in the same call is a logical error that is currently permitted, with the effect that MSG_COPY has priority and MSG_EXCEPT is ignored. The call should give an error if both flags are specified. The patch below implements that behavior. ===== (B) (B) MSG_COPY + !IPC_NOWAIT ===== The test code that was submitted in commit 3a665531a3b7 ("selftests: IPC message queue copy feature test") shows MSG_COPY being used in conjunction with IPC_NOWAIT. In other words, if there is no message at the position 'msgtyp'. return immediately with the error in ENOMSG. What was not (fully) tested is the behavior if MSG_COPY is specified *without* IPC_NOWAIT, and there is an odd behavior. If the queue contains less than 'msgtyp' messages, then the call blocks until the next message is written to the queue. At that point, the msgrcv() call returns a copy of the newly added message, regardless of whether that message is at the ordinal position 'msgtyp'. This is clearly bogus, and problematic for applications that might want to make use of the MSG_COPY flag. I considered the following possible solutions to this problem: (1) Force the call to block until a message *does* appear at the position 'msgtyp'. (2) If the MSG_COPY flag is specified, the kernel should implicitly add IPC_NOWAIT, so that the call fails with ENOMSG for this case. (3) If the MSG_COPY flag is specified, but IPC_NOWAIT is not, generate an error (probably, EINVAL is the right one). I do not know if any application would really want to have the functionality of solution (1), especially since an application can determine in advance the number of messages in the queue using msgctl() IPC_STAT. Obviously, this solution would be the most work to implement. Solution (2) would have the effect of silently fixing any applications that tried to employ broken behavior. However, it would mean that if we later decided to implement solution (1), then user-space could not easily detect what the kernel supports (but, since I'm somewhat doubtful that solution (1) is needed, I'm not sure that this is much of a problem). Solution (3) would have the effect of informing broken applications that they are doing something broken. The downside is that this would cause a ABI breakage for any applications that are currently employing the broken behavior. However: a) Those applications are almost certainly not getting the results they expect. b) Possibly, those applications don't even exist, because MSG_COPY is currently hidden behind CONFIG_CHECKPOINT_RESTORE. The upside of solution (3) is that if we later decided to implement solution (1), user-space could determine what the kernel supports, via the error return. In my view, solution (3) is mildly preferable to solution (2), and solution (1) could still be done later if anyone really cares. The patch below implements solution (3). PS. For anyone out there still listening, it's the usual story: documenting an API (and the thinking about, and the testing of the API, that documentation entails) is the one of the single best ways of finding bugs in the API, as I've learned from a lot of experience. Best to do that documentation before releasing the API. Signed-off-by: Michael Kerrisk Acked-by: Stanislav Kinsbursky Cc: Stanislav Kinsbursky Cc: stable@vger.kernel.org Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Signed-off-by: Linus Torvalds diff --git a/ipc/msg.c b/ipc/msg.c index 245db11..6498531 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -901,6 +901,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgfl return -EINVAL; if (msgflg & MSG_COPY) { + if ((msgflg & MSG_EXCEPT) || !(msgflg & IPC_NOWAIT)) + return -EINVAL; copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax)); if (IS_ERR(copy)) return PTR_ERR(copy); -- cgit v0.10.2 From dcb99fd9b08cfe1afe426af4d8d3cbc429190f15 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Mar 2014 18:51:24 -0700 Subject: Linux 3.14-rc7 diff --git a/Makefile b/Makefile index 1a2628e..ef779ec 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From a8031d2ce15bdb90baeae02d7a231ccece73da8b Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 22 Jan 2014 14:39:57 +0000 Subject: MIPS: asm: syscall: Fix copying system call arguments The syscall_get_arguments function expects the arguments to be copied to the '*args' argument but instead a local variable was used to hold the system call argument. As a result of which, this variable was never passed to the filter and any filter testing the system call arguments would fail. This is fixed by passing the '*args' variable as the destination memory for the system call arguments. Signed-off-by: Markos Chandras Reviewed-by: Paul Burton Reviewed-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6402/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 5ce530f..a7e8a53 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -86,11 +86,10 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { - unsigned long arg; int ret; while (n--) - ret |= mips_get_syscall_arg(&arg, task, regs, i++); + ret |= mips_get_syscall_arg(args++, task, regs, i++); /* * No way to communicate an error because this is a void function. -- cgit v0.10.2 From 86ca57b5a5525dbf89fc2a3285781fae807276b0 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Mon, 17 Mar 2014 12:14:13 +0100 Subject: MIPS: Fix syscall tracing interface Fix pointer computation for stack-based arguments. Signed-off-by: Lars Persson Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6620/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index a7e8a53..f35b131 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -40,14 +40,14 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg, #ifdef CONFIG_32BIT case 4: case 5: case 6: case 7: - return get_user(*arg, (int *)usp + 4 * n); + return get_user(*arg, (int *)usp + n); #endif #ifdef CONFIG_64BIT case 4: case 5: case 6: case 7: #ifdef CONFIG_MIPS32_O32 if (test_thread_flag(TIF_32BIT_REGS)) - return get_user(*arg, (int *)usp + 4 * n); + return get_user(*arg, (int *)usp + n); else #endif *arg = regs->regs[4 + n]; -- cgit v0.10.2 From a4671094227d11985c06ee1178d7205c5fd39f8a Mon Sep 17 00:00:00 2001 From: Viller Hsiao Date: Sat, 22 Feb 2014 15:46:49 +0800 Subject: MIPS: ftrace: Fix icache flush range error In 32-bit mode, the start address passed to flush_icache_range is shifted by 4 bytes before the second safe_store_code() call. This causes system crash from time to time because the first 4 bytes might not be flushed properly. This bug exists since linux-3.8. Also remove obsoleted comment while at it. Signed-off-by: Viller Hsiao Cc: linux-mips@linux-mips.org Cc: rostedt@goodmis.org Cc: fweisbec@gmail.com Cc: mingo@redhat.com Cc: Qais.Yousef@imgtec.com Patchwork: https://patchwork.linux-mips.org/patch/6586/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 185ba25..374ed74 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -111,11 +111,10 @@ static int ftrace_modify_code_2(unsigned long ip, unsigned int new_code1, safe_store_code(new_code1, ip, faulted); if (unlikely(faulted)) return -EFAULT; - ip += 4; - safe_store_code(new_code2, ip, faulted); + safe_store_code(new_code2, ip + 4, faulted); if (unlikely(faulted)) return -EFAULT; - flush_icache_range(ip, ip + 8); /* original ip + 12 */ + flush_icache_range(ip, ip + 8); return 0; } #endif -- cgit v0.10.2 From 1bf9d885658cbee1bc8e4324d0e27b02b1540d58 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 17 Mar 2014 14:06:57 +0100 Subject: ata: ahci_sunxi: make ahci_sunxi_resume() static Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo Acked-by: Hans de Goede diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c index d1bf3f7..0af6cb8 100644 --- a/drivers/ata/ahci_sunxi.c +++ b/drivers/ata/ahci_sunxi.c @@ -197,7 +197,7 @@ disable_resources: } #ifdef CONFIG_PM_SLEEP -int ahci_sunxi_resume(struct device *dev) +static int ahci_sunxi_resume(struct device *dev) { struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; -- cgit v0.10.2 From cdf457a4fe30980f7c15a894af2f954f85cd71d2 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 17 Mar 2014 14:08:12 +0100 Subject: ata: ahci_sunxi: fix code formatting Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo Acked-by: Hans de Goede diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c index 0af6cb8..42d3f64 100644 --- a/drivers/ata/ahci_sunxi.c +++ b/drivers/ata/ahci_sunxi.c @@ -27,28 +27,28 @@ #include #include "ahci.h" -#define AHCI_BISTAFR 0x00a0 -#define AHCI_BISTCR 0x00a4 -#define AHCI_BISTFCTR 0x00a8 -#define AHCI_BISTSR 0x00ac -#define AHCI_BISTDECR 0x00b0 -#define AHCI_DIAGNR0 0x00b4 -#define AHCI_DIAGNR1 0x00b8 -#define AHCI_OOBR 0x00bc -#define AHCI_PHYCS0R 0x00c0 -#define AHCI_PHYCS1R 0x00c4 -#define AHCI_PHYCS2R 0x00c8 -#define AHCI_TIMER1MS 0x00e0 -#define AHCI_GPARAM1R 0x00e8 -#define AHCI_GPARAM2R 0x00ec -#define AHCI_PPARAMR 0x00f0 -#define AHCI_TESTR 0x00f4 -#define AHCI_VERSIONR 0x00f8 -#define AHCI_IDR 0x00fc -#define AHCI_RWCR 0x00fc -#define AHCI_P0DMACR 0x0170 -#define AHCI_P0PHYCR 0x0178 -#define AHCI_P0PHYSR 0x017c +#define AHCI_BISTAFR 0x00a0 +#define AHCI_BISTCR 0x00a4 +#define AHCI_BISTFCTR 0x00a8 +#define AHCI_BISTSR 0x00ac +#define AHCI_BISTDECR 0x00b0 +#define AHCI_DIAGNR0 0x00b4 +#define AHCI_DIAGNR1 0x00b8 +#define AHCI_OOBR 0x00bc +#define AHCI_PHYCS0R 0x00c0 +#define AHCI_PHYCS1R 0x00c4 +#define AHCI_PHYCS2R 0x00c8 +#define AHCI_TIMER1MS 0x00e0 +#define AHCI_GPARAM1R 0x00e8 +#define AHCI_GPARAM2R 0x00ec +#define AHCI_PPARAMR 0x00f0 +#define AHCI_TESTR 0x00f4 +#define AHCI_VERSIONR 0x00f8 +#define AHCI_IDR 0x00fc +#define AHCI_RWCR 0x00fc +#define AHCI_P0DMACR 0x0170 +#define AHCI_P0PHYCR 0x0178 +#define AHCI_P0PHYSR 0x017c static void sunxi_clrbits(void __iomem *reg, u32 clr_val) { -- cgit v0.10.2 From 184d0f0c06afb8f3b583db7b8d712caae06a24dd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 14 Mar 2014 18:02:01 +0100 Subject: s390: update defconfigs Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index e0af2ee..ddaae2f 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -46,6 +46,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y CONFIG_MARCH_Z9_109=y +CONFIG_NR_CPUS=256 CONFIG_PREEMPT=y CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y @@ -58,7 +59,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -CONFIG_ZFCPDUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y @@ -101,7 +101,6 @@ CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6=y -CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -111,6 +110,7 @@ CONFIG_INET6_XFRM_MODE_TRANSPORT=m CONFIG_INET6_XFRM_MODE_TUNNEL=m CONFIG_INET6_XFRM_MODE_BEET=m CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y @@ -135,7 +135,17 @@ CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NETFILTER_TPROXY=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -204,7 +214,9 @@ CONFIG_IP_SET_HASH_IP=m CONFIG_IP_SET_HASH_IPPORT=m CONFIG_IP_SET_HASH_IPPORTIP=m CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m @@ -227,6 +239,11 @@ CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_REJECT_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -249,6 +266,9 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -268,6 +288,7 @@ CONFIG_IP6_NF_SECURITY=m CONFIG_NF_NAT_IPV6=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m @@ -314,6 +335,7 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -381,8 +403,8 @@ CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m -CONFIG_DM_RAID=m CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m @@ -434,7 +456,6 @@ CONFIG_TN3270_FS=y CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m -CONFIG_ZVM_WATCHDOG=m # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m @@ -534,13 +555,23 @@ CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_SELFTEST=y +CONFIG_DEBUG_OBJECTS_FREE=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_WORK=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y +CONFIG_DEBUG_KMEMLEAK=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_RB=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_RT_MUTEX_TESTER=y @@ -573,9 +604,11 @@ CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_KPROBE_EVENT is not set CONFIG_LKDTM=m +CONFIG_TEST_LIST_SORT=y CONFIG_KPROBES_SANITY_TEST=y -CONFIG_RBTREE_TEST=m +CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m +CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y CONFIG_DMA_API_DEBUG=y # CONFIG_STRICT_DEVMEM is not set @@ -638,7 +671,6 @@ CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=m CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m -CONFIG_PUBLIC_KEY_ALGO_RSA=m CONFIG_X509_CERTIFICATE_PARSER=m CONFIG_CRC7=m CONFIG_CRC8=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index b9f6b4c..c81a74e 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -46,6 +46,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y CONFIG_MARCH_Z9_109=y +CONFIG_NR_CPUS=256 CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -56,7 +57,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -CONFIG_ZFCPDUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y @@ -99,7 +99,6 @@ CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6=y -CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -109,6 +108,7 @@ CONFIG_INET6_XFRM_MODE_TRANSPORT=m CONFIG_INET6_XFRM_MODE_TUNNEL=m CONFIG_INET6_XFRM_MODE_BEET=m CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y @@ -133,7 +133,17 @@ CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NETFILTER_TPROXY=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -202,7 +212,9 @@ CONFIG_IP_SET_HASH_IP=m CONFIG_IP_SET_HASH_IPPORT=m CONFIG_IP_SET_HASH_IPPORTIP=m CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m @@ -225,6 +237,11 @@ CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_REJECT_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -247,6 +264,9 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -266,6 +286,7 @@ CONFIG_IP6_NF_SECURITY=m CONFIG_NF_NAT_IPV6=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m @@ -311,6 +332,7 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -378,8 +400,8 @@ CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m -CONFIG_DM_RAID=m CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m @@ -431,7 +453,6 @@ CONFIG_TN3270_FS=y CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m -CONFIG_ZVM_WATCHDOG=m # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m @@ -540,6 +561,7 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_LKDTM=m CONFIG_RBTREE_TEST=m CONFIG_INTERVAL_TREE_TEST=m +CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y @@ -601,7 +623,6 @@ CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=m CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m -CONFIG_PUBLIC_KEY_ALGO_RSA=m CONFIG_X509_CERTIFICATE_PARSER=m CONFIG_CRC7=m CONFIG_CRC8=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 91087b4..b5ba8fe 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -44,6 +44,7 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_DEFAULT_DEADLINE=y CONFIG_MARCH_Z9_109=y +CONFIG_NR_CPUS=256 CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -54,7 +55,6 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_CHSC_SCH=y CONFIG_CRASH_DUMP=y -CONFIG_ZFCPDUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_HIBERNATION=y @@ -97,7 +97,6 @@ CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m CONFIG_IPV6=y -CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m @@ -107,6 +106,7 @@ CONFIG_INET6_XFRM_MODE_TRANSPORT=m CONFIG_INET6_XFRM_MODE_TUNNEL=m CONFIG_INET6_XFRM_MODE_BEET=m CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m +CONFIG_IPV6_VTI=m CONFIG_IPV6_SIT=m CONFIG_IPV6_GRE=m CONFIG_IPV6_MULTIPLE_TABLES=y @@ -131,7 +131,17 @@ CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_NETLINK_TIMEOUT=m -CONFIG_NETFILTER_TPROXY=m +CONFIG_NF_TABLES=m +CONFIG_NFT_EXTHDR=m +CONFIG_NFT_META=m +CONFIG_NFT_CT=m +CONFIG_NFT_RBTREE=m +CONFIG_NFT_HASH=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_LOG=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_NAT=m +CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -200,7 +210,9 @@ CONFIG_IP_SET_HASH_IP=m CONFIG_IP_SET_HASH_IPPORT=m CONFIG_IP_SET_HASH_IPPORTIP=m CONFIG_IP_SET_HASH_IPPORTNET=m +CONFIG_IP_SET_HASH_NETPORTNET=m CONFIG_IP_SET_HASH_NET=m +CONFIG_IP_SET_HASH_NETNET=m CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m @@ -223,6 +235,11 @@ CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set +CONFIG_NF_TABLES_IPV4=m +CONFIG_NFT_REJECT_IPV4=m +CONFIG_NFT_CHAIN_ROUTE_IPV4=m +CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NF_TABLES_ARP=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m @@ -245,6 +262,9 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m +CONFIG_NF_TABLES_IPV6=m +CONFIG_NFT_CHAIN_ROUTE_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -264,6 +284,7 @@ CONFIG_IP6_NF_SECURITY=m CONFIG_NF_NAT_IPV6=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m +CONFIG_NF_TABLES_BRIDGE=m CONFIG_NET_SCTPPROBE=m CONFIG_RDS=m CONFIG_RDS_RDMA=m @@ -309,6 +330,7 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_FLOW=m CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_CLS_BPF=m CONFIG_NET_CLS_ACT=y CONFIG_NET_ACT_POLICE=m CONFIG_NET_ACT_GACT=m @@ -376,8 +398,8 @@ CONFIG_BLK_DEV_DM=m CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m -CONFIG_DM_RAID=m CONFIG_DM_LOG_USERSPACE=m +CONFIG_DM_RAID=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_DM_MULTIPATH_QL=m @@ -429,7 +451,6 @@ CONFIG_TN3270_FS=y CONFIG_WATCHDOG=y CONFIG_WATCHDOG_NOWAYOUT=y CONFIG_SOFT_WATCHDOG=m -CONFIG_ZVM_WATCHDOG=m # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_INFINIBAND=m @@ -532,6 +553,7 @@ CONFIG_LATENCYTOP=y CONFIG_BLK_DEV_IO_TRACE=y # CONFIG_KPROBE_EVENT is not set CONFIG_LKDTM=m +CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y # CONFIG_STRICT_DEVMEM is not set CONFIG_S390_PTDUMP=y @@ -593,7 +615,6 @@ CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_ASYMMETRIC_KEY_TYPE=m CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m -CONFIG_PUBLIC_KEY_ALGO_RSA=m CONFIG_X509_CERTIFICATE_PARSER=m CONFIG_CRC7=m CONFIG_CRC8=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index d725c4d..cef073c 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -19,7 +19,6 @@ CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set CONFIG_CRASH_DUMP=y -CONFIG_ZFCPDUMP=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_SECCOMP is not set # CONFIG_IUCV is not set diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 33f5751..4557cb7 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -40,6 +40,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y CONFIG_DEFAULT_DEADLINE=y CONFIG_MARCH_Z196=y +CONFIG_NR_CPUS=256 CONFIG_HZ_100=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y @@ -122,22 +123,31 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_HUGETLBFS=y # CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_UNUSED_SYMBOLS=y +CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_TIMER_STATS=y +CONFIG_DEBUG_RT_MUTEXES=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_WRITECOUNT=y CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_PROVE_RCU=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_RCU_TRACE=y CONFIG_LATENCYTOP=y +CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_KPROBES_SANITY_TEST=y # CONFIG_STRICT_DEVMEM is not set +CONFIG_S390_PTDUMP=y CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_AUTHENC=m CONFIG_CRYPTO_TEST=m -- cgit v0.10.2 From 36a554021b44d146178ae98e598c9502a1269f7d Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 17 Mar 2014 11:31:28 +0100 Subject: hypfs: Add clarification for "weight_min" attribute The "weight_min" attribute got the wrong name. The value represents the number of non-stopped (operating) CPUS. Therefore add a note and rename the struct member to "ocpus". Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 24908ce..32040ac 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -32,7 +32,7 @@ struct diag2fc_data { __u32 pcpus; __u32 lcpus; __u32 vcpus; - __u32 cpu_min; + __u32 ocpus; __u32 cpu_max; __u32 cpu_shares; __u32 cpu_use_samp; @@ -142,7 +142,12 @@ static int hpyfs_vm_create_guest(struct dentry *systems_dir, ATTRIBUTE(cpus_dir, "capped", capped_value); ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); ATTRIBUTE(cpus_dir, "count", data->vcpus); - ATTRIBUTE(cpus_dir, "weight_min", data->cpu_min); + /* + * Note: The "weight_min" attribute got the wrong name. + * The value represents the number of non-stopped (operating) + * CPUS. + */ + ATTRIBUTE(cpus_dir, "weight_min", data->ocpus); ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); -- cgit v0.10.2 From cf813db0b448b45b454f0983329c3c7b007f9ab7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 10 Mar 2014 14:50:16 +0100 Subject: s390/smp: limit number of cpus in possible cpu mask Limit the number of bits to the maximum number of cpus a machine can have. possible_cpu_mask typically will have more bits set than a machine may physically have. This results in wasted memory during per-cpu memory allocations, if the possible mask contains more cpus than physically possible for a given configuration. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index abaca22..2f5e993 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -46,6 +46,7 @@ int sclp_cpu_configure(u8 cpu); int sclp_cpu_deconfigure(u8 cpu); unsigned long long sclp_get_rnmax(void); unsigned long long sclp_get_rzm(void); +unsigned int sclp_get_max_cpu(void); int sclp_sdias_blk_count(void); int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); int sclp_chp_configure(struct chp_id chpid); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index a7125b6..8827883 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -773,11 +773,11 @@ void __noreturn cpu_die(void) void __init smp_fill_possible_mask(void) { - unsigned int possible, cpu; + unsigned int possible, sclp, cpu; - possible = setup_possible_cpus; - if (!possible) - possible = MACHINE_IS_VM ? 64 : nr_cpu_ids; + sclp = sclp_get_max_cpu() ?: nr_cpu_ids; + possible = setup_possible_cpus ?: nr_cpu_ids; + possible = min(possible, sclp); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) set_cpu_possible(cpu, true); } diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 2c6aac6..14196ea 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -20,7 +20,9 @@ struct read_info_sccb { struct sccb_header header; /* 0-7 */ u16 rnmax; /* 8-9 */ u8 rnsize; /* 10 */ - u8 _reserved0[24 - 11]; /* 11-15 */ + u8 _reserved0[16 - 11]; /* 11-15 */ + u16 ncpurl; /* 16-17 */ + u8 _reserved7[24 - 18]; /* 18-23 */ u8 loadparm[8]; /* 24-31 */ u8 _reserved1[48 - 32]; /* 32-47 */ u64 facilities; /* 48-55 */ @@ -32,13 +34,16 @@ struct read_info_sccb { u8 _reserved4[100 - 92]; /* 92-99 */ u32 rnsize2; /* 100-103 */ u64 rnmax2; /* 104-111 */ - u8 _reserved5[4096 - 112]; /* 112-4095 */ + u8 _reserved5[120 - 112]; /* 112-119 */ + u16 hcpua; /* 120-121 */ + u8 _reserved6[4096 - 122]; /* 122-4095 */ } __packed __aligned(PAGE_SIZE); static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata; static unsigned int sclp_con_has_vt220 __initdata; static unsigned int sclp_con_has_linemode __initdata; static unsigned long sclp_hsa_size; +static unsigned int sclp_max_cpu; static struct sclp_ipl_info sclp_ipl_info; u64 sclp_facilities; @@ -102,6 +107,15 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp_rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp_rzm <<= 20; + if (!sccb->hcpua) { + if (MACHINE_IS_VM) + sclp_max_cpu = 64; + else + sclp_max_cpu = sccb->ncpurl; + } else { + sclp_max_cpu = sccb->hcpua + 1; + } + /* Save IPL information */ sclp_ipl_info.is_valid = 1; if (sccb->flags & 0x2) @@ -129,6 +143,11 @@ unsigned long long sclp_get_rzm(void) return sclp_rzm; } +unsigned int sclp_get_max_cpu(void) +{ + return sclp_max_cpu; +} + /* * This function will be called after sclp_facilities_detect(), which gets * called from early.c code. The sclp_facilities_detect() function retrieves -- cgit v0.10.2 From 06e2e88292e9ea6f5a23ead2e9c5ccf8bbd99e93 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 14 Feb 2014 17:55:18 +0000 Subject: MIPS: mark O32+FP64 experimental for now Commit 597ce1723e0f "MIPS: Support for 64-bit FP with O32 binaries" introduced support for setting Status.FR=1 for O32 binaries with the EF_MIPS_FP64 ELF header flag set. Whilst this flag is currently supported by binutils it does introduce an ABI break within userland. Objects built with EF_MIPS_FP64 cannot be safely linked with those built without it since code in either object may assume behaviour specific to a value of FR. More recently there has been discussion around avoiding further fragmentation of the O32 ABI whilst still allowing the use of FR=1 and features such as MSA which depend upon it. Details of the plan to allow this are still being worked on, and whilst the kernel will need the ability to handle FR=1 with O32 tasks it is unclear what else it may need to provide to a userland which seeks to avoid another ABI break. In order to prevent the proliferation of userland which may rely upon the current EF_MIPS_FP64 behaviour this patch marks the kernel support for it experimental & disables it by default. Under current proposals it is likely that this support can simply be enabled again later, but possibly after the introduction of further interfaces with userland and support for the MIPS R5 UFR feature. Signed-off-by: Paul Burton Cc: Matthew Fortune Cc: linux-mips@linux-mips.org Cc: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/6549/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index dcae3a7..1534474 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2353,9 +2353,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. config MIPS_O32_FP64_SUPPORT - bool "Support for O32 binaries using 64-bit FP" + bool "Support for O32 binaries using 64-bit FP (EXPERIMENTAL)" depends on 32BIT || MIPS32_O32 - default y help When this is enabled, the kernel will support use of 64-bit floating point registers with binaries using the O32 ABI along with the @@ -2367,7 +2366,14 @@ config MIPS_O32_FP64_SUPPORT of your kernel & potentially improve FP emulation performance by saying N here. - If unsure, say Y. + Although binutils currently supports use of this flag the details + concerning its effect upon the O32 ABI in userland are still being + worked on. In order to avoid userland becoming dependant upon current + behaviour before the details have been finalised, this option should + be considered experimental and only enabled by those working upon + said details. + + If unsure, say N. config USE_OF bool -- cgit v0.10.2 From 71b70ee9350f239ea021bbb737771ebd5d02c020 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Fri, 14 Mar 2014 17:53:18 -0600 Subject: arm64: Add APM X-Gene SoC 15Gbps Multi-purpose PHY DTS entries This patch adds the DTS entries for the APM X-Gene SoC 15Gbps Multi-purpose PHY driver. The PHY for SATA controller 2 and 3 are enabled by default. Signed-off-by: Loc Ho Signed-off-by: Tuan Phan Signed-off-by: Suman Tripathi Acked-by: Arnd Bergmann Signed-off-by: Tejun Heo diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index d37d736..6d4f493 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -176,6 +176,48 @@ reg-names = "csr-reg"; clock-output-names = "eth8clk"; }; + + sataphy1clk: sataphy1clk@1f21c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f21c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy1clk"; + status = "disabled"; + csr-offset = <0x4>; + csr-mask = <0x00>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; + + sataphy2clk: sataphy1clk@1f22c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f22c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy2clk"; + status = "ok"; + csr-offset = <0x4>; + csr-mask = <0x3a>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; + + sataphy3clk: sataphy1clk@1f23c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f23c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sataphy3clk"; + status = "ok"; + csr-offset = <0x4>; + csr-mask = <0x3a>; + enable-offset = <0x0>; + enable-mask = <0x06>; + }; }; serial0: serial@1c020000 { @@ -187,5 +229,35 @@ interrupt-parent = <&gic>; interrupts = <0x0 0x4c 0x4>; }; + + phy1: phy@1f21a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f21a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy1clk 0>; + status = "disabled"; + apm,tx-boost-gain = <30 30 30 30 30 30>; + apm,tx-eye-tuning = <2 10 10 2 10 10>; + }; + + phy2: phy@1f22a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f22a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy2clk 0>; + status = "ok"; + apm,tx-boost-gain = <30 30 30 30 30 30>; + apm,tx-eye-tuning = <1 10 10 2 10 10>; + }; + + phy3: phy@1f23a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f23a000 0x0 0x100>; + #phy-cells = <1>; + clocks = <&sataphy3clk 0>; + status = "ok"; + apm,tx-boost-gain = <31 31 31 31 31 31>; + apm,tx-eye-tuning = <2 10 10 2 10 10>; + }; }; }; -- cgit v0.10.2 From 1ccaead5f5a66162fafd1ce1e6282df04e181d0a Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Fri, 14 Mar 2014 17:53:19 -0600 Subject: Documentation: Add documentation for the APM X-Gene SoC SATA host controller DTS binding This patch adds documentation for the APM X-Gene SoC SATA host controller DTS binding. Signed-off-by: Loc Ho Signed-off-by: Tuan Phan Signed-off-by: Suman Tripathi Acked-by: Arnd Bergmann Signed-off-by: Tejun Heo diff --git a/Documentation/devicetree/bindings/ata/apm-xgene.txt b/Documentation/devicetree/bindings/ata/apm-xgene.txt new file mode 100644 index 0000000..7bcfbf5 --- /dev/null +++ b/Documentation/devicetree/bindings/ata/apm-xgene.txt @@ -0,0 +1,76 @@ +* APM X-Gene 6.0 Gb/s SATA host controller nodes + +SATA host controller nodes are defined to describe on-chip Serial ATA +controllers. Each SATA controller (pair of ports) have its own node. + +Required properties: +- compatible : Shall contain: + * "apm,xgene-ahci" +- reg : First memory resource shall be the AHCI memory + resource. + Second memory resource shall be the host controller + core memory resource. + Third memory resource shall be the host controller + diagnostic memory resource. + 4th memory resource shall be the host controller + AXI memory resource. + 5th optional memory resource shall be the host + controller MUX memory resource if required. +- interrupts : Interrupt-specifier for SATA host controller IRQ. +- clocks : Reference to the clock entry. +- phys : A list of phandles + phy-specifiers, one for each + entry in phy-names. +- phy-names : Should contain: + * "sata-phy" for the SATA 6.0Gbps PHY + +Optional properties: +- status : Shall be "ok" if enabled or "disabled" if disabled. + Default is "ok". + +Example: + sataclk: sataclk { + compatible = "fixed-clock"; + #clock-cells = <1>; + clock-frequency = <100000000>; + clock-output-names = "sataclk"; + }; + + phy2: phy@1f22a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f22a000 0x0 0x100>; + #phy-cells = <1>; + }; + + phy3: phy@1f23a000 { + compatible = "apm,xgene-phy"; + reg = <0x0 0x1f23a000 0x0 0x100>; + #phy-cells = <1>; + }; + + sata2: sata@1a400000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a400000 0x0 0x1000>, + <0x0 0x1f220000 0x0 0x1000>, + <0x0 0x1f22d000 0x0 0x1000>, + <0x0 0x1f22e000 0x0 0x1000>, + <0x0 0x1f227000 0x0 0x1000>; + interrupts = <0x0 0x87 0x4>; + status = "ok"; + clocks = <&sataclk 0>; + phys = <&phy2 0>; + phy-names = "sata-phy"; + }; + + sata3: sata@1a800000 { + compatible = "apm,xgene-ahci-pcie"; + reg = <0x0 0x1a800000 0x0 0x1000>, + <0x0 0x1f230000 0x0 0x1000>, + <0x0 0x1f23d000 0x0 0x1000>, + <0x0 0x1f23e000 0x0 0x1000>, + <0x0 0x1f237000 0x0 0x1000>; + interrupts = <0x0 0x88 0x4>; + status = "ok"; + clocks = <&sataclk 0>; + phys = <&phy3 0>; + phy-names = "sata-phy"; + }; -- cgit v0.10.2 From 81d01bfa51300d14191e0013856a7b25f809468f Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Fri, 14 Mar 2014 17:53:20 -0600 Subject: ata: Add APM X-Gene SoC AHCI SATA host controller driver This patch adds support for the APM X-Gene SoC AHCI SATA host controller driver. It requires the corresponding APM X-Gene SoC PHY driver. This initial version only supports Gen3 speed. Signed-off-by: Loc Ho Signed-off-by: Tuan Phan Signed-off-by: Suman Tripathi Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 0bec5ac..9f3e326 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -124,6 +124,13 @@ config AHCI_SUNXI If unsure, say N. +config AHCI_XGENE + tristate "APM X-Gene 6.0Gbps AHCI SATA host controller support" + depends on SATA_AHCI_PLATFORM && (ARM64 || COMPILE_TEST) + select PHY_XGENE + help + This option enables support for APM X-Gene SoC SATA host controller. + config SATA_FSL tristate "Freescale 3.0Gbps SATA support" depends on FSL_SOC diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index e20148c..095d461 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o obj-$(CONFIG_AHCI_IMX) += ahci_imx.o obj-$(CONFIG_AHCI_SUNXI) += ahci_sunxi.o obj-$(CONFIG_AHCI_ST) += ahci_st.o +obj-$(CONFIG_AHCI_XGENE) += ahci_xgene.o # SFF w/ custom DMA obj-$(CONFIG_PDC_ADMA) += pdc_adma.o diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c new file mode 100644 index 0000000..fcf21f1 --- /dev/null +++ b/drivers/ata/ahci_xgene.c @@ -0,0 +1,486 @@ +/* + * AppliedMicro X-Gene SoC SATA Host Controller Driver + * + * Copyright (c) 2014, Applied Micro Circuits Corporation + * Author: Loc Ho + * Tuan Phan + * Suman Tripathi + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * NOTE: PM support is not currently available. + * + */ +#include +#include +#include +#include +#include +#include +#include "ahci.h" + +/* Max # of disk per a controller */ +#define MAX_AHCI_CHN_PERCTR 2 + +/* MUX CSR */ +#define SATA_ENET_CONFIG_REG 0x00000000 +#define CFG_SATA_ENET_SELECT_MASK 0x00000001 + +/* SATA core host controller CSR */ +#define SLVRDERRATTRIBUTES 0x00000000 +#define SLVWRERRATTRIBUTES 0x00000004 +#define MSTRDERRATTRIBUTES 0x00000008 +#define MSTWRERRATTRIBUTES 0x0000000c +#define BUSCTLREG 0x00000014 +#define IOFMSTRWAUX 0x00000018 +#define INTSTATUSMASK 0x0000002c +#define ERRINTSTATUS 0x00000030 +#define ERRINTSTATUSMASK 0x00000034 + +/* SATA host AHCI CSR */ +#define PORTCFG 0x000000a4 +#define PORTADDR_SET(dst, src) \ + (((dst) & ~0x0000003f) | (((u32)(src)) & 0x0000003f)) +#define PORTPHY1CFG 0x000000a8 +#define PORTPHY1CFG_FRCPHYRDY_SET(dst, src) \ + (((dst) & ~0x00100000) | (((u32)(src) << 0x14) & 0x00100000)) +#define PORTPHY2CFG 0x000000ac +#define PORTPHY3CFG 0x000000b0 +#define PORTPHY4CFG 0x000000b4 +#define PORTPHY5CFG 0x000000b8 +#define SCTL0 0x0000012C +#define PORTPHY5CFG_RTCHG_SET(dst, src) \ + (((dst) & ~0xfff00000) | (((u32)(src) << 0x14) & 0xfff00000)) +#define PORTAXICFG_EN_CONTEXT_SET(dst, src) \ + (((dst) & ~0x01000000) | (((u32)(src) << 0x18) & 0x01000000)) +#define PORTAXICFG 0x000000bc +#define PORTAXICFG_OUTTRANS_SET(dst, src) \ + (((dst) & ~0x00f00000) | (((u32)(src) << 0x14) & 0x00f00000)) + +/* SATA host controller AXI CSR */ +#define INT_SLV_TMOMASK 0x00000010 + +/* SATA diagnostic CSR */ +#define CFG_MEM_RAM_SHUTDOWN 0x00000070 +#define BLOCK_MEM_RDY 0x00000074 + +struct xgene_ahci_context { + struct ahci_host_priv *hpriv; + struct device *dev; + void __iomem *csr_core; /* Core CSR address of IP */ + void __iomem *csr_diag; /* Diag CSR address of IP */ + void __iomem *csr_axi; /* AXI CSR address of IP */ + void __iomem *csr_mux; /* MUX CSR address of IP */ +}; + +static int xgene_ahci_init_memram(struct xgene_ahci_context *ctx) +{ + dev_dbg(ctx->dev, "Release memory from shutdown\n"); + writel(0x0, ctx->csr_diag + CFG_MEM_RAM_SHUTDOWN); + readl(ctx->csr_diag + CFG_MEM_RAM_SHUTDOWN); /* Force a barrier */ + msleep(1); /* reset may take up to 1ms */ + if (readl(ctx->csr_diag + BLOCK_MEM_RDY) != 0xFFFFFFFF) { + dev_err(ctx->dev, "failed to release memory from shutdown\n"); + return -ENODEV; + } + return 0; +} + +/** + * xgene_ahci_read_id - Read ID data from the specified device + * @dev: device + * @tf: proposed taskfile + * @id: data buffer + * + * This custom read ID function is required due to the fact that the HW + * does not support DEVSLP and the controller state machine may get stuck + * after processing the ID query command. + */ +static unsigned int xgene_ahci_read_id(struct ata_device *dev, + struct ata_taskfile *tf, u16 *id) +{ + u32 err_mask; + void __iomem *port_mmio = ahci_port_base(dev->link->ap); + + err_mask = ata_do_dev_read_id(dev, tf, id); + if (err_mask) + return err_mask; + + /* + * Mask reserved area. Word78 spec of Link Power Management + * bit15-8: reserved + * bit7: NCQ autosence + * bit6: Software settings preservation supported + * bit5: reserved + * bit4: In-order sata delivery supported + * bit3: DIPM requests supported + * bit2: DMA Setup FIS Auto-Activate optimization supported + * bit1: DMA Setup FIX non-Zero buffer offsets supported + * bit0: Reserved + * + * Clear reserved bit 8 (DEVSLP bit) as we don't support DEVSLP + */ + id[ATA_ID_FEATURE_SUPP] &= ~(1 << 8); + + /* + * Due to HW errata, restart the port if no other command active. + * Otherwise the controller may get stuck. + */ + if (!readl(port_mmio + PORT_CMD_ISSUE)) { + writel(PORT_CMD_FIS_RX, port_mmio + PORT_CMD); + readl(port_mmio + PORT_CMD); /* Force a barrier */ + writel(PORT_CMD_FIS_RX | PORT_CMD_START, port_mmio + PORT_CMD); + readl(port_mmio + PORT_CMD); /* Force a barrier */ + } + return 0; +} + +static void xgene_ahci_set_phy_cfg(struct xgene_ahci_context *ctx, int channel) +{ + void __iomem *mmio = ctx->hpriv->mmio; + u32 val; + + dev_dbg(ctx->dev, "port configure mmio 0x%p channel %d\n", + mmio, channel); + val = readl(mmio + PORTCFG); + val = PORTADDR_SET(val, channel == 0 ? 2 : 3); + writel(val, mmio + PORTCFG); + readl(mmio + PORTCFG); /* Force a barrier */ + /* Disable fix rate */ + writel(0x0001fffe, mmio + PORTPHY1CFG); + readl(mmio + PORTPHY1CFG); /* Force a barrier */ + writel(0x5018461c, mmio + PORTPHY2CFG); + readl(mmio + PORTPHY2CFG); /* Force a barrier */ + writel(0x1c081907, mmio + PORTPHY3CFG); + readl(mmio + PORTPHY3CFG); /* Force a barrier */ + writel(0x1c080815, mmio + PORTPHY4CFG); + readl(mmio + PORTPHY4CFG); /* Force a barrier */ + /* Set window negotiation */ + val = readl(mmio + PORTPHY5CFG); + val = PORTPHY5CFG_RTCHG_SET(val, 0x300); + writel(val, mmio + PORTPHY5CFG); + readl(mmio + PORTPHY5CFG); /* Force a barrier */ + val = readl(mmio + PORTAXICFG); + val = PORTAXICFG_EN_CONTEXT_SET(val, 0x1); /* Enable context mgmt */ + val = PORTAXICFG_OUTTRANS_SET(val, 0xe); /* Set outstanding */ + writel(val, mmio + PORTAXICFG); + readl(mmio + PORTAXICFG); /* Force a barrier */ +} + +/** + * xgene_ahci_do_hardreset - Issue the actual COMRESET + * @link: link to reset + * @deadline: deadline jiffies for the operation + * @online: Return value to indicate if device online + * + * Due to the limitation of the hardware PHY, a difference set of setting is + * required for each supported disk speed - Gen3 (6.0Gbps), Gen2 (3.0Gbps), + * and Gen1 (1.5Gbps). Otherwise during long IO stress test, the PHY will + * report disparity error and etc. In addition, during COMRESET, there can + * be error reported in the register PORT_SCR_ERR. For SERR_DISPARITY and + * SERR_10B_8B_ERR, the PHY receiver line must be reseted. The following + * algorithm is followed to proper configure the hardware PHY during COMRESET: + * + * Alg Part 1: + * 1. Start the PHY at Gen3 speed (default setting) + * 2. Issue the COMRESET + * 3. If no link, go to Alg Part 3 + * 4. If link up, determine if the negotiated speed matches the PHY + * configured speed + * 5. If they matched, go to Alg Part 2 + * 6. If they do not matched and first time, configure the PHY for the linked + * up disk speed and repeat step 2 + * 7. Go to Alg Part 2 + * + * Alg Part 2: + * 1. On link up, if there are any SERR_DISPARITY and SERR_10B_8B_ERR error + * reported in the register PORT_SCR_ERR, then reset the PHY receiver line + * 2. Go to Alg Part 3 + * + * Alg Part 3: + * 1. Clear any pending from register PORT_SCR_ERR. + * + * NOTE: For the initial version, we will NOT support Gen1/Gen2. In addition + * and until the underlying PHY supports an method to reset the receiver + * line, on detection of SERR_DISPARITY or SERR_10B_8B_ERR errors, + * an warning message will be printed. + */ +static int xgene_ahci_do_hardreset(struct ata_link *link, + unsigned long deadline, bool *online) +{ + const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context); + struct ata_port *ap = link->ap; + struct ahci_host_priv *hpriv = ap->host->private_data; + struct xgene_ahci_context *ctx = hpriv->plat_data; + struct ahci_port_priv *pp = ap->private_data; + u8 *d2h_fis = pp->rx_fis + RX_FIS_D2H_REG; + void __iomem *port_mmio = ahci_port_base(ap); + struct ata_taskfile tf; + int rc; + u32 val; + + /* clear D2H reception area to properly wait for D2H FIS */ + ata_tf_init(link->device, &tf); + tf.command = ATA_BUSY; + ata_tf_to_fis(&tf, 0, 0, d2h_fis); + rc = sata_link_hardreset(link, timing, deadline, online, + ahci_check_ready); + + val = readl(port_mmio + PORT_SCR_ERR); + if (val & (SERR_DISPARITY | SERR_10B_8B_ERR)) + dev_warn(ctx->dev, "link has error\n"); + + /* clear all errors if any pending */ + val = readl(port_mmio + PORT_SCR_ERR); + writel(val, port_mmio + PORT_SCR_ERR); + + return rc; +} + +static int xgene_ahci_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) +{ + struct ata_port *ap = link->ap; + struct ahci_host_priv *hpriv = ap->host->private_data; + void __iomem *port_mmio = ahci_port_base(ap); + bool online; + int rc; + u32 portcmd_saved; + u32 portclb_saved; + u32 portclbhi_saved; + u32 portrxfis_saved; + u32 portrxfishi_saved; + + /* As hardreset resets these CSR, save it to restore later */ + portcmd_saved = readl(port_mmio + PORT_CMD); + portclb_saved = readl(port_mmio + PORT_LST_ADDR); + portclbhi_saved = readl(port_mmio + PORT_LST_ADDR_HI); + portrxfis_saved = readl(port_mmio + PORT_FIS_ADDR); + portrxfishi_saved = readl(port_mmio + PORT_FIS_ADDR_HI); + + ahci_stop_engine(ap); + + rc = xgene_ahci_do_hardreset(link, deadline, &online); + + /* As controller hardreset clears them, restore them */ + writel(portcmd_saved, port_mmio + PORT_CMD); + writel(portclb_saved, port_mmio + PORT_LST_ADDR); + writel(portclbhi_saved, port_mmio + PORT_LST_ADDR_HI); + writel(portrxfis_saved, port_mmio + PORT_FIS_ADDR); + writel(portrxfishi_saved, port_mmio + PORT_FIS_ADDR_HI); + + hpriv->start_engine(ap); + + if (online) + *class = ahci_dev_classify(ap); + + return rc; +} + +static void xgene_ahci_host_stop(struct ata_host *host) +{ + struct ahci_host_priv *hpriv = host->private_data; + + ahci_platform_disable_resources(hpriv); +} + +static struct ata_port_operations xgene_ahci_ops = { + .inherits = &ahci_ops, + .host_stop = xgene_ahci_host_stop, + .hardreset = xgene_ahci_hardreset, + .read_id = xgene_ahci_read_id, +}; + +static const struct ata_port_info xgene_ahci_port_info = { + AHCI_HFLAGS(AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ), + .flags = AHCI_FLAG_COMMON | ATA_FLAG_NCQ, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &xgene_ahci_ops, +}; + +static int xgene_ahci_hw_init(struct ahci_host_priv *hpriv) +{ + struct xgene_ahci_context *ctx = hpriv->plat_data; + int i; + int rc; + u32 val; + + /* Remove IP RAM out of shutdown */ + rc = xgene_ahci_init_memram(ctx); + if (rc) + return rc; + + for (i = 0; i < MAX_AHCI_CHN_PERCTR; i++) + xgene_ahci_set_phy_cfg(ctx, i); + + /* AXI disable Mask */ + writel(0xffffffff, hpriv->mmio + HOST_IRQ_STAT); + readl(hpriv->mmio + HOST_IRQ_STAT); /* Force a barrier */ + writel(0, ctx->csr_core + INTSTATUSMASK); + readl(ctx->csr_core + INTSTATUSMASK); /* Force a barrier */ + dev_dbg(ctx->dev, "top level interrupt mask 0x%X value 0x%08X\n", + INTSTATUSMASK, val); + + writel(0x0, ctx->csr_core + ERRINTSTATUSMASK); + readl(ctx->csr_core + ERRINTSTATUSMASK); /* Force a barrier */ + writel(0x0, ctx->csr_axi + INT_SLV_TMOMASK); + readl(ctx->csr_axi + INT_SLV_TMOMASK); + + /* Enable AXI Interrupt */ + writel(0xffffffff, ctx->csr_core + SLVRDERRATTRIBUTES); + writel(0xffffffff, ctx->csr_core + SLVWRERRATTRIBUTES); + writel(0xffffffff, ctx->csr_core + MSTRDERRATTRIBUTES); + writel(0xffffffff, ctx->csr_core + MSTWRERRATTRIBUTES); + + /* Enable coherency */ + val = readl(ctx->csr_core + BUSCTLREG); + val &= ~0x00000002; /* Enable write coherency */ + val &= ~0x00000001; /* Enable read coherency */ + writel(val, ctx->csr_core + BUSCTLREG); + + val = readl(ctx->csr_core + IOFMSTRWAUX); + val |= (1 << 3); /* Enable read coherency */ + val |= (1 << 9); /* Enable write coherency */ + writel(val, ctx->csr_core + IOFMSTRWAUX); + val = readl(ctx->csr_core + IOFMSTRWAUX); + dev_dbg(ctx->dev, "coherency 0x%X value 0x%08X\n", + IOFMSTRWAUX, val); + + return rc; +} + +static int xgene_ahci_mux_select(struct xgene_ahci_context *ctx) +{ + u32 val; + + /* Check for optional MUX resource */ + if (IS_ERR(ctx->csr_mux)) + return 0; + + val = readl(ctx->csr_mux + SATA_ENET_CONFIG_REG); + val &= ~CFG_SATA_ENET_SELECT_MASK; + writel(val, ctx->csr_mux + SATA_ENET_CONFIG_REG); + val = readl(ctx->csr_mux + SATA_ENET_CONFIG_REG); + return val & CFG_SATA_ENET_SELECT_MASK ? -1 : 0; +} + +static int xgene_ahci_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ahci_host_priv *hpriv; + struct xgene_ahci_context *ctx; + struct resource *res; + int rc; + + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + return PTR_ERR(hpriv); + + ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + hpriv->plat_data = ctx; + ctx->hpriv = hpriv; + ctx->dev = dev; + + /* Retrieve the IP core resource */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + ctx->csr_core = devm_ioremap_resource(dev, res); + if (IS_ERR(ctx->csr_core)) + return PTR_ERR(ctx->csr_core); + + /* Retrieve the IP diagnostic resource */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 2); + ctx->csr_diag = devm_ioremap_resource(dev, res); + if (IS_ERR(ctx->csr_diag)) + return PTR_ERR(ctx->csr_diag); + + /* Retrieve the IP AXI resource */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 3); + ctx->csr_axi = devm_ioremap_resource(dev, res); + if (IS_ERR(ctx->csr_axi)) + return PTR_ERR(ctx->csr_axi); + + /* Retrieve the optional IP mux resource */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 4); + ctx->csr_mux = devm_ioremap_resource(dev, res); + + dev_dbg(dev, "VAddr 0x%p Mmio VAddr 0x%p\n", ctx->csr_core, + hpriv->mmio); + + /* Select ATA */ + if ((rc = xgene_ahci_mux_select(ctx))) { + dev_err(dev, "SATA mux selection failed error %d\n", rc); + return -ENODEV; + } + + /* Due to errata, HW requires full toggle transition */ + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_resources; + ahci_platform_disable_clks(hpriv); + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + goto disable_resources; + + /* Configure the host controller */ + xgene_ahci_hw_init(hpriv); + + /* + * Setup DMA mask. This is preliminary until the DMA range is sorted + * out. + */ + rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); + if (rc) { + dev_err(dev, "Unable to set dma mask\n"); + goto disable_resources; + } + + rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info, 0, 0); + if (rc) + goto disable_resources; + + dev_dbg(dev, "X-Gene SATA host controller initialized\n"); + return 0; + +disable_resources: + ahci_platform_disable_resources(hpriv); + return rc; +} + +static const struct of_device_id xgene_ahci_of_match[] = { + {.compatible = "apm,xgene-ahci"}, + {}, +}; +MODULE_DEVICE_TABLE(of, xgene_ahci_of_match); + +static struct platform_driver xgene_ahci_driver = { + .probe = xgene_ahci_probe, + .remove = ata_platform_remove_one, + .driver = { + .name = "xgene-ahci", + .owner = THIS_MODULE, + .of_match_table = xgene_ahci_of_match, + }, +}; + +module_platform_driver(xgene_ahci_driver); + +MODULE_DESCRIPTION("APM X-Gene AHCI SATA driver"); +MODULE_AUTHOR("Loc Ho "); +MODULE_LICENSE("GPL"); +MODULE_VERSION("0.4"); -- cgit v0.10.2 From db8c0286d18c2d3eaec2c4da34767db0f4f6ffaa Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Fri, 14 Mar 2014 17:53:21 -0600 Subject: arm64: Add APM X-Gene SoC AHCI SATA host controller DTS entries This patch adds APM X-Gene SoC AHCI SATA host controller DTS entries. Signed-off-by: Loc Ho Signed-off-by: Tuan Phan Signed-off-by: Suman Tripathi Acked-by: Arnd Bergmann Signed-off-by: Tejun Heo diff --git a/arch/arm64/boot/dts/apm-storm.dtsi b/arch/arm64/boot/dts/apm-storm.dtsi index 6d4f493..93f4b2d 100644 --- a/arch/arm64/boot/dts/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm-storm.dtsi @@ -218,6 +218,45 @@ enable-offset = <0x0>; enable-mask = <0x06>; }; + + sata01clk: sata01clk@1f21c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f21c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata01clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; + + sata23clk: sata23clk@1f22c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f22c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata23clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; + + sata45clk: sata45clk@1f23c000 { + compatible = "apm,xgene-device-clock"; + #clock-cells = <1>; + clocks = <&socplldiv2 0>; + reg = <0x0 0x1f23c000 0x0 0x1000>; + reg-names = "csr-reg"; + clock-output-names = "sata45clk"; + csr-offset = <0x4>; + csr-mask = <0x05>; + enable-offset = <0x0>; + enable-mask = <0x39>; + }; }; serial0: serial@1c020000 { @@ -259,5 +298,46 @@ apm,tx-boost-gain = <31 31 31 31 31 31>; apm,tx-eye-tuning = <2 10 10 2 10 10>; }; + + sata1: sata@1a000000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a000000 0x0 0x1000>, + <0x0 0x1f210000 0x0 0x1000>, + <0x0 0x1f21d000 0x0 0x1000>, + <0x0 0x1f21e000 0x0 0x1000>, + <0x0 0x1f217000 0x0 0x1000>; + interrupts = <0x0 0x86 0x4>; + status = "disabled"; + clocks = <&sata01clk 0>; + phys = <&phy1 0>; + phy-names = "sata-phy"; + }; + + sata2: sata@1a400000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a400000 0x0 0x1000>, + <0x0 0x1f220000 0x0 0x1000>, + <0x0 0x1f22d000 0x0 0x1000>, + <0x0 0x1f22e000 0x0 0x1000>, + <0x0 0x1f227000 0x0 0x1000>; + interrupts = <0x0 0x87 0x4>; + status = "ok"; + clocks = <&sata23clk 0>; + phys = <&phy2 0>; + phy-names = "sata-phy"; + }; + + sata3: sata@1a800000 { + compatible = "apm,xgene-ahci"; + reg = <0x0 0x1a800000 0x0 0x1000>, + <0x0 0x1f230000 0x0 0x1000>, + <0x0 0x1f23d000 0x0 0x1000>, + <0x0 0x1f23e000 0x0 0x1000>; + interrupts = <0x0 0x88 0x4>; + status = "ok"; + clocks = <&sata45clk 0>; + phys = <&phy3 0>; + phy-names = "sata-phy"; + }; }; }; -- cgit v0.10.2 From 1535bd8adbdedd60a0ee62e28fd5225d66434371 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 14 Mar 2014 10:42:01 -0500 Subject: sparc64: don't treat 64-bit syscall return codes as 32-bit When checking a system call return code for an error, linux_sparc_syscall was sign-extending the lower 32-bit value and comparing it to -ERESTART_RESTARTBLOCK. lseek can return valid return codes whose lower 32-bits alone would indicate a failure (such as 4G-1). Use the whole 64-bit value to check for errors. Only the 32-bit path should sign extend the lower 32-bit value. Signed-off-by: Dave Kleikamp Acked-by: Bob Picco Acked-by: Allen Pais Cc: David S. Miller Cc: sparclinux@vger.kernel.org Signed-off-by: David S. Miller diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index 87729ff..33a17e7 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -189,7 +189,8 @@ linux_sparc_syscall32: mov %i0, %l5 ! IEU1 5: call %l7 ! CTI Group brk forced srl %i5, 0, %o5 ! IEU1 - ba,a,pt %xcc, 3f + ba,pt %xcc, 3f + sra %o0, 0, %o0 /* Linux native system calls enter here... */ .align 32 @@ -217,7 +218,6 @@ linux_sparc_syscall: 3: stx %o0, [%sp + PTREGS_OFF + PT_V9_I0] ret_sys_call: ldx [%sp + PTREGS_OFF + PT_V9_TSTATE], %g3 - sra %o0, 0, %o0 mov %ulo(TSTATE_XCARRY | TSTATE_ICARRY), %g2 sllx %g2, 32, %g2 -- cgit v0.10.2 From 151b628f104566a450125a6a0f6775a35bde58d6 Mon Sep 17 00:00:00 2001 From: Doug Wilson Date: Fri, 7 Mar 2014 16:29:03 +0530 Subject: sparc64:tsb.c:use array size macro rather than number This is a small patch which uses ARRAY_SIZE macro rather than a number to make code readability better. Signed-off-by: Doug Wilson Signed-off-by: David S. Miller diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 3b3a360..f5d506f 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -273,7 +273,7 @@ void __init pgtable_cache_init(void) prom_halt(); } - for (i = 0; i < 8; i++) { + for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) { unsigned long size = 8192 << i; const char *name = tsb_cache_names[i]; -- cgit v0.10.2 From 2f69fa829cb4ca062aaffee9ab9eb44484db75b1 Mon Sep 17 00:00:00 2001 From: Viro Date: Mon, 17 Mar 2014 16:01:27 -0400 Subject: percpu: allocation size should be even 723ad1d90b56 ("percpu: store offsets instead of lengths in ->map[]") updated percpu area allocator to use the lowest bit, instead of sign, to signify whether the area is occupied and forced min align to 2; unfortunately, it forgot to force the allocation size to be even causing malfunctions for the very rare odd-sized allocations. Always force the allocations to be even sized. tj: Wrote patch description. Original-patch-by: Al Viro Signed-off-by: Tejun Heo diff --git a/mm/percpu.c b/mm/percpu.c index c7206d0..202e104 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -713,11 +713,14 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) /* * We want the lowest bit of offset available for in-use/free - * indicator. + * indicator, so force >= 16bit alignment and make size even. */ if (unlikely(align < 2)) align = 2; + if (unlikely(size & 1)) + size++; + if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { WARN(true, "illegal size (%zu) or align (%zu) for " "percpu allocation\n", size, align); -- cgit v0.10.2 From 8d7f1fbf083e19688619e6ca25c95434a2c30537 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Sun, 16 Mar 2014 00:30:52 +0100 Subject: ATHEROS-ALX: Use dma_set_mask_and_coherent and fix a bug 1. For the 64 bits dma mask use dma_set_mask_and_coherent instead of dma_set_mask and dma_set_coherent_mask. 2. For the 32 bits dma mask dma_set_coherent_mask is only called if dma_set_mask fails, which is unusual. Assuming this as a bug, fixes it by replacing calls to dma_set_mask and dma_set_coherent_mask by a call to dma_set_mask_and_coherent. Signed-off-by: Peter Senna Tschudin Tested-by: Jonas Hahnfeld Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 2e45f6e..380d249 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1248,19 +1248,13 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) * shared register for the high 32 bits, so only a single, aligned, * 4 GB physical address range can be used for descriptors. */ - if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) && - !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { + if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { dev_dbg(&pdev->dev, "DMA to 64-BIT addresses\n"); } else { - err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (err) { - err = dma_set_coherent_mask(&pdev->dev, - DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, - "No usable DMA config, aborting\n"); - goto out_pci_disable; - } + dev_err(&pdev->dev, "No usable DMA config, aborting\n"); + goto out_pci_disable; } } -- cgit v0.10.2 From e10848a26a962e404ac00c897dfe54f14290806d Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 13 Mar 2014 14:58:42 +0000 Subject: x86/efi: Preserve segment registers in mixed mode I was triggering a #GP(0) from userland when running with CONFIG_EFI_MIXED and CONFIG_IA32_EMULATION, from what looked like register corruption. Turns out that the mixed mode code was trashing the contents of %ds, %es and %ss in __efi64_thunk(). Save and restore the contents of these segment registers across the call to __efi64_thunk() so that we don't corrupt the CPU context. Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 65b787a..e0984ef 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -176,6 +176,13 @@ ENDPROC(efi_call6) * This function must be invoked with a 1:1 mapped stack. */ ENTRY(__efi64_thunk) + movl %ds, %eax + push %rax + movl %es, %eax + push %rax + movl %ss, %eax + push %rax + subq $32, %rsp movl %esi, 0x0(%rsp) movl %edx, 0x4(%rsp) @@ -191,7 +198,7 @@ ENTRY(__efi64_thunk) movq %rbx, func_rt_ptr(%rip) /* Switch to gdt with 32-bit segments */ - movl 40(%rsp), %eax + movl 64(%rsp), %eax lgdt (%rax) leaq efi_enter32(%rip), %rax @@ -203,6 +210,13 @@ ENTRY(__efi64_thunk) lgdt save_gdt(%rip) + pop %rbx + movl %ebx, %ss + pop %rbx + movl %ebx, %es + pop %rbx + movl %ebx, %ds + /* * Convert 32-bit status code into 64-bit. */ @@ -218,11 +232,6 @@ ENTRY(__efi64_thunk) ENDPROC(__efi64_thunk) ENTRY(efi_exit32) - xorq %rax, %rax - movl %eax, %ds - movl %eax, %es - movl %eax, %ss - movq func_rt_ptr(%rip), %rax push %rax mov %rdi, %rax @@ -267,7 +276,7 @@ ENTRY(efi_enter32) */ cli - movl 44(%esp), %eax + movl 68(%esp), %eax movl %eax, 2(%eax) lgdtl (%eax) @@ -286,7 +295,7 @@ ENTRY(efi_enter32) xorl %eax, %eax lldt %ax - movl 48(%esp), %eax + movl 72(%esp), %eax pushl $__KERNEL_CS pushl %eax -- cgit v0.10.2 From 3f4a7836e33134d4ac34fa7c99788f0c6a79fa1c Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 13 Mar 2014 19:41:08 +0000 Subject: x86/efi: Rip out phys_efi_get_time() Dan reported that phys_efi_get_time() is doing kmalloc(..., GFP_KERNEL) under a spinlock which is very clearly a bug. Since phys_efi_get_time() has no users let's just delete it instead of trying to fix it. Note that since there are no users of phys_efi_get_time(), it is not possible to actually trigger a GFP_KERNEL alloc under the spinlock. Reported-by: Dan Carpenter Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Nathan Zimmer Cc: Matthew Garrett Cc: Jan Beulich Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 43e7cf6..3781dd3 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -246,21 +246,6 @@ static efi_status_t __init phys_efi_set_virtual_address_map( return status; } -static efi_status_t __init phys_efi_get_time(efi_time_t *tm, - efi_time_cap_t *tc) -{ - unsigned long flags; - efi_status_t status; - - spin_lock_irqsave(&rtc_lock, flags); - efi_call_phys_prelog(); - status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), - virt_to_phys(tc)); - efi_call_phys_epilog(); - spin_unlock_irqrestore(&rtc_lock, flags); - return status; -} - int efi_set_rtc_mmss(const struct timespec *now) { unsigned long nowtime = now->tv_sec; @@ -592,16 +577,9 @@ static int __init efi_runtime_init32(void) * EFI runtime services before set_virtual_address_map * is invoked. */ - efi_phys.get_time = (efi_get_time_t *) - (unsigned long)runtime->get_time; efi_phys.set_virtual_address_map = (efi_set_virtual_address_map_t *) (unsigned long)runtime->set_virtual_address_map; - /* - * Make efi_get_time can be called before entering - * virtual mode. - */ - efi.get_time = phys_efi_get_time; early_iounmap(runtime, sizeof(efi_runtime_services_32_t)); return 0; @@ -623,16 +601,9 @@ static int __init efi_runtime_init64(void) * EFI runtime services before set_virtual_address_map * is invoked. */ - efi_phys.get_time = (efi_get_time_t *) - (unsigned long)runtime->get_time; efi_phys.set_virtual_address_map = (efi_set_virtual_address_map_t *) (unsigned long)runtime->set_virtual_address_map; - /* - * Make efi_get_time can be called before entering - * virtual mode. - */ - efi.get_time = phys_efi_get_time; early_iounmap(runtime, sizeof(efi_runtime_services_64_t)); return 0; -- cgit v0.10.2 From 9a11040ff962304c1838aa9a9f33be78784eae47 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 16 Mar 2014 17:46:46 +0000 Subject: x86/efi: Restore 'attr' argument to query_variable_info() In the thunk patches the 'attr' argument was dropped to query_variable_info(). Restore it otherwise the firmware will return EFI_INVALID_PARAMETER. Signed-off-by: Matt Fleming diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 7e7f195..290d397 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -567,7 +567,7 @@ efi_thunk_query_variable_info(u32 attr, u64 *storage_space, phys_remaining = virt_to_phys(remaining_space); phys_max = virt_to_phys(max_variable_size); - status = efi_thunk(query_variable_info, phys_storage, + status = efi_thunk(query_variable_info, attr, phys_storage, phys_remaining, phys_max); return status; -- cgit v0.10.2 From b085f311e85b1d6f75d610097c2f20583b776fda Mon Sep 17 00:00:00 2001 From: Benedikt Spranger Date: Sun, 16 Mar 2014 16:36:29 +0100 Subject: net: cpsw: do not register cpts twice commit f280e89a (drivers: net: cpsw: fix for cpsw crash when build as modules) moved cpts_register()/cpts_unregister() to ndo_open()/ndo_stop(), but failed to remove cpts_register in cpsw_probe() which leads to a double registration and the following debug object splat. [ 18.991902] ODEBUG: init active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x2c [ 19.082249] [] (init_timer_key) from [] (cpts_register+0x1f0/0x2c4) [ 19.090642] [] (cpts_register) from [] (cpsw_ndo_open+0x780/0x81c) [ 19.098948] [] (cpsw_ndo_open) from [] (__dev_open+0xb4/0x118) Signed-off-by: Benedikt Spranger Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index ffd4d12..7d6d8ec 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2229,10 +2229,6 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_ale_ret; } - if (cpts_register(&pdev->dev, priv->cpts, - data->cpts_clock_mult, data->cpts_clock_shift)) - dev_err(priv->dev, "error registering cpts device\n"); - cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n", &ss_res->start, ndev->irq); -- cgit v0.10.2 From 27410e8248c64f5c6d28891389083b1022c15a10 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 30 Jan 2014 17:58:38 +0100 Subject: drm: Fix use-after-free in the shadow-attache exit code This regression has been introduced in commit b3f2333de8e81b089262b26d52272911523e605f Author: Daniel Vetter Date: Wed Dec 11 11:34:31 2013 +0100 drm: restrict the device list for shadow attached drivers Reported-by: Dave Jones Signed-off-by: Daniel Vetter Reviewed-by: David Herrmann Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 5736aaa..f7af69b 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -468,8 +468,8 @@ void drm_pci_exit(struct drm_driver *driver, struct pci_driver *pdriver) } else { list_for_each_entry_safe(dev, tmp, &driver->legacy_dev_list, legacy_dev_list) { - drm_put_dev(dev); list_del(&dev->legacy_dev_list); + drm_put_dev(dev); } } DRM_INFO("Module unloaded\n"); -- cgit v0.10.2 From e4362d1e64ff6da64118dcf74ff450cd0a029deb Mon Sep 17 00:00:00 2001 From: Alex Smith Date: Tue, 21 Jan 2014 11:22:35 +0000 Subject: MIPS: Fix possible build error with transparent hugepages enabled If CONFIG_TRANSPARENT_HUGEPAGE is enabled, but CONFIG_HUGETLB_PAGE is not, it is possible to end up with a configuration that fails to build with the following error: include/linux/huge_mm.h:125:2: error: #error "hugepages can't be allocated by the buddy allocator" This is due to CONFIG_FORCE_MAX_ZONEORDER defaulting to 11. It already has ranges that change the valid values when HUGETLB_PAGE is enabled, but this is not done for TRANSPARENT_HUGEPAGE. Fix by changing the HUGETLB_PAGE dependencies to MIPS_HUGE_TLB_SUPPORT, which includes both TRANSPARENT_HUGEPAGE and HUGETLB_PAGE. Signed-off-by: Alex Smith Reviewed-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6391/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1534474..95fa1f1 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1776,12 +1776,12 @@ endchoice config FORCE_MAX_ZONEORDER int "Maximum zone order" - range 14 64 if HUGETLB_PAGE && PAGE_SIZE_64KB - default "14" if HUGETLB_PAGE && PAGE_SIZE_64KB - range 13 64 if HUGETLB_PAGE && PAGE_SIZE_32KB - default "13" if HUGETLB_PAGE && PAGE_SIZE_32KB - range 12 64 if HUGETLB_PAGE && PAGE_SIZE_16KB - default "12" if HUGETLB_PAGE && PAGE_SIZE_16KB + range 14 64 if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_64KB + default "14" if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_64KB + range 13 64 if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_32KB + default "13" if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_32KB + range 12 64 if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_16KB + default "12" if MIPS_HUGE_TLB_SUPPORT && PAGE_SIZE_16KB range 11 64 default "11" help -- cgit v0.10.2 From 305564413c99514a14a49215790d96b8d71a6295 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 18 Mar 2014 09:31:18 +0100 Subject: ALSA: oxygen: Xonar DG(X): fix Stereo Upmixing regression The code introduced in commit 1f91ecc14dee ("ALSA: oxygen: modify adjust_dg_dac_routing function") accidentally disregarded the old value of the playback routing register, so it broke the "Stereo Upmixing" mixer control. The unmuted parts of the channel routing are the same for all settings of the output destination, so it suffices to revert that part of the patch. Fixes: 1f91ecc14dee ('ALSA: oxygen: modify adjust_dg_dac_routing function') Tested-by: Roman Volkov Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai diff --git a/sound/pci/oxygen/xonar_dg.c b/sound/pci/oxygen/xonar_dg.c index ed6f199..4cf3200 100644 --- a/sound/pci/oxygen/xonar_dg.c +++ b/sound/pci/oxygen/xonar_dg.c @@ -238,11 +238,21 @@ void set_cs4245_adc_params(struct oxygen *chip, cs4245_write_spi(chip, CS4245_MCLK_FREQ); } +static inline unsigned int shift_bits(unsigned int value, + unsigned int shift_from, + unsigned int shift_to, + unsigned int mask) +{ + if (shift_from < shift_to) + return (value << (shift_to - shift_from)) & mask; + else + return (value >> (shift_from - shift_to)) & mask; +} + unsigned int adjust_dg_dac_routing(struct oxygen *chip, unsigned int play_routing) { struct dg *data = chip->model_data; - unsigned int routing = 0; switch (data->output_sel) { case PLAYBACK_DST_HP: @@ -252,15 +262,23 @@ unsigned int adjust_dg_dac_routing(struct oxygen *chip, OXYGEN_PLAY_MUTE67, OXYGEN_PLAY_MUTE_MASK); break; case PLAYBACK_DST_MULTICH: - routing = (0 << OXYGEN_PLAY_DAC0_SOURCE_SHIFT) | - (2 << OXYGEN_PLAY_DAC1_SOURCE_SHIFT) | - (1 << OXYGEN_PLAY_DAC2_SOURCE_SHIFT) | - (0 << OXYGEN_PLAY_DAC3_SOURCE_SHIFT); oxygen_write8_masked(chip, OXYGEN_PLAY_ROUTING, OXYGEN_PLAY_MUTE01, OXYGEN_PLAY_MUTE_MASK); break; } - return routing; + return (play_routing & OXYGEN_PLAY_DAC0_SOURCE_MASK) | + shift_bits(play_routing, + OXYGEN_PLAY_DAC2_SOURCE_SHIFT, + OXYGEN_PLAY_DAC1_SOURCE_SHIFT, + OXYGEN_PLAY_DAC1_SOURCE_MASK) | + shift_bits(play_routing, + OXYGEN_PLAY_DAC1_SOURCE_SHIFT, + OXYGEN_PLAY_DAC2_SOURCE_SHIFT, + OXYGEN_PLAY_DAC2_SOURCE_MASK) | + shift_bits(play_routing, + OXYGEN_PLAY_DAC0_SOURCE_SHIFT, + OXYGEN_PLAY_DAC3_SOURCE_SHIFT, + OXYGEN_PLAY_DAC3_SOURCE_MASK); } void dump_cs4245_registers(struct oxygen *chip, -- cgit v0.10.2 From d1958f8c2f0d6bfaa115f4dce1d5d19be5c6b090 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sun, 16 Mar 2014 18:44:21 +0100 Subject: isdn/capi: Make Middleware depend on CAPI2.0 The Kconfig symbol ISDN_CAPI_MIDDLEWARE is only used in capi.c. Setting it without setting ISDN_CAPI_CAPI20 is therefor useless. Make it depend on ISDN_CAPI_CAPI20 and put its entry after ISDN_CAPI_CAPI20's entry. Signed-off-by: Paul Bolle Signed-off-by: Tilman Schmidt Signed-off-by: David S. Miller diff --git a/drivers/isdn/capi/Kconfig b/drivers/isdn/capi/Kconfig index f046865..9816c51 100644 --- a/drivers/isdn/capi/Kconfig +++ b/drivers/isdn/capi/Kconfig @@ -16,9 +16,17 @@ config CAPI_TRACE This will increase the size of the kernelcapi module by 20 KB. If unsure, say Y. +config ISDN_CAPI_CAPI20 + tristate "CAPI2.0 /dev/capi support" + help + This option will provide the CAPI 2.0 interface to userspace + applications via /dev/capi20. Applications should use the + standardized libcapi20 to access this functionality. You should say + Y/M here. + config ISDN_CAPI_MIDDLEWARE bool "CAPI2.0 Middleware support" - depends on TTY + depends on ISDN_CAPI_CAPI20 && TTY help This option will enhance the capabilities of the /dev/capi20 interface. It will provide a means of moving a data connection, @@ -26,14 +34,6 @@ config ISDN_CAPI_MIDDLEWARE device. If you want to use pppd with pppdcapiplugin to dial up to your ISP, say Y here. -config ISDN_CAPI_CAPI20 - tristate "CAPI2.0 /dev/capi support" - help - This option will provide the CAPI 2.0 interface to userspace - applications via /dev/capi20. Applications should use the - standardized libcapi20 to access this functionality. You should say - Y/M here. - config ISDN_CAPI_CAPIDRV tristate "CAPI2.0 capidrv interface support" depends on ISDN_I4L -- cgit v0.10.2 From e367c2d03dba4c9bcafad24688fadb79dd95b218 Mon Sep 17 00:00:00 2001 From: lucien Date: Mon, 17 Mar 2014 12:51:01 +0800 Subject: ipv6: ip6_append_data_mtu do not handle the mtu of the second fragment properly In ip6_append_data_mtu(), when the xfrm mode is not tunnel(such as transport),the ipsec header need to be added in the first fragment, so the mtu will decrease to reserve space for it, then the second fragment come, the mtu should be turn back, as the commit 0c1833797a5a6ec23ea9261d979aa18078720b74 said. however, in the commit a493e60ac4bbe2e977e7129d6d8cbb0dd236be, it use *mtu = min(*mtu, ...) to change the mtu, which lead to the new mtu is alway equal with the first fragment's. and cannot turn back. when I test through ping6 -c1 -s5000 $ip (mtu=1280): ...frag (0|1232) ESP(spi=0x00002000,seq=0xb), length 1232 ...frag (1232|1216) ...frag (2448|1216) ...frag (3664|1216) ...frag (4880|164) which should be: ...frag (0|1232) ESP(spi=0x00001000,seq=0x1), length 1232 ...frag (1232|1232) ...frag (2464|1232) ...frag (3696|1232) ...frag (4928|116) so delete the min() when change back the mtu. Signed-off-by: Xin Long Fixes: 75a493e60ac4bb ("ipv6: ip6_append_data_mtu did not care about pmtudisc and frag_size") Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 16f91a2..64d6073 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1101,21 +1101,19 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int fragheaderlen, struct sk_buff *skb, struct rt6_info *rt, - bool pmtuprobe) + unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { /* first fragment, reserve header_len */ - *mtu = *mtu - rt->dst.header_len; + *mtu = orig_mtu - rt->dst.header_len; } else { /* * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = min(*mtu, pmtuprobe ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path)); + *mtu = orig_mtu; } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1132,7 +1130,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; int exthdrlen; int dst_exthdrlen; int hh_len; @@ -1214,6 +1212,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, dst_exthdrlen = 0; mtu = cork->fragsize; } + orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1311,8 +1310,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc >= - IPV6_PMTUDISC_PROBE); + orig_mtu); skb_prev = skb; -- cgit v0.10.2 From ff0992e9036e9810e7cd45234fa32ca1e79750e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 17 Mar 2014 16:25:18 +0100 Subject: net: cdc_ncm: fix control message ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a context modified revert of commit 6a9612e2cb22 ("net: cdc_ncm: remove ncm_parm field") which introduced a NCM specification violation, causing setup errors for some devices. These errors resulted in the device and host disagreeing about shared settings, with complete failure to communicate as the end result. The NCM specification require that many of the NCM specific control reuests are sent only while the NCM Data Interface is in alternate setting 0. Reverting the commit ensures that we follow this requirement. Fixes: 6a9612e2cb22 ("net: cdc_ncm: remove ncm_parm field") Reported-and-tested-by: Pasi Kärkkäinen Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index dbff290..d350d27 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -68,7 +68,6 @@ static struct usb_driver cdc_ncm_driver; static int cdc_ncm_setup(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; - struct usb_cdc_ncm_ntb_parameters ncm_parm; u32 val; u8 flags; u8 iface_no; @@ -82,22 +81,22 @@ static int cdc_ncm_setup(struct usbnet *dev) err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_PARAMETERS, USB_TYPE_CLASS | USB_DIR_IN |USB_RECIP_INTERFACE, - 0, iface_no, &ncm_parm, - sizeof(ncm_parm)); + 0, iface_no, &ctx->ncm_parm, + sizeof(ctx->ncm_parm)); if (err < 0) { dev_err(&dev->intf->dev, "failed GET_NTB_PARAMETERS\n"); return err; /* GET_NTB_PARAMETERS is required */ } /* read correct set of parameters according to device mode */ - ctx->rx_max = le32_to_cpu(ncm_parm.dwNtbInMaxSize); - ctx->tx_max = le32_to_cpu(ncm_parm.dwNtbOutMaxSize); - ctx->tx_remainder = le16_to_cpu(ncm_parm.wNdpOutPayloadRemainder); - ctx->tx_modulus = le16_to_cpu(ncm_parm.wNdpOutDivisor); - ctx->tx_ndp_modulus = le16_to_cpu(ncm_parm.wNdpOutAlignment); + ctx->rx_max = le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize); + ctx->tx_max = le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize); + ctx->tx_remainder = le16_to_cpu(ctx->ncm_parm.wNdpOutPayloadRemainder); + ctx->tx_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutDivisor); + ctx->tx_ndp_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutAlignment); /* devices prior to NCM Errata shall set this field to zero */ - ctx->tx_max_datagrams = le16_to_cpu(ncm_parm.wNtbOutMaxDatagrams); - ntb_fmt_supported = le16_to_cpu(ncm_parm.bmNtbFormatsSupported); + ctx->tx_max_datagrams = le16_to_cpu(ctx->ncm_parm.wNtbOutMaxDatagrams); + ntb_fmt_supported = le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported); /* there are some minor differences in NCM and MBIM defaults */ if (cdc_ncm_comm_intf_is_mbim(ctx->control->cur_altsetting)) { @@ -146,7 +145,7 @@ static int cdc_ncm_setup(struct usbnet *dev) } /* inform device about NTB input size changes */ - if (ctx->rx_max != le32_to_cpu(ncm_parm.dwNtbInMaxSize)) { + if (ctx->rx_max != le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)) { __le32 dwNtbInMaxSize = cpu_to_le32(ctx->rx_max); err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE, @@ -162,14 +161,6 @@ static int cdc_ncm_setup(struct usbnet *dev) dev_dbg(&dev->intf->dev, "Using default maximum transmit length=%d\n", CDC_NCM_NTB_MAX_SIZE_TX); ctx->tx_max = CDC_NCM_NTB_MAX_SIZE_TX; - - /* Adding a pad byte here simplifies the handling in - * cdc_ncm_fill_tx_frame, by making tx_max always - * represent the real skb max size. - */ - if (ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0) - ctx->tx_max++; - } /* @@ -439,6 +430,10 @@ advance: goto error2; } + /* initialize data interface */ + if (cdc_ncm_setup(dev)) + goto error2; + /* configure data interface */ temp = usb_set_interface(dev->udev, iface_no, data_altsetting); if (temp) { @@ -453,12 +448,6 @@ advance: goto error2; } - /* initialize data interface */ - if (cdc_ncm_setup(dev)) { - dev_dbg(&intf->dev, "cdc_ncm_setup() failed\n"); - goto error2; - } - usb_set_intfdata(ctx->data, dev); usb_set_intfdata(ctx->control, dev); @@ -475,6 +464,15 @@ advance: dev->hard_mtu = ctx->tx_max; dev->rx_urb_size = ctx->rx_max; + /* cdc_ncm_setup will override dwNtbOutMaxSize if it is + * outside the sane range. Adding a pad byte here if necessary + * simplifies the handling in cdc_ncm_fill_tx_frame, making + * tx_max always represent the real skb max size. + */ + if (ctx->tx_max != le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) && + ctx->tx_max % usb_maxpacket(dev->udev, dev->out, 1) == 0) + ctx->tx_max++; + return 0; error2: diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index c3fa807..2c14d9c 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -88,6 +88,7 @@ #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) struct cdc_ncm_ctx { + struct usb_cdc_ncm_ntb_parameters ncm_parm; struct hrtimer tx_timer; struct tasklet_struct bh; -- cgit v0.10.2 From 6a96918a6aa7b434d15710fc9e06589c6c8bd3a6 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Tue, 18 Mar 2014 12:14:37 -0600 Subject: ata: Fix compiler warning with APM X-Gene host controller driver This patch fixes an compiler warning with APM X-Gene host controller driver when compiled with DEBUG enabled. Signed-off-by: Loc Ho Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index fcf21f1..77c89bf 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -329,7 +329,7 @@ static int xgene_ahci_hw_init(struct ahci_host_priv *hpriv) writel(0xffffffff, hpriv->mmio + HOST_IRQ_STAT); readl(hpriv->mmio + HOST_IRQ_STAT); /* Force a barrier */ writel(0, ctx->csr_core + INTSTATUSMASK); - readl(ctx->csr_core + INTSTATUSMASK); /* Force a barrier */ + val = readl(ctx->csr_core + INTSTATUSMASK); /* Force a barrier */ dev_dbg(ctx->dev, "top level interrupt mask 0x%X value 0x%08X\n", INTSTATUSMASK, val); -- cgit v0.10.2 From f7bd12d09ed6e4093a56dbbfbe8411cc52a738d1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 17 Mar 2014 19:19:06 -0800 Subject: cnic: Use proper ulp_ops for per device operations. For per device operations, cnic needs to dereference the RCU protected cp->ulp_ops instead of the global cnic_ulp_tbl. In 2 locations, cnic_send_nlmsg() and cnic_copy_ulp_stats(), it was referencing the global table. If the device has been unregistered and these functions are still being called (very unlikely scenarios), it could lead to NULL pointer dereference. Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index fcf9105a5..2cb2482 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -342,7 +342,7 @@ static int cnic_send_nlmsg(struct cnic_local *cp, u32 type, while (retry < 3) { rc = 0; rcu_read_lock(); - ulp_ops = rcu_dereference(cnic_ulp_tbl[CNIC_ULP_ISCSI]); + ulp_ops = rcu_dereference(cp->ulp_ops[CNIC_ULP_ISCSI]); if (ulp_ops) rc = ulp_ops->iscsi_nl_send_msg( cp->ulp_handle[CNIC_ULP_ISCSI], @@ -3244,7 +3244,8 @@ static int cnic_copy_ulp_stats(struct cnic_dev *dev, int ulp_type) int rc; mutex_lock(&cnic_lock); - ulp_ops = cnic_ulp_tbl_prot(ulp_type); + ulp_ops = rcu_dereference_protected(cp->ulp_ops[ulp_type], + lockdep_is_held(&cnic_lock)); if (ulp_ops && ulp_ops->cnic_get_stats) rc = ulp_ops->cnic_get_stats(cp->ulp_handle[ulp_type]); else -- cgit v0.10.2 From be1fefc21433f6202fcd76bdc7916e557fe80b9a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 17 Mar 2014 19:19:07 -0800 Subject: cnic,bnx2i,bnx2fc: Fix inconsistent use of page size The bnx2/bnx2x rings are made up of linked pages. However there is an upper limit on the page size as some the page size settings are 16-bit in the hardware/firmware interface. In the current code, some parts use BNX2_PAGE_SIZE which has a 16K upper limit and some parts use PAGE_SIZE. On archs with >= 64K PAGE_SIZE, it generates some compile warnings. Define a new CNIC_PAGE_SZIE which has an upper limit of 16K and use it consistently in all relevant parts. Signed-off-by: Michael Chan Signed-off-by: Eddie Wai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index 2cb2482..ea37ea2 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -726,7 +726,7 @@ static void cnic_free_dma(struct cnic_dev *dev, struct cnic_dma *dma) for (i = 0; i < dma->num_pages; i++) { if (dma->pg_arr[i]) { - dma_free_coherent(&dev->pcidev->dev, BNX2_PAGE_SIZE, + dma_free_coherent(&dev->pcidev->dev, CNIC_PAGE_SIZE, dma->pg_arr[i], dma->pg_map_arr[i]); dma->pg_arr[i] = NULL; } @@ -785,7 +785,7 @@ static int cnic_alloc_dma(struct cnic_dev *dev, struct cnic_dma *dma, for (i = 0; i < pages; i++) { dma->pg_arr[i] = dma_alloc_coherent(&dev->pcidev->dev, - BNX2_PAGE_SIZE, + CNIC_PAGE_SIZE, &dma->pg_map_arr[i], GFP_ATOMIC); if (dma->pg_arr[i] == NULL) @@ -794,8 +794,8 @@ static int cnic_alloc_dma(struct cnic_dev *dev, struct cnic_dma *dma, if (!use_pg_tbl) return 0; - dma->pgtbl_size = ((pages * 8) + BNX2_PAGE_SIZE - 1) & - ~(BNX2_PAGE_SIZE - 1); + dma->pgtbl_size = ((pages * 8) + CNIC_PAGE_SIZE - 1) & + ~(CNIC_PAGE_SIZE - 1); dma->pgtbl = dma_alloc_coherent(&dev->pcidev->dev, dma->pgtbl_size, &dma->pgtbl_map, GFP_ATOMIC); if (dma->pgtbl == NULL) @@ -900,8 +900,8 @@ static int cnic_alloc_context(struct cnic_dev *dev) if (BNX2_CHIP(cp) == BNX2_CHIP_5709) { int i, k, arr_size; - cp->ctx_blk_size = BNX2_PAGE_SIZE; - cp->cids_per_blk = BNX2_PAGE_SIZE / 128; + cp->ctx_blk_size = CNIC_PAGE_SIZE; + cp->cids_per_blk = CNIC_PAGE_SIZE / 128; arr_size = BNX2_MAX_CID / cp->cids_per_blk * sizeof(struct cnic_ctx); cp->ctx_arr = kzalloc(arr_size, GFP_KERNEL); @@ -933,7 +933,7 @@ static int cnic_alloc_context(struct cnic_dev *dev) for (i = 0; i < cp->ctx_blks; i++) { cp->ctx_arr[i].ctx = dma_alloc_coherent(&dev->pcidev->dev, - BNX2_PAGE_SIZE, + CNIC_PAGE_SIZE, &cp->ctx_arr[i].mapping, GFP_KERNEL); if (cp->ctx_arr[i].ctx == NULL) @@ -1013,7 +1013,7 @@ static int __cnic_alloc_uio_rings(struct cnic_uio_dev *udev, int pages) if (udev->l2_ring) return 0; - udev->l2_ring_size = pages * BNX2_PAGE_SIZE; + udev->l2_ring_size = pages * CNIC_PAGE_SIZE; udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size, &udev->l2_ring_map, GFP_KERNEL | __GFP_COMP); @@ -1021,7 +1021,7 @@ static int __cnic_alloc_uio_rings(struct cnic_uio_dev *udev, int pages) return -ENOMEM; udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size; - udev->l2_buf_size = PAGE_ALIGN(udev->l2_buf_size); + udev->l2_buf_size = CNIC_PAGE_ALIGN(udev->l2_buf_size); udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size, &udev->l2_buf_map, GFP_KERNEL | __GFP_COMP); @@ -1102,7 +1102,7 @@ static int cnic_init_uio(struct cnic_dev *dev) uinfo->mem[0].size = MB_GET_CID_ADDR(TX_TSS_CID + TX_MAX_TSS_RINGS + 1); uinfo->mem[1].addr = (unsigned long) cp->status_blk.gen & - PAGE_MASK; + CNIC_PAGE_MASK; if (cp->ethdev->drv_state & CNIC_DRV_STATE_USING_MSIX) uinfo->mem[1].size = BNX2_SBLK_MSIX_ALIGN_SIZE * 9; else @@ -1113,7 +1113,7 @@ static int cnic_init_uio(struct cnic_dev *dev) uinfo->mem[0].size = pci_resource_len(dev->pcidev, 0); uinfo->mem[1].addr = (unsigned long) cp->bnx2x_def_status_blk & - PAGE_MASK; + CNIC_PAGE_MASK; uinfo->mem[1].size = sizeof(*cp->bnx2x_def_status_blk); uinfo->name = "bnx2x_cnic"; @@ -1267,14 +1267,14 @@ static int cnic_alloc_bnx2x_resc(struct cnic_dev *dev) for (i = MAX_ISCSI_TBL_SZ; i < cp->max_cid_space; i++) cp->ctx_tbl[i].ulp_proto_id = CNIC_ULP_FCOE; - pages = PAGE_ALIGN(cp->max_cid_space * CNIC_KWQ16_DATA_SIZE) / - PAGE_SIZE; + pages = CNIC_PAGE_ALIGN(cp->max_cid_space * CNIC_KWQ16_DATA_SIZE) / + CNIC_PAGE_SIZE; ret = cnic_alloc_dma(dev, kwq_16_dma, pages, 0); if (ret) return -ENOMEM; - n = PAGE_SIZE / CNIC_KWQ16_DATA_SIZE; + n = CNIC_PAGE_SIZE / CNIC_KWQ16_DATA_SIZE; for (i = 0, j = 0; i < cp->max_cid_space; i++) { long off = CNIC_KWQ16_DATA_SIZE * (i % n); @@ -1296,7 +1296,7 @@ static int cnic_alloc_bnx2x_resc(struct cnic_dev *dev) goto error; } - pages = PAGE_ALIGN(BNX2X_ISCSI_GLB_BUF_SIZE) / PAGE_SIZE; + pages = CNIC_PAGE_ALIGN(BNX2X_ISCSI_GLB_BUF_SIZE) / CNIC_PAGE_SIZE; ret = cnic_alloc_dma(dev, &cp->gbl_buf_info, pages, 0); if (ret) goto error; @@ -1466,8 +1466,8 @@ static int cnic_bnx2x_iscsi_init1(struct cnic_dev *dev, struct kwqe *kwqe) cp->r2tq_size = cp->num_iscsi_tasks * BNX2X_ISCSI_MAX_PENDING_R2TS * BNX2X_ISCSI_R2TQE_SIZE; cp->hq_size = cp->num_ccells * BNX2X_ISCSI_HQ_BD_SIZE; - pages = PAGE_ALIGN(cp->hq_size) / PAGE_SIZE; - hq_bds = pages * (PAGE_SIZE / BNX2X_ISCSI_HQ_BD_SIZE); + pages = CNIC_PAGE_ALIGN(cp->hq_size) / CNIC_PAGE_SIZE; + hq_bds = pages * (CNIC_PAGE_SIZE / BNX2X_ISCSI_HQ_BD_SIZE); cp->num_cqs = req1->num_cqs; if (!dev->max_iscsi_conn) @@ -1477,9 +1477,9 @@ static int cnic_bnx2x_iscsi_init1(struct cnic_dev *dev, struct kwqe *kwqe) CNIC_WR16(dev, BAR_TSTRORM_INTMEM + TSTORM_ISCSI_RQ_SIZE_OFFSET(pfid), req1->rq_num_wqes); CNIC_WR16(dev, BAR_TSTRORM_INTMEM + TSTORM_ISCSI_PAGE_SIZE_OFFSET(pfid), - PAGE_SIZE); + CNIC_PAGE_SIZE); CNIC_WR8(dev, BAR_TSTRORM_INTMEM + - TSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfid), PAGE_SHIFT); + TSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfid), CNIC_PAGE_BITS); CNIC_WR16(dev, BAR_TSTRORM_INTMEM + TSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfid), req1->num_tasks_per_conn); @@ -1489,9 +1489,9 @@ static int cnic_bnx2x_iscsi_init1(struct cnic_dev *dev, struct kwqe *kwqe) USTORM_ISCSI_RQ_BUFFER_SIZE_OFFSET(pfid), req1->rq_buffer_size); CNIC_WR16(dev, BAR_USTRORM_INTMEM + USTORM_ISCSI_PAGE_SIZE_OFFSET(pfid), - PAGE_SIZE); + CNIC_PAGE_SIZE); CNIC_WR8(dev, BAR_USTRORM_INTMEM + - USTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfid), PAGE_SHIFT); + USTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfid), CNIC_PAGE_BITS); CNIC_WR16(dev, BAR_USTRORM_INTMEM + USTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfid), req1->num_tasks_per_conn); @@ -1504,9 +1504,9 @@ static int cnic_bnx2x_iscsi_init1(struct cnic_dev *dev, struct kwqe *kwqe) /* init Xstorm RAM */ CNIC_WR16(dev, BAR_XSTRORM_INTMEM + XSTORM_ISCSI_PAGE_SIZE_OFFSET(pfid), - PAGE_SIZE); + CNIC_PAGE_SIZE); CNIC_WR8(dev, BAR_XSTRORM_INTMEM + - XSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfid), PAGE_SHIFT); + XSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfid), CNIC_PAGE_BITS); CNIC_WR16(dev, BAR_XSTRORM_INTMEM + XSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfid), req1->num_tasks_per_conn); @@ -1519,9 +1519,9 @@ static int cnic_bnx2x_iscsi_init1(struct cnic_dev *dev, struct kwqe *kwqe) /* init Cstorm RAM */ CNIC_WR16(dev, BAR_CSTRORM_INTMEM + CSTORM_ISCSI_PAGE_SIZE_OFFSET(pfid), - PAGE_SIZE); + CNIC_PAGE_SIZE); CNIC_WR8(dev, BAR_CSTRORM_INTMEM + - CSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfid), PAGE_SHIFT); + CSTORM_ISCSI_PAGE_SIZE_LOG_OFFSET(pfid), CNIC_PAGE_BITS); CNIC_WR16(dev, BAR_CSTRORM_INTMEM + CSTORM_ISCSI_NUM_OF_TASKS_OFFSET(pfid), req1->num_tasks_per_conn); @@ -1623,18 +1623,18 @@ static int cnic_alloc_bnx2x_conn_resc(struct cnic_dev *dev, u32 l5_cid) } ctx->cid = cid; - pages = PAGE_ALIGN(cp->task_array_size) / PAGE_SIZE; + pages = CNIC_PAGE_ALIGN(cp->task_array_size) / CNIC_PAGE_SIZE; ret = cnic_alloc_dma(dev, &iscsi->task_array_info, pages, 1); if (ret) goto error; - pages = PAGE_ALIGN(cp->r2tq_size) / PAGE_SIZE; + pages = CNIC_PAGE_ALIGN(cp->r2tq_size) / CNIC_PAGE_SIZE; ret = cnic_alloc_dma(dev, &iscsi->r2tq_info, pages, 1); if (ret) goto error; - pages = PAGE_ALIGN(cp->hq_size) / PAGE_SIZE; + pages = CNIC_PAGE_ALIGN(cp->hq_size) / CNIC_PAGE_SIZE; ret = cnic_alloc_dma(dev, &iscsi->hq_info, pages, 1); if (ret) goto error; @@ -1760,7 +1760,7 @@ static int cnic_setup_bnx2x_ctx(struct cnic_dev *dev, struct kwqe *wqes[], ictx->tstorm_st_context.iscsi.hdr_bytes_2_fetch = ISCSI_HEADER_SIZE; /* TSTORM requires the base address of RQ DB & not PTE */ ictx->tstorm_st_context.iscsi.rq_db_phy_addr.lo = - req2->rq_page_table_addr_lo & PAGE_MASK; + req2->rq_page_table_addr_lo & CNIC_PAGE_MASK; ictx->tstorm_st_context.iscsi.rq_db_phy_addr.hi = req2->rq_page_table_addr_hi; ictx->tstorm_st_context.iscsi.iscsi_conn_id = req1->iscsi_conn_id; @@ -1842,7 +1842,7 @@ static int cnic_setup_bnx2x_ctx(struct cnic_dev *dev, struct kwqe *wqes[], /* CSTORM and USTORM initialization is different, CSTORM requires * CQ DB base & not PTE addr */ ictx->cstorm_st_context.cq_db_base.lo = - req1->cq_page_table_addr_lo & PAGE_MASK; + req1->cq_page_table_addr_lo & CNIC_PAGE_MASK; ictx->cstorm_st_context.cq_db_base.hi = req1->cq_page_table_addr_hi; ictx->cstorm_st_context.iscsi_conn_id = req1->iscsi_conn_id; ictx->cstorm_st_context.cq_proc_en_bit_map = (1 << cp->num_cqs) - 1; @@ -2911,7 +2911,7 @@ static int cnic_l2_completion(struct cnic_local *cp) u16 hw_cons, sw_cons; struct cnic_uio_dev *udev = cp->udev; union eth_rx_cqe *cqe, *cqe_ring = (union eth_rx_cqe *) - (udev->l2_ring + (2 * BNX2_PAGE_SIZE)); + (udev->l2_ring + (2 * CNIC_PAGE_SIZE)); u32 cmd; int comp = 0; @@ -4385,7 +4385,7 @@ static int cnic_setup_5709_context(struct cnic_dev *dev, int valid) u32 idx = cp->ctx_arr[i].cid / cp->cids_per_blk; u32 val; - memset(cp->ctx_arr[i].ctx, 0, BNX2_PAGE_SIZE); + memset(cp->ctx_arr[i].ctx, 0, CNIC_PAGE_SIZE); CNIC_WR(dev, BNX2_CTX_HOST_PAGE_TBL_DATA0, (cp->ctx_arr[i].mapping & 0xffffffff) | valid_bit); @@ -4629,7 +4629,7 @@ static void cnic_init_bnx2_rx_ring(struct cnic_dev *dev) val = BNX2_L2CTX_L2_STATUSB_NUM(sb_id); cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_HOST_BDIDX, val); - rxbd = udev->l2_ring + BNX2_PAGE_SIZE; + rxbd = udev->l2_ring + CNIC_PAGE_SIZE; for (i = 0; i < BNX2_MAX_RX_DESC_CNT; i++, rxbd++) { dma_addr_t buf_map; int n = (i % cp->l2_rx_ring_size) + 1; @@ -4640,11 +4640,11 @@ static void cnic_init_bnx2_rx_ring(struct cnic_dev *dev) rxbd->rx_bd_haddr_hi = (u64) buf_map >> 32; rxbd->rx_bd_haddr_lo = (u64) buf_map & 0xffffffff; } - val = (u64) (ring_map + BNX2_PAGE_SIZE) >> 32; + val = (u64) (ring_map + CNIC_PAGE_SIZE) >> 32; cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_NX_BDHADDR_HI, val); rxbd->rx_bd_haddr_hi = val; - val = (u64) (ring_map + BNX2_PAGE_SIZE) & 0xffffffff; + val = (u64) (ring_map + CNIC_PAGE_SIZE) & 0xffffffff; cnic_ctx_wr(dev, cid_addr, BNX2_L2CTX_NX_BDHADDR_LO, val); rxbd->rx_bd_haddr_lo = val; @@ -4710,10 +4710,10 @@ static int cnic_start_bnx2_hw(struct cnic_dev *dev) val = CNIC_RD(dev, BNX2_MQ_CONFIG); val &= ~BNX2_MQ_CONFIG_KNL_BYP_BLK_SIZE; - if (BNX2_PAGE_BITS > 12) + if (CNIC_PAGE_BITS > 12) val |= (12 - 8) << 4; else - val |= (BNX2_PAGE_BITS - 8) << 4; + val |= (CNIC_PAGE_BITS - 8) << 4; CNIC_WR(dev, BNX2_MQ_CONFIG, val); @@ -4743,13 +4743,13 @@ static int cnic_start_bnx2_hw(struct cnic_dev *dev) /* Initialize the kernel work queue context. */ val = KRNLQ_TYPE_TYPE_KRNLQ | KRNLQ_SIZE_TYPE_SIZE | - (BNX2_PAGE_BITS - 8) | KRNLQ_FLAGS_QE_SELF_SEQ; + (CNIC_PAGE_BITS - 8) | KRNLQ_FLAGS_QE_SELF_SEQ; cnic_ctx_wr(dev, kwq_cid_addr, L5_KRNLQ_TYPE, val); - val = (BNX2_PAGE_SIZE / sizeof(struct kwqe) - 1) << 16; + val = (CNIC_PAGE_SIZE / sizeof(struct kwqe) - 1) << 16; cnic_ctx_wr(dev, kwq_cid_addr, L5_KRNLQ_QE_SELF_SEQ_MAX, val); - val = ((BNX2_PAGE_SIZE / sizeof(struct kwqe)) << 16) | KWQ_PAGE_CNT; + val = ((CNIC_PAGE_SIZE / sizeof(struct kwqe)) << 16) | KWQ_PAGE_CNT; cnic_ctx_wr(dev, kwq_cid_addr, L5_KRNLQ_PGTBL_NPAGES, val); val = (u32) ((u64) cp->kwq_info.pgtbl_map >> 32); @@ -4769,13 +4769,13 @@ static int cnic_start_bnx2_hw(struct cnic_dev *dev) /* Initialize the kernel complete queue context. */ val = KRNLQ_TYPE_TYPE_KRNLQ | KRNLQ_SIZE_TYPE_SIZE | - (BNX2_PAGE_BITS - 8) | KRNLQ_FLAGS_QE_SELF_SEQ; + (CNIC_PAGE_BITS - 8) | KRNLQ_FLAGS_QE_SELF_SEQ; cnic_ctx_wr(dev, kcq_cid_addr, L5_KRNLQ_TYPE, val); - val = (BNX2_PAGE_SIZE / sizeof(struct kcqe) - 1) << 16; + val = (CNIC_PAGE_SIZE / sizeof(struct kcqe) - 1) << 16; cnic_ctx_wr(dev, kcq_cid_addr, L5_KRNLQ_QE_SELF_SEQ_MAX, val); - val = ((BNX2_PAGE_SIZE / sizeof(struct kcqe)) << 16) | KCQ_PAGE_CNT; + val = ((CNIC_PAGE_SIZE / sizeof(struct kcqe)) << 16) | KCQ_PAGE_CNT; cnic_ctx_wr(dev, kcq_cid_addr, L5_KRNLQ_PGTBL_NPAGES, val); val = (u32) ((u64) cp->kcq1.dma.pgtbl_map >> 32); @@ -4919,7 +4919,7 @@ static void cnic_init_bnx2x_tx_ring(struct cnic_dev *dev, u32 cli = cp->ethdev->iscsi_l2_client_id; u32 val; - memset(txbd, 0, BNX2_PAGE_SIZE); + memset(txbd, 0, CNIC_PAGE_SIZE); buf_map = udev->l2_buf_map; for (i = 0; i < BNX2_MAX_TX_DESC_CNT; i += 3, txbd += 3) { @@ -4979,9 +4979,9 @@ static void cnic_init_bnx2x_rx_ring(struct cnic_dev *dev, struct bnx2x *bp = netdev_priv(dev->netdev); struct cnic_uio_dev *udev = cp->udev; struct eth_rx_bd *rxbd = (struct eth_rx_bd *) (udev->l2_ring + - BNX2_PAGE_SIZE); + CNIC_PAGE_SIZE); struct eth_rx_cqe_next_page *rxcqe = (struct eth_rx_cqe_next_page *) - (udev->l2_ring + (2 * BNX2_PAGE_SIZE)); + (udev->l2_ring + (2 * CNIC_PAGE_SIZE)); struct host_sp_status_block *sb = cp->bnx2x_def_status_blk; int i; u32 cli = cp->ethdev->iscsi_l2_client_id; @@ -5005,20 +5005,20 @@ static void cnic_init_bnx2x_rx_ring(struct cnic_dev *dev, rxbd->addr_lo = cpu_to_le32(buf_map & 0xffffffff); } - val = (u64) (ring_map + BNX2_PAGE_SIZE) >> 32; + val = (u64) (ring_map + CNIC_PAGE_SIZE) >> 32; rxbd->addr_hi = cpu_to_le32(val); data->rx.bd_page_base.hi = cpu_to_le32(val); - val = (u64) (ring_map + BNX2_PAGE_SIZE) & 0xffffffff; + val = (u64) (ring_map + CNIC_PAGE_SIZE) & 0xffffffff; rxbd->addr_lo = cpu_to_le32(val); data->rx.bd_page_base.lo = cpu_to_le32(val); rxcqe += BNX2X_MAX_RCQ_DESC_CNT; - val = (u64) (ring_map + (2 * BNX2_PAGE_SIZE)) >> 32; + val = (u64) (ring_map + (2 * CNIC_PAGE_SIZE)) >> 32; rxcqe->addr_hi = cpu_to_le32(val); data->rx.cqe_page_base.hi = cpu_to_le32(val); - val = (u64) (ring_map + (2 * BNX2_PAGE_SIZE)) & 0xffffffff; + val = (u64) (ring_map + (2 * CNIC_PAGE_SIZE)) & 0xffffffff; rxcqe->addr_lo = cpu_to_le32(val); data->rx.cqe_page_base.lo = cpu_to_le32(val); @@ -5266,8 +5266,8 @@ static void cnic_shutdown_rings(struct cnic_dev *dev) msleep(10); } clear_bit(CNIC_LCL_FL_RINGS_INITED, &cp->cnic_local_flags); - rx_ring = udev->l2_ring + BNX2_PAGE_SIZE; - memset(rx_ring, 0, BNX2_PAGE_SIZE); + rx_ring = udev->l2_ring + CNIC_PAGE_SIZE; + memset(rx_ring, 0, CNIC_PAGE_SIZE); } static int cnic_register_netdev(struct cnic_dev *dev) diff --git a/drivers/net/ethernet/broadcom/cnic_if.h b/drivers/net/ethernet/broadcom/cnic_if.h index 8cf6b192..40bb563 100644 --- a/drivers/net/ethernet/broadcom/cnic_if.h +++ b/drivers/net/ethernet/broadcom/cnic_if.h @@ -24,6 +24,16 @@ #define MAX_CNIC_ULP_TYPE_EXT 3 #define MAX_CNIC_ULP_TYPE 4 +/* Use CPU native page size up to 16K for cnic ring sizes. */ +#if (PAGE_SHIFT > 14) +#define CNIC_PAGE_BITS 14 +#else +#define CNIC_PAGE_BITS PAGE_SHIFT +#endif +#define CNIC_PAGE_SIZE (1 << (CNIC_PAGE_BITS)) +#define CNIC_PAGE_ALIGN(addr) ALIGN(addr, CNIC_PAGE_SIZE) +#define CNIC_PAGE_MASK (~((CNIC_PAGE_SIZE) - 1)) + struct kwqe { u32 kwqe_op_flag; diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index ed88089..e9279a8 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -594,13 +594,13 @@ static void bnx2fc_free_mp_resc(struct bnx2fc_cmd *io_req) mp_req->mp_resp_bd = NULL; } if (mp_req->req_buf) { - dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_free_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, mp_req->req_buf, mp_req->req_buf_dma); mp_req->req_buf = NULL; } if (mp_req->resp_buf) { - dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_free_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, mp_req->resp_buf, mp_req->resp_buf_dma); mp_req->resp_buf = NULL; @@ -622,7 +622,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) mp_req->req_len = sizeof(struct fcp_cmnd); io_req->data_xfer_len = mp_req->req_len; - mp_req->req_buf = dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE, + mp_req->req_buf = dma_alloc_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, &mp_req->req_buf_dma, GFP_ATOMIC); if (!mp_req->req_buf) { @@ -631,7 +631,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) return FAILED; } - mp_req->resp_buf = dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE, + mp_req->resp_buf = dma_alloc_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, &mp_req->resp_buf_dma, GFP_ATOMIC); if (!mp_req->resp_buf) { @@ -639,8 +639,8 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) bnx2fc_free_mp_resc(io_req); return FAILED; } - memset(mp_req->req_buf, 0, PAGE_SIZE); - memset(mp_req->resp_buf, 0, PAGE_SIZE); + memset(mp_req->req_buf, 0, CNIC_PAGE_SIZE); + memset(mp_req->resp_buf, 0, CNIC_PAGE_SIZE); /* Allocate and map mp_req_bd and mp_resp_bd */ sz = sizeof(struct fcoe_bd_ctx); @@ -665,7 +665,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) mp_req_bd = mp_req->mp_req_bd; mp_req_bd->buf_addr_lo = (u32)addr & 0xffffffff; mp_req_bd->buf_addr_hi = (u32)((u64)addr >> 32); - mp_req_bd->buf_len = PAGE_SIZE; + mp_req_bd->buf_len = CNIC_PAGE_SIZE; mp_req_bd->flags = 0; /* @@ -677,7 +677,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) addr = mp_req->resp_buf_dma; mp_resp_bd->buf_addr_lo = (u32)addr & 0xffffffff; mp_resp_bd->buf_addr_hi = (u32)((u64)addr >> 32); - mp_resp_bd->buf_len = PAGE_SIZE; + mp_resp_bd->buf_len = CNIC_PAGE_SIZE; mp_resp_bd->flags = 0; return SUCCESS; diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index 4d93177..d9bae56 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -673,7 +673,8 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, /* Allocate and map SQ */ tgt->sq_mem_size = tgt->max_sqes * BNX2FC_SQ_WQE_SIZE; - tgt->sq_mem_size = (tgt->sq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK; + tgt->sq_mem_size = (tgt->sq_mem_size + (CNIC_PAGE_SIZE - 1)) & + CNIC_PAGE_MASK; tgt->sq = dma_alloc_coherent(&hba->pcidev->dev, tgt->sq_mem_size, &tgt->sq_dma, GFP_KERNEL); @@ -686,7 +687,8 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, /* Allocate and map CQ */ tgt->cq_mem_size = tgt->max_cqes * BNX2FC_CQ_WQE_SIZE; - tgt->cq_mem_size = (tgt->cq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK; + tgt->cq_mem_size = (tgt->cq_mem_size + (CNIC_PAGE_SIZE - 1)) & + CNIC_PAGE_MASK; tgt->cq = dma_alloc_coherent(&hba->pcidev->dev, tgt->cq_mem_size, &tgt->cq_dma, GFP_KERNEL); @@ -699,7 +701,8 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, /* Allocate and map RQ and RQ PBL */ tgt->rq_mem_size = tgt->max_rqes * BNX2FC_RQ_WQE_SIZE; - tgt->rq_mem_size = (tgt->rq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK; + tgt->rq_mem_size = (tgt->rq_mem_size + (CNIC_PAGE_SIZE - 1)) & + CNIC_PAGE_MASK; tgt->rq = dma_alloc_coherent(&hba->pcidev->dev, tgt->rq_mem_size, &tgt->rq_dma, GFP_KERNEL); @@ -710,8 +713,9 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, } memset(tgt->rq, 0, tgt->rq_mem_size); - tgt->rq_pbl_size = (tgt->rq_mem_size / PAGE_SIZE) * sizeof(void *); - tgt->rq_pbl_size = (tgt->rq_pbl_size + (PAGE_SIZE - 1)) & PAGE_MASK; + tgt->rq_pbl_size = (tgt->rq_mem_size / CNIC_PAGE_SIZE) * sizeof(void *); + tgt->rq_pbl_size = (tgt->rq_pbl_size + (CNIC_PAGE_SIZE - 1)) & + CNIC_PAGE_MASK; tgt->rq_pbl = dma_alloc_coherent(&hba->pcidev->dev, tgt->rq_pbl_size, &tgt->rq_pbl_dma, GFP_KERNEL); @@ -722,7 +726,7 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, } memset(tgt->rq_pbl, 0, tgt->rq_pbl_size); - num_pages = tgt->rq_mem_size / PAGE_SIZE; + num_pages = tgt->rq_mem_size / CNIC_PAGE_SIZE; page = tgt->rq_dma; pbl = (u32 *)tgt->rq_pbl; @@ -731,13 +735,13 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, pbl++; *pbl = (u32)((u64)page >> 32); pbl++; - page += PAGE_SIZE; + page += CNIC_PAGE_SIZE; } /* Allocate and map XFERQ */ tgt->xferq_mem_size = tgt->max_sqes * BNX2FC_XFERQ_WQE_SIZE; - tgt->xferq_mem_size = (tgt->xferq_mem_size + (PAGE_SIZE - 1)) & - PAGE_MASK; + tgt->xferq_mem_size = (tgt->xferq_mem_size + (CNIC_PAGE_SIZE - 1)) & + CNIC_PAGE_MASK; tgt->xferq = dma_alloc_coherent(&hba->pcidev->dev, tgt->xferq_mem_size, &tgt->xferq_dma, GFP_KERNEL); @@ -750,8 +754,8 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, /* Allocate and map CONFQ & CONFQ PBL */ tgt->confq_mem_size = tgt->max_sqes * BNX2FC_CONFQ_WQE_SIZE; - tgt->confq_mem_size = (tgt->confq_mem_size + (PAGE_SIZE - 1)) & - PAGE_MASK; + tgt->confq_mem_size = (tgt->confq_mem_size + (CNIC_PAGE_SIZE - 1)) & + CNIC_PAGE_MASK; tgt->confq = dma_alloc_coherent(&hba->pcidev->dev, tgt->confq_mem_size, &tgt->confq_dma, GFP_KERNEL); @@ -763,9 +767,9 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, memset(tgt->confq, 0, tgt->confq_mem_size); tgt->confq_pbl_size = - (tgt->confq_mem_size / PAGE_SIZE) * sizeof(void *); + (tgt->confq_mem_size / CNIC_PAGE_SIZE) * sizeof(void *); tgt->confq_pbl_size = - (tgt->confq_pbl_size + (PAGE_SIZE - 1)) & PAGE_MASK; + (tgt->confq_pbl_size + (CNIC_PAGE_SIZE - 1)) & CNIC_PAGE_MASK; tgt->confq_pbl = dma_alloc_coherent(&hba->pcidev->dev, tgt->confq_pbl_size, @@ -777,7 +781,7 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, } memset(tgt->confq_pbl, 0, tgt->confq_pbl_size); - num_pages = tgt->confq_mem_size / PAGE_SIZE; + num_pages = tgt->confq_mem_size / CNIC_PAGE_SIZE; page = tgt->confq_dma; pbl = (u32 *)tgt->confq_pbl; @@ -786,7 +790,7 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, pbl++; *pbl = (u32)((u64)page >> 32); pbl++; - page += PAGE_SIZE; + page += CNIC_PAGE_SIZE; } /* Allocate and map ConnDB */ @@ -805,8 +809,8 @@ static int bnx2fc_alloc_session_resc(struct bnx2fc_hba *hba, /* Allocate and map LCQ */ tgt->lcq_mem_size = (tgt->max_sqes + 8) * BNX2FC_SQ_WQE_SIZE; - tgt->lcq_mem_size = (tgt->lcq_mem_size + (PAGE_SIZE - 1)) & - PAGE_MASK; + tgt->lcq_mem_size = (tgt->lcq_mem_size + (CNIC_PAGE_SIZE - 1)) & + CNIC_PAGE_MASK; tgt->lcq = dma_alloc_coherent(&hba->pcidev->dev, tgt->lcq_mem_size, &tgt->lcq_dma, GFP_KERNEL); diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index e4cf23d..b87a193 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -61,7 +61,7 @@ static void bnx2i_adjust_qp_size(struct bnx2i_hba *hba) * yield integral num of page buffers */ /* adjust SQ */ - num_elements_per_pg = PAGE_SIZE / BNX2I_SQ_WQE_SIZE; + num_elements_per_pg = CNIC_PAGE_SIZE / BNX2I_SQ_WQE_SIZE; if (hba->max_sqes < num_elements_per_pg) hba->max_sqes = num_elements_per_pg; else if (hba->max_sqes % num_elements_per_pg) @@ -69,7 +69,7 @@ static void bnx2i_adjust_qp_size(struct bnx2i_hba *hba) ~(num_elements_per_pg - 1); /* adjust CQ */ - num_elements_per_pg = PAGE_SIZE / BNX2I_CQE_SIZE; + num_elements_per_pg = CNIC_PAGE_SIZE / BNX2I_CQE_SIZE; if (hba->max_cqes < num_elements_per_pg) hba->max_cqes = num_elements_per_pg; else if (hba->max_cqes % num_elements_per_pg) @@ -77,7 +77,7 @@ static void bnx2i_adjust_qp_size(struct bnx2i_hba *hba) ~(num_elements_per_pg - 1); /* adjust RQ */ - num_elements_per_pg = PAGE_SIZE / BNX2I_RQ_WQE_SIZE; + num_elements_per_pg = CNIC_PAGE_SIZE / BNX2I_RQ_WQE_SIZE; if (hba->max_rqes < num_elements_per_pg) hba->max_rqes = num_elements_per_pg; else if (hba->max_rqes % num_elements_per_pg) @@ -959,7 +959,7 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep) /* SQ page table */ memset(ep->qp.sq_pgtbl_virt, 0, ep->qp.sq_pgtbl_size); - num_pages = ep->qp.sq_mem_size / PAGE_SIZE; + num_pages = ep->qp.sq_mem_size / CNIC_PAGE_SIZE; page = ep->qp.sq_phys; if (cnic_dev_10g) @@ -973,7 +973,7 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep) ptbl++; *ptbl = (u32) ((u64) page >> 32); ptbl++; - page += PAGE_SIZE; + page += CNIC_PAGE_SIZE; } else { /* PTE is written in big endian format for * 5706/5708/5709 devices */ @@ -981,13 +981,13 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep) ptbl++; *ptbl = (u32) page; ptbl++; - page += PAGE_SIZE; + page += CNIC_PAGE_SIZE; } } /* RQ page table */ memset(ep->qp.rq_pgtbl_virt, 0, ep->qp.rq_pgtbl_size); - num_pages = ep->qp.rq_mem_size / PAGE_SIZE; + num_pages = ep->qp.rq_mem_size / CNIC_PAGE_SIZE; page = ep->qp.rq_phys; if (cnic_dev_10g) @@ -1001,7 +1001,7 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep) ptbl++; *ptbl = (u32) ((u64) page >> 32); ptbl++; - page += PAGE_SIZE; + page += CNIC_PAGE_SIZE; } else { /* PTE is written in big endian format for * 5706/5708/5709 devices */ @@ -1009,13 +1009,13 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep) ptbl++; *ptbl = (u32) page; ptbl++; - page += PAGE_SIZE; + page += CNIC_PAGE_SIZE; } } /* CQ page table */ memset(ep->qp.cq_pgtbl_virt, 0, ep->qp.cq_pgtbl_size); - num_pages = ep->qp.cq_mem_size / PAGE_SIZE; + num_pages = ep->qp.cq_mem_size / CNIC_PAGE_SIZE; page = ep->qp.cq_phys; if (cnic_dev_10g) @@ -1029,7 +1029,7 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep) ptbl++; *ptbl = (u32) ((u64) page >> 32); ptbl++; - page += PAGE_SIZE; + page += CNIC_PAGE_SIZE; } else { /* PTE is written in big endian format for * 5706/5708/5709 devices */ @@ -1037,7 +1037,7 @@ static void setup_qp_page_tables(struct bnx2i_endpoint *ep) ptbl++; *ptbl = (u32) page; ptbl++; - page += PAGE_SIZE; + page += CNIC_PAGE_SIZE; } } } @@ -1064,11 +1064,11 @@ int bnx2i_alloc_qp_resc(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep) /* Allocate page table memory for SQ which is page aligned */ ep->qp.sq_mem_size = hba->max_sqes * BNX2I_SQ_WQE_SIZE; ep->qp.sq_mem_size = - (ep->qp.sq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK; + (ep->qp.sq_mem_size + (CNIC_PAGE_SIZE - 1)) & CNIC_PAGE_MASK; ep->qp.sq_pgtbl_size = - (ep->qp.sq_mem_size / PAGE_SIZE) * sizeof(void *); + (ep->qp.sq_mem_size / CNIC_PAGE_SIZE) * sizeof(void *); ep->qp.sq_pgtbl_size = - (ep->qp.sq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK; + (ep->qp.sq_pgtbl_size + (CNIC_PAGE_SIZE - 1)) & CNIC_PAGE_MASK; ep->qp.sq_pgtbl_virt = dma_alloc_coherent(&hba->pcidev->dev, ep->qp.sq_pgtbl_size, @@ -1101,11 +1101,11 @@ int bnx2i_alloc_qp_resc(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep) /* Allocate page table memory for CQ which is page aligned */ ep->qp.cq_mem_size = hba->max_cqes * BNX2I_CQE_SIZE; ep->qp.cq_mem_size = - (ep->qp.cq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK; + (ep->qp.cq_mem_size + (CNIC_PAGE_SIZE - 1)) & CNIC_PAGE_MASK; ep->qp.cq_pgtbl_size = - (ep->qp.cq_mem_size / PAGE_SIZE) * sizeof(void *); + (ep->qp.cq_mem_size / CNIC_PAGE_SIZE) * sizeof(void *); ep->qp.cq_pgtbl_size = - (ep->qp.cq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK; + (ep->qp.cq_pgtbl_size + (CNIC_PAGE_SIZE - 1)) & CNIC_PAGE_MASK; ep->qp.cq_pgtbl_virt = dma_alloc_coherent(&hba->pcidev->dev, ep->qp.cq_pgtbl_size, @@ -1144,11 +1144,11 @@ int bnx2i_alloc_qp_resc(struct bnx2i_hba *hba, struct bnx2i_endpoint *ep) /* Allocate page table memory for RQ which is page aligned */ ep->qp.rq_mem_size = hba->max_rqes * BNX2I_RQ_WQE_SIZE; ep->qp.rq_mem_size = - (ep->qp.rq_mem_size + (PAGE_SIZE - 1)) & PAGE_MASK; + (ep->qp.rq_mem_size + (CNIC_PAGE_SIZE - 1)) & CNIC_PAGE_MASK; ep->qp.rq_pgtbl_size = - (ep->qp.rq_mem_size / PAGE_SIZE) * sizeof(void *); + (ep->qp.rq_mem_size / CNIC_PAGE_SIZE) * sizeof(void *); ep->qp.rq_pgtbl_size = - (ep->qp.rq_pgtbl_size + (PAGE_SIZE - 1)) & PAGE_MASK; + (ep->qp.rq_pgtbl_size + (CNIC_PAGE_SIZE - 1)) & CNIC_PAGE_MASK; ep->qp.rq_pgtbl_virt = dma_alloc_coherent(&hba->pcidev->dev, ep->qp.rq_pgtbl_size, @@ -1270,7 +1270,7 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) bnx2i_adjust_qp_size(hba); iscsi_init.flags = - ISCSI_PAGE_SIZE_4K << ISCSI_KWQE_INIT1_PAGE_SIZE_SHIFT; + (CNIC_PAGE_BITS - 8) << ISCSI_KWQE_INIT1_PAGE_SIZE_SHIFT; if (en_tcp_dack) iscsi_init.flags |= ISCSI_KWQE_INIT1_DELAYED_ACK_ENABLE; iscsi_init.reserved0 = 0; @@ -1288,15 +1288,15 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) ((hba->num_ccell & 0xFFFF) | (hba->max_sqes << 16)); iscsi_init.num_ccells_per_conn = hba->num_ccell; iscsi_init.num_tasks_per_conn = hba->max_sqes; - iscsi_init.sq_wqes_per_page = PAGE_SIZE / BNX2I_SQ_WQE_SIZE; + iscsi_init.sq_wqes_per_page = CNIC_PAGE_SIZE / BNX2I_SQ_WQE_SIZE; iscsi_init.sq_num_wqes = hba->max_sqes; iscsi_init.cq_log_wqes_per_page = - (u8) bnx2i_power_of2(PAGE_SIZE / BNX2I_CQE_SIZE); + (u8) bnx2i_power_of2(CNIC_PAGE_SIZE / BNX2I_CQE_SIZE); iscsi_init.cq_num_wqes = hba->max_cqes; iscsi_init.cq_num_pages = (hba->max_cqes * BNX2I_CQE_SIZE + - (PAGE_SIZE - 1)) / PAGE_SIZE; + (CNIC_PAGE_SIZE - 1)) / CNIC_PAGE_SIZE; iscsi_init.sq_num_pages = (hba->max_sqes * BNX2I_SQ_WQE_SIZE + - (PAGE_SIZE - 1)) / PAGE_SIZE; + (CNIC_PAGE_SIZE - 1)) / CNIC_PAGE_SIZE; iscsi_init.rq_buffer_size = BNX2I_RQ_WQE_SIZE; iscsi_init.rq_num_wqes = hba->max_rqes; diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 854dad7..c8b0aff 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -525,7 +525,7 @@ static int bnx2i_setup_mp_bdt(struct bnx2i_hba *hba) struct iscsi_bd *mp_bdt; u64 addr; - hba->mp_bd_tbl = dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE, + hba->mp_bd_tbl = dma_alloc_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, &hba->mp_bd_dma, GFP_KERNEL); if (!hba->mp_bd_tbl) { printk(KERN_ERR "unable to allocate Middle Path BDT\n"); @@ -533,11 +533,12 @@ static int bnx2i_setup_mp_bdt(struct bnx2i_hba *hba) goto out; } - hba->dummy_buffer = dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE, + hba->dummy_buffer = dma_alloc_coherent(&hba->pcidev->dev, + CNIC_PAGE_SIZE, &hba->dummy_buf_dma, GFP_KERNEL); if (!hba->dummy_buffer) { printk(KERN_ERR "unable to alloc Middle Path Dummy Buffer\n"); - dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_free_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, hba->mp_bd_tbl, hba->mp_bd_dma); hba->mp_bd_tbl = NULL; rc = -1; @@ -548,7 +549,7 @@ static int bnx2i_setup_mp_bdt(struct bnx2i_hba *hba) addr = (unsigned long) hba->dummy_buf_dma; mp_bdt->buffer_addr_lo = addr & 0xffffffff; mp_bdt->buffer_addr_hi = addr >> 32; - mp_bdt->buffer_length = PAGE_SIZE; + mp_bdt->buffer_length = CNIC_PAGE_SIZE; mp_bdt->flags = ISCSI_BD_LAST_IN_BD_CHAIN | ISCSI_BD_FIRST_IN_BD_CHAIN; out: @@ -565,12 +566,12 @@ out: static void bnx2i_free_mp_bdt(struct bnx2i_hba *hba) { if (hba->mp_bd_tbl) { - dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_free_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, hba->mp_bd_tbl, hba->mp_bd_dma); hba->mp_bd_tbl = NULL; } if (hba->dummy_buffer) { - dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_free_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, hba->dummy_buffer, hba->dummy_buf_dma); hba->dummy_buffer = NULL; } @@ -934,14 +935,14 @@ static void bnx2i_conn_free_login_resources(struct bnx2i_hba *hba, struct bnx2i_conn *bnx2i_conn) { if (bnx2i_conn->gen_pdu.resp_bd_tbl) { - dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_free_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, bnx2i_conn->gen_pdu.resp_bd_tbl, bnx2i_conn->gen_pdu.resp_bd_dma); bnx2i_conn->gen_pdu.resp_bd_tbl = NULL; } if (bnx2i_conn->gen_pdu.req_bd_tbl) { - dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_free_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, bnx2i_conn->gen_pdu.req_bd_tbl, bnx2i_conn->gen_pdu.req_bd_dma); bnx2i_conn->gen_pdu.req_bd_tbl = NULL; @@ -998,13 +999,13 @@ static int bnx2i_conn_alloc_login_resources(struct bnx2i_hba *hba, bnx2i_conn->gen_pdu.resp_wr_ptr = bnx2i_conn->gen_pdu.resp_buf; bnx2i_conn->gen_pdu.req_bd_tbl = - dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_alloc_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, &bnx2i_conn->gen_pdu.req_bd_dma, GFP_KERNEL); if (bnx2i_conn->gen_pdu.req_bd_tbl == NULL) goto login_req_bd_tbl_failure; bnx2i_conn->gen_pdu.resp_bd_tbl = - dma_alloc_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_alloc_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, &bnx2i_conn->gen_pdu.resp_bd_dma, GFP_KERNEL); if (bnx2i_conn->gen_pdu.resp_bd_tbl == NULL) @@ -1013,7 +1014,7 @@ static int bnx2i_conn_alloc_login_resources(struct bnx2i_hba *hba, return 0; login_resp_bd_tbl_failure: - dma_free_coherent(&hba->pcidev->dev, PAGE_SIZE, + dma_free_coherent(&hba->pcidev->dev, CNIC_PAGE_SIZE, bnx2i_conn->gen_pdu.req_bd_tbl, bnx2i_conn->gen_pdu.req_bd_dma); bnx2i_conn->gen_pdu.req_bd_tbl = NULL; -- cgit v0.10.2 From c3661283f947ff5023b717240d069e9be765b0a9 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 17 Mar 2014 19:19:08 -0800 Subject: cnic: Update version to 2.5.20 and copyright year. Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index ea37ea2..09f3fef 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -1,6 +1,6 @@ /* cnic.c: Broadcom CNIC core network driver. * - * Copyright (c) 2006-2013 Broadcom Corporation + * Copyright (c) 2006-2014 Broadcom Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/net/ethernet/broadcom/cnic.h b/drivers/net/ethernet/broadcom/cnic.h index 0d6b13f..d535ae4 100644 --- a/drivers/net/ethernet/broadcom/cnic.h +++ b/drivers/net/ethernet/broadcom/cnic.h @@ -1,6 +1,6 @@ /* cnic.h: Broadcom CNIC core network driver. * - * Copyright (c) 2006-2013 Broadcom Corporation + * Copyright (c) 2006-2014 Broadcom Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/net/ethernet/broadcom/cnic_defs.h b/drivers/net/ethernet/broadcom/cnic_defs.h index 95a8e4b..dcbca69 100644 --- a/drivers/net/ethernet/broadcom/cnic_defs.h +++ b/drivers/net/ethernet/broadcom/cnic_defs.h @@ -1,7 +1,7 @@ /* cnic.c: Broadcom CNIC core network driver. * - * Copyright (c) 2006-2013 Broadcom Corporation + * Copyright (c) 2006-2014 Broadcom Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/drivers/net/ethernet/broadcom/cnic_if.h b/drivers/net/ethernet/broadcom/cnic_if.h index 40bb563..5f4d557 100644 --- a/drivers/net/ethernet/broadcom/cnic_if.h +++ b/drivers/net/ethernet/broadcom/cnic_if.h @@ -1,6 +1,6 @@ /* cnic_if.h: Broadcom CNIC core network driver. * - * Copyright (c) 2006-2013 Broadcom Corporation + * Copyright (c) 2006-2014 Broadcom Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,8 +14,8 @@ #include "bnx2x/bnx2x_mfw_req.h" -#define CNIC_MODULE_VERSION "2.5.19" -#define CNIC_MODULE_RELDATE "December 19, 2013" +#define CNIC_MODULE_VERSION "2.5.20" +#define CNIC_MODULE_RELDATE "March 14, 2014" #define CNIC_ULP_RDMA 0 #define CNIC_ULP_ISCSI 1 -- cgit v0.10.2 From bc6e7c4b0d1a1f742d96556f63d68f17f4e232c3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 14 Mar 2014 13:52:48 -0700 Subject: libata, libsas: kill pm_result and related cleanup Tejun says: "At least for libata, worrying about suspend/resume failures don't make whole lot of sense. If suspend failed, just proceed with suspend. If the device can't be woken up afterwards, that's that. There isn't anything we could have done differently anyway. The same for resume, if spinup fails, the device is dud and the following commands will invoke EH actions and will eventually fail. Again, there really isn't any *choice* to make. Just making sure the errors are handled gracefully (ie. don't crash) and the following commands are handled correctly should be enough." The only libata user that actually cares about the result from a suspend operation is libsas. However, it only cares about whether queuing a new operation collides with an in-flight one. All libsas does with the error is retry, but we can just let libata wait for the previous operation before continuing. Other cleanups include: 1/ Unifying all ata port pm operations on an ata_port_pm_ prefix 2/ Marking all ata port pm helper routines as returning void, only ata_port_pm_ entry points need to fake a 0 return value. 3/ Killing ata_port_{suspend|resume}_common() in favor of calling ata_port_request_pm() directly 4/ Killing the wrappers that just do a to_ata_port() conversion 5/ Clearly marking the entry points that do async operations with an _async suffix. Reference: http://marc.info/?l=linux-scsi&m=138995409532286&w=2 Cc: Phillip Susi Cc: Alan Stern Suggested-by: Tejun Heo Signed-off-by: Todd Brandt Signed-off-by: Dan Williams Signed-off-by: Tejun Heo diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1a3dbd1..66110ed 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5351,22 +5351,17 @@ bool ata_link_offline(struct ata_link *link) } #ifdef CONFIG_PM -static int ata_port_request_pm(struct ata_port *ap, pm_message_t mesg, - unsigned int action, unsigned int ehi_flags, - int *async) +static void ata_port_request_pm(struct ata_port *ap, pm_message_t mesg, + unsigned int action, unsigned int ehi_flags, + bool async) { struct ata_link *link; unsigned long flags; - int rc = 0; /* Previous resume operation might still be in * progress. Wait for PM_PENDING to clear. */ if (ap->pflags & ATA_PFLAG_PM_PENDING) { - if (async) { - *async = -EAGAIN; - return 0; - } ata_port_wait_eh(ap); WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING); } @@ -5375,11 +5370,6 @@ static int ata_port_request_pm(struct ata_port *ap, pm_message_t mesg, spin_lock_irqsave(ap->lock, flags); ap->pm_mesg = mesg; - if (async) - ap->pm_result = async; - else - ap->pm_result = &rc; - ap->pflags |= ATA_PFLAG_PM_PENDING; ata_for_each_link(link, ap, HOST_FIRST) { link->eh_info.action |= action; @@ -5390,87 +5380,81 @@ static int ata_port_request_pm(struct ata_port *ap, pm_message_t mesg, spin_unlock_irqrestore(ap->lock, flags); - /* wait and check result */ if (!async) { ata_port_wait_eh(ap); WARN_ON(ap->pflags & ATA_PFLAG_PM_PENDING); } - - return rc; } -static int __ata_port_suspend_common(struct ata_port *ap, pm_message_t mesg, int *async) +/* + * On some hardware, device fails to respond after spun down for suspend. As + * the device won't be used before being resumed, we don't need to touch the + * device. Ask EH to skip the usual stuff and proceed directly to suspend. + * + * http://thread.gmane.org/gmane.linux.ide/46764 + */ +static const unsigned int ata_port_suspend_ehi = ATA_EHI_QUIET + | ATA_EHI_NO_AUTOPSY + | ATA_EHI_NO_RECOVERY; + +static void ata_port_suspend(struct ata_port *ap, pm_message_t mesg) { - /* - * On some hardware, device fails to respond after spun down - * for suspend. As the device won't be used before being - * resumed, we don't need to touch the device. Ask EH to skip - * the usual stuff and proceed directly to suspend. - * - * http://thread.gmane.org/gmane.linux.ide/46764 - */ - unsigned int ehi_flags = ATA_EHI_QUIET | ATA_EHI_NO_AUTOPSY | - ATA_EHI_NO_RECOVERY; - return ata_port_request_pm(ap, mesg, 0, ehi_flags, async); + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, false); } -static int ata_port_suspend_common(struct device *dev, pm_message_t mesg) +static void ata_port_suspend_async(struct ata_port *ap, pm_message_t mesg) { - struct ata_port *ap = to_ata_port(dev); - - return __ata_port_suspend_common(ap, mesg, NULL); + ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, true); } -static int ata_port_suspend(struct device *dev) +static int ata_port_pm_suspend(struct device *dev) { + struct ata_port *ap = to_ata_port(dev); + if (pm_runtime_suspended(dev)) return 0; - return ata_port_suspend_common(dev, PMSG_SUSPEND); + ata_port_suspend(ap, PMSG_SUSPEND); + return 0; } -static int ata_port_do_freeze(struct device *dev) +static int ata_port_pm_freeze(struct device *dev) { + struct ata_port *ap = to_ata_port(dev); + if (pm_runtime_suspended(dev)) return 0; - return ata_port_suspend_common(dev, PMSG_FREEZE); + ata_port_suspend(ap, PMSG_FREEZE); + return 0; } -static int ata_port_poweroff(struct device *dev) +static int ata_port_pm_poweroff(struct device *dev) { - return ata_port_suspend_common(dev, PMSG_HIBERNATE); + ata_port_suspend(to_ata_port(dev), PMSG_HIBERNATE); + return 0; } -static int __ata_port_resume_common(struct ata_port *ap, pm_message_t mesg, - int *async) -{ - int rc; +static const unsigned int ata_port_resume_ehi = ATA_EHI_NO_AUTOPSY + | ATA_EHI_QUIET; - rc = ata_port_request_pm(ap, mesg, ATA_EH_RESET, - ATA_EHI_NO_AUTOPSY | ATA_EHI_QUIET, async); - return rc; +static void ata_port_resume(struct ata_port *ap, pm_message_t mesg) +{ + ata_port_request_pm(ap, mesg, ATA_EH_RESET, ata_port_resume_ehi, false); } -static int ata_port_resume_common(struct device *dev, pm_message_t mesg) +static void ata_port_resume_async(struct ata_port *ap, pm_message_t mesg) { - struct ata_port *ap = to_ata_port(dev); - - return __ata_port_resume_common(ap, mesg, NULL); + ata_port_request_pm(ap, mesg, ATA_EH_RESET, ata_port_resume_ehi, true); } -static int ata_port_resume(struct device *dev) +static int ata_port_pm_resume(struct device *dev) { - int rc; - - rc = ata_port_resume_common(dev, PMSG_RESUME); - if (!rc) { - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - } - - return rc; + ata_port_resume(to_ata_port(dev), PMSG_RESUME); + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + return 0; } /* @@ -5499,21 +5483,23 @@ static int ata_port_runtime_idle(struct device *dev) static int ata_port_runtime_suspend(struct device *dev) { - return ata_port_suspend_common(dev, PMSG_AUTO_SUSPEND); + ata_port_suspend(to_ata_port(dev), PMSG_AUTO_SUSPEND); + return 0; } static int ata_port_runtime_resume(struct device *dev) { - return ata_port_resume_common(dev, PMSG_AUTO_RESUME); + ata_port_resume(to_ata_port(dev), PMSG_AUTO_RESUME); + return 0; } static const struct dev_pm_ops ata_port_pm_ops = { - .suspend = ata_port_suspend, - .resume = ata_port_resume, - .freeze = ata_port_do_freeze, - .thaw = ata_port_resume, - .poweroff = ata_port_poweroff, - .restore = ata_port_resume, + .suspend = ata_port_pm_suspend, + .resume = ata_port_pm_resume, + .freeze = ata_port_pm_freeze, + .thaw = ata_port_pm_resume, + .poweroff = ata_port_pm_poweroff, + .restore = ata_port_pm_resume, .runtime_suspend = ata_port_runtime_suspend, .runtime_resume = ata_port_runtime_resume, @@ -5525,18 +5511,17 @@ static const struct dev_pm_ops ata_port_pm_ops = { * level. sas suspend/resume is async to allow parallel port recovery * since sas has multiple ata_port instances per Scsi_Host. */ -int ata_sas_port_async_suspend(struct ata_port *ap, int *async) +void ata_sas_port_suspend(struct ata_port *ap) { - return __ata_port_suspend_common(ap, PMSG_SUSPEND, async); + ata_port_suspend_async(ap, PMSG_SUSPEND); } -EXPORT_SYMBOL_GPL(ata_sas_port_async_suspend); +EXPORT_SYMBOL_GPL(ata_sas_port_suspend); -int ata_sas_port_async_resume(struct ata_port *ap, int *async) +void ata_sas_port_resume(struct ata_port *ap) { - return __ata_port_resume_common(ap, PMSG_RESUME, async); + ata_port_resume_async(ap, PMSG_RESUME); } -EXPORT_SYMBOL_GPL(ata_sas_port_async_resume); - +EXPORT_SYMBOL_GPL(ata_sas_port_resume); /** * ata_host_suspend - suspend host diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c1d0170..6760fc4 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -4070,7 +4070,7 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap) ata_acpi_set_state(ap, ap->pm_mesg); out: - /* report result */ + /* update the flags */ spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~ATA_PFLAG_PM_PENDING; @@ -4079,11 +4079,6 @@ static void ata_eh_handle_port_suspend(struct ata_port *ap) else if (ap->pflags & ATA_PFLAG_FROZEN) ata_port_schedule_eh(ap); - if (ap->pm_result) { - *ap->pm_result = rc; - ap->pm_result = NULL; - } - spin_unlock_irqrestore(ap->lock, flags); return; @@ -4135,13 +4130,9 @@ static void ata_eh_handle_port_resume(struct ata_port *ap) /* tell ACPI that we're resuming */ ata_acpi_on_resume(ap); - /* report result */ + /* update the flags */ spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); - if (ap->pm_result) { - *ap->pm_result = rc; - ap->pm_result = NULL; - } spin_unlock_irqrestore(ap->lock, flags); } #endif /* CONFIG_PM */ diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index d289583..766098a 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -700,46 +700,26 @@ void sas_probe_sata(struct asd_sas_port *port) } -static bool sas_ata_flush_pm_eh(struct asd_sas_port *port, const char *func) +static void sas_ata_flush_pm_eh(struct asd_sas_port *port, const char *func) { struct domain_device *dev, *n; - bool retry = false; list_for_each_entry_safe(dev, n, &port->dev_list, dev_list_node) { - int rc; - if (!dev_is_sata(dev)) continue; sas_ata_wait_eh(dev); - rc = dev->sata_dev.pm_result; - if (rc == -EAGAIN) - retry = true; - else if (rc) { - /* since we don't have a - * ->port_{suspend|resume} routine in our - * ata_port ops, and no entanglements with - * acpi, suspend should just be mechanical trip - * through eh, catch cases where these - * assumptions are invalidated - */ - WARN_ONCE(1, "failed %s %s error: %d\n", func, - dev_name(&dev->rphy->dev), rc); - } /* if libata failed to power manage the device, tear it down */ if (ata_dev_disabled(sas_to_ata_dev(dev))) sas_fail_probe(dev, func, -ENODEV); } - - return retry; } void sas_suspend_sata(struct asd_sas_port *port) { struct domain_device *dev; - retry: mutex_lock(&port->ha->disco_mutex); list_for_each_entry(dev, &port->dev_list, dev_list_node) { struct sata_device *sata; @@ -751,20 +731,17 @@ void sas_suspend_sata(struct asd_sas_port *port) if (sata->ap->pm_mesg.event == PM_EVENT_SUSPEND) continue; - sata->pm_result = -EIO; - ata_sas_port_async_suspend(sata->ap, &sata->pm_result); + ata_sas_port_suspend(sata->ap); } mutex_unlock(&port->ha->disco_mutex); - if (sas_ata_flush_pm_eh(port, __func__)) - goto retry; + sas_ata_flush_pm_eh(port, __func__); } void sas_resume_sata(struct asd_sas_port *port) { struct domain_device *dev; - retry: mutex_lock(&port->ha->disco_mutex); list_for_each_entry(dev, &port->dev_list, dev_list_node) { struct sata_device *sata; @@ -776,13 +753,11 @@ void sas_resume_sata(struct asd_sas_port *port) if (sata->ap->pm_mesg.event == PM_EVENT_ON) continue; - sata->pm_result = -EIO; - ata_sas_port_async_resume(sata->ap, &sata->pm_result); + ata_sas_port_resume(sata->ap); } mutex_unlock(&port->ha->disco_mutex); - if (sas_ata_flush_pm_eh(port, __func__)) - goto retry; + sas_ata_flush_pm_eh(port, __func__); } /** diff --git a/include/linux/libata.h b/include/linux/libata.h index bec6dbe..5c09e86 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -848,7 +848,6 @@ struct ata_port { struct completion park_req_pending; pm_message_t pm_mesg; - int *pm_result; enum ata_lpm_policy target_lpm_policy; struct timer_list fastdrain_timer; @@ -1140,16 +1139,14 @@ extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); -extern int ata_sas_port_async_suspend(struct ata_port *ap, int *async); -extern int ata_sas_port_async_resume(struct ata_port *ap, int *async); +extern void ata_sas_port_suspend(struct ata_port *ap); +extern void ata_sas_port_resume(struct ata_port *ap); #else -static inline int ata_sas_port_async_suspend(struct ata_port *ap, int *async) +static inline void ata_sas_port_suspend(struct ata_port *ap) { - return 0; } -static inline int ata_sas_port_async_resume(struct ata_port *ap, int *async) +static inline void ata_sas_port_async_resume(struct ata_port *ap) { - return 0; } #endif extern int ata_ratelimit(void); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index f843dd8..ef7872c 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -172,7 +172,6 @@ struct sata_device { enum ata_command_set command_set; struct smp_resp rps_resp; /* report_phy_sata_resp */ u8 port_no; /* port number, if this is a PM (Port) */ - int pm_result; struct ata_port *ap; struct ata_host ata_host; -- cgit v0.10.2 From 200421a80f6e0a9e39d698944cc35cba103eb6ce Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Fri, 14 Mar 2014 13:52:54 -0700 Subject: libata: async resume Improve overall system resume time by making libata link recovery actions asynchronous relative to other resume events. Link resume operations are performed using the scsi_eh thread, so commands, particularly the sd resume start/stop command, will be held off until the device exits error handling. Libata already flushes eh with ata_port_wait_eh() in the port teardown paths, so there are no concerns with async operation colliding with the end-of-life of the ata_port object. Also, libata-core is already careful to flush in-flight pm operations before another round of pm starts on the given ata_port. Reference: https://01.org/suspendresume/blogs/tebrandt/2013/hard-disk-resume-optimization-simpler-approach Cc: Len Brown Cc: Phillip Susi Cc: Alan Stern Signed-off-by: Todd Brandt [djbw: rebase on cleanup patch, changelog wordsmithing] Signed-off-by: Dan Williams Signed-off-by: Tejun Heo diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 66110ed..c37eb02 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5450,7 +5450,7 @@ static void ata_port_resume_async(struct ata_port *ap, pm_message_t mesg) static int ata_port_pm_resume(struct device *dev) { - ata_port_resume(to_ata_port(dev), PMSG_RESUME); + ata_port_resume_async(to_ata_port(dev), PMSG_RESUME); pm_runtime_disable(dev); pm_runtime_set_active(dev); pm_runtime_enable(dev); -- cgit v0.10.2 From 7346135dcd3f9b57f30a5512094848c678d7143e Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 18 Mar 2014 12:32:29 -0400 Subject: vxlan: fix potential NULL dereference in arp_reduce() This patch fixes a NULL pointer dereference in the event of an skb allocation failure in arp_reduce(). Signed-Off-By: David L Stevens Acked-by: Cong Wang Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b0f705c..a7eb3f2 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1318,6 +1318,9 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) neigh_release(n); + if (reply == NULL) + goto out; + skb_reset_mac_header(reply); __skb_pull(reply, skb_network_offset(reply)); reply->ip_summed = CHECKSUM_UNNECESSARY; -- cgit v0.10.2 From 3e3d35402140f15a626f24afafb0d7fc3b2e5aa2 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Tue, 18 Mar 2014 17:11:24 +0100 Subject: ATHEROS-ATL1E: Convert iounmap to pci_iounmap Use pci_iounmap instead of iounmap when the virtual mapping was done with pci_iomap. A simplified version of the semantic patch that finds this issue is as follows: (http://coccinelle.lip6.fr/) // @r@ expression addr; @@ addr = pci_iomap(...) @rr@ expression r.addr; @@ * iounmap(addr) // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index d5c2d3e..422aab2 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -2436,7 +2436,7 @@ err_reset: err_register: err_sw_init: err_eeprom: - iounmap(adapter->hw.hw_addr); + pci_iounmap(pdev, adapter->hw.hw_addr); err_init_netdev: err_ioremap: free_netdev(netdev); @@ -2474,7 +2474,7 @@ static void atl1e_remove(struct pci_dev *pdev) unregister_netdev(netdev); atl1e_free_ring_resources(adapter); atl1e_force_ps(&adapter->hw); - iounmap(adapter->hw.hw_addr); + pci_iounmap(pdev, adapter->hw.hw_addr); pci_release_regions(pdev); free_netdev(netdev); pci_disable_device(pdev); -- cgit v0.10.2 From 707d4eefbdb31f8e588277157056b0ce637d6c68 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 18 Mar 2014 14:26:12 -0600 Subject: Revert "[PATCH] Insert GART region into resource map" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 56dd669a138c, which makes the GART visible in /proc/iomem. This fixes a regression: e501b3d87f00 ("agp: Support 64-bit APBASE") exposed an existing problem with a conflict between the GART region and a PCI BAR region. The GART addresses are bus addresses, not CPU addresses, and therefore should not be inserted in iomem_resource. On many machines, the GART region is addressable by the CPU as well as by an AGP master, but CPU addressability is not required by the spec. On some of these machines, the GART is mapped by a PCI BAR, and in that case, the PCI core automatically inserts it into iomem_resource, just as it does for all BARs. Inserting it here means we'll have a conflict if the PCI core later tries to claim the GART region, so let's drop the insertion here. The conflict indirectly causes X failures, as reported by Jouni in the bugzilla below. We detected the conflict even before e501b3d87f00, but after it the AGP code (fix_northbridge()) uses the PCI resource (which is zeroed because of the conflict) instead of reading the BAR again. Conflicts: arch/x86_64/kernel/aperture.c Fixes: e501b3d87f00 agp: Support 64-bit APBASE Link: https://bugzilla.kernel.org/show_bug.cgi?id=72201 Reported-and-tested-by: Jouni Mettälä Signed-off-by: Bjorn Helgaas diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index fd972a3..9fa8aa0 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -54,18 +53,6 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; -static struct resource gart_resource = { - .name = "GART", - .flags = IORESOURCE_MEM, -}; - -static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) -{ - gart_resource.start = aper_base; - gart_resource.end = aper_base + aper_size - 1; - insert_resource(&iomem_resource, &gart_resource); -} - /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. */ @@ -96,7 +83,6 @@ static u32 __init allocate_aperture(void) memblock_reserve(addr, aper_size); printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", aper_size >> 10, addr); - insert_aperture_resource((u32)addr, aper_size); register_nosave_region(addr >> PAGE_SHIFT, (addr+aper_size) >> PAGE_SHIFT); @@ -444,12 +430,8 @@ int __init gart_iommu_hole_init(void) out: if (!fix && !fallback_aper_force) { - if (last_aper_base) { - unsigned long n = (32 * 1024 * 1024) << last_aper_order; - - insert_aperture_resource((u32)last_aper_base, n); + if (last_aper_base) return 1; - } return 0; } -- cgit v0.10.2 From 3eb59ec64fc7a3f4576da23f811b39331b830ba2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 18 Mar 2014 17:02:36 +0800 Subject: cgroup: fix a failure path in create_css() If online_css() fails, we should remove cgroup files belonging to css->ss. Signed-off-by: Li Zefan Signed-off-by: Tejun Heo diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 105f273..0c753dd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4112,17 +4112,17 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) err = percpu_ref_init(&css->refcnt, css_release); if (err) - goto err_free; + goto err_free_css; init_css(css, ss, cgrp); err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id); if (err) - goto err_free; + goto err_free_percpu_ref; err = online_css(css); if (err) - goto err_free; + goto err_clear_dir; dget(cgrp->dentry); css_get(css->parent); @@ -4138,8 +4138,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) return 0; -err_free: +err_clear_dir: + cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id); +err_free_percpu_ref: percpu_ref_cancel_init(&css->refcnt); +err_free_css: ss->css_free(css); return err; } -- cgit v0.10.2 From 263f89bf7d0f5ba98077dda8df1ff814862ad5ba Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Sun, 16 Mar 2014 12:06:05 -0400 Subject: perf timechart: Fix off-by-one error in 'record' argv handling Since 367b315 (perf timechart: Add support for -P and -T in timechart recording, 2013-11-01), the 'perf timechart record' command stopped working: $ perf timechart record -- git status Workload failed: No such file or directory This happens because of an off-by-one error while preparing the argv for cmd_record(): it attempts to execute the command 'status' and complains that it doesn't exist. Fix this error. Signed-off-by: Ramkumar Ramachandra Acked-by: Stanislav Fomichev Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/1394985965-2332-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 25526d6..d4991a2 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1238,7 +1238,7 @@ static int timechart__record(struct timechart *tchart, int argc, const char **ar for (i = 0; i < old_power_args_nr; i++) *p++ = strdup(old_power_args[i]); - for (j = 1; j < (unsigned int)argc; j++) + for (j = 0; j < (unsigned int)argc; j++) *p++ = argv[j]; return cmd_record(rec_argc, rec_argv, NULL); -- cgit v0.10.2 From 80790e0b7ef768b6591fdf764b62c572b76a5d80 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 17 Mar 2014 10:18:21 -0400 Subject: perf sched: Fixup header alignment in 'latency' output Before: --------------------------------------------------------------------------------------------------------------- Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at | --------------------------------------------------------------------------------------------------------------- ... | | | | | git:24540 | 336.622 ms | 10 | avg: 0.032 ms | max: 0.062 ms | max at: 115610.111046 s git:24541 | 0.457 ms | 1 | avg: 0.000 ms | max: 0.000 ms | max at: 0.000000 s ----------------------------------------------------------------------------------------- TOTAL: | 396.542 ms | 353 | --------------------------------------------------- After: ----------------------------------------------------------------------------------------------------------------- Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at | ----------------------------------------------------------------------------------------------------------------- ... | | | | | git:24540 | 336.622 ms | 10 | avg: 0.032 ms | max: 0.062 ms | max at: 115610.111046 s git:24541 | 0.457 ms | 1 | avg: 0.000 ms | max: 0.000 ms | max at: 0.000000 s ----------------------------------------------------------------------------------------------------------------- TOTAL: | 396.542 ms | 353 | --------------------------------------------------- Signed-off-by: Ramkumar Ramachandra Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1395065901-25740-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 6a76a07..9ac0a49 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1124,7 +1124,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ avg = work_list->total_lat / work_list->nb_atoms; - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n", + printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n", (double)work_list->total_runtime / 1e6, work_list->nb_atoms, (double)avg / 1e6, (double)work_list->max_lat / 1e6, @@ -1527,9 +1527,9 @@ static int perf_sched__lat(struct perf_sched *sched) perf_sched__sort_lat(sched); - printf("\n ---------------------------------------------------------------------------------------------------------------\n"); - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); - printf(" ---------------------------------------------------------------------------------------------------------------\n"); + printf("\n -----------------------------------------------------------------------------------------------------------------\n"); + printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); + printf(" -----------------------------------------------------------------------------------------------------------------\n"); next = rb_first(&sched->sorted_atom_root); @@ -1541,7 +1541,7 @@ static int perf_sched__lat(struct perf_sched *sched) next = rb_next(next); } - printf(" -----------------------------------------------------------------------------------------\n"); + printf(" -----------------------------------------------------------------------------------------------------------------\n"); printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n", (double)sched->all_runtime / 1e6, sched->all_count); -- cgit v0.10.2 From b3cef7f60f17d953545f7069f6407fc24202a64d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Mar 2014 16:59:21 -0300 Subject: perf symbols: Record the reason for filtering an address_location By turning the addr_location->filtered member from a boolean to a u8 bitmap, reusing (and extending) the hist_filter enum for that. This patch doesn't change the logic at all, as it keeps the meaning of al->filtered !0 to mean that the entry _was_ filtered, so no change in how this value is interpreted needs to be done at this point. This will be soon used in upcoming patches. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-89hmfgtr9t22sky1lyg7nw7l@git.kernel.org [ yanked this out of a previous patch ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index d4991a2..74db256 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -494,7 +494,7 @@ static const char *cat_backtrace(union perf_event *event, continue; } - tal.filtered = false; + tal.filtered = 0; thread__find_addr_location(al.thread, machine, cpumode, MAP__FUNCTION, ip, &tal); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 3e580be..0da09db 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,6 +1,7 @@ #include #include "event.h" #include "debug.h" +#include "hist.h" #include "machine.h" #include "sort.h" #include "string.h" @@ -705,7 +706,7 @@ void thread__find_addr_map(struct thread *thread, al->thread = thread; al->addr = addr; al->cpumode = cpumode; - al->filtered = false; + al->filtered = 0; if (machine == NULL) { al->map = NULL; @@ -731,11 +732,11 @@ void thread__find_addr_map(struct thread *thread, if ((cpumode == PERF_RECORD_MISC_GUEST_USER || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) && !perf_guest) - al->filtered = true; + al->filtered |= (1 << HIST_FILTER__GUEST); if ((cpumode == PERF_RECORD_MISC_USER || cpumode == PERF_RECORD_MISC_KERNEL) && !perf_host) - al->filtered = true; + al->filtered |= (1 << HIST_FILTER__HOST); return; } @@ -792,8 +793,10 @@ int perf_event__preprocess_sample(const union perf_event *event, if (thread == NULL) return -1; - if (thread__is_filtered(thread)) + if (thread__is_filtered(thread)) { + al->filtered |= (1 << HIST_FILTER__THREAD); goto out_filtered; + } dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); /* @@ -823,8 +826,10 @@ int perf_event__preprocess_sample(const union perf_event *event, dso->short_name) || (dso->short_name != dso->long_name && strlist__has_entry(symbol_conf.dso_list, - dso->long_name))))) + dso->long_name))))) { + al->filtered |= (1 << HIST_FILTER__DSO); goto out_filtered; + } al->sym = map__find_symbol(al->map, al->addr, machine->symbol_filter); @@ -832,12 +837,13 @@ int perf_event__preprocess_sample(const union perf_event *event, if (symbol_conf.sym_list && (!al->sym || !strlist__has_entry(symbol_conf.sym_list, - al->sym->name))) + al->sym->name))) { + al->filtered |= (1 << HIST_FILTER__SYMBOL); goto out_filtered; + } return 0; out_filtered: - al->filtered = true; return 0; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0466efa..9507f33 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -13,13 +13,6 @@ static bool hists__filter_entry_by_thread(struct hists *hists, static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he); -enum hist_filter { - HIST_FILTER__DSO, - HIST_FILTER__THREAD, - HIST_FILTER__PARENT, - HIST_FILTER__SYMBOL, -}; - struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_REL, .min_percent = 0.5, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0c76bf9..1f1f513 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -14,6 +14,15 @@ struct hist_entry; struct addr_location; struct symbol; +enum hist_filter { + HIST_FILTER__DSO, + HIST_FILTER__THREAD, + HIST_FILTER__PARENT, + HIST_FILTER__SYMBOL, + HIST_FILTER__GUEST, + HIST_FILTER__HOST, +}; + /* * The kernel collects the number of events it couldn't send in a stretch and * when possible sends this number in a PERF_RECORD_LOST event. The number of diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 5cecd98..d280bf2 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1312,7 +1312,7 @@ static int machine__resolve_callchain_sample(struct machine *machine, continue; } - al.filtered = false; + al.filtered = 0; thread__find_addr_location(thread, machine, cpumode, MAP__FUNCTION, ip, &al); if (al.sym != NULL) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2553ae0..501e4e7 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -186,7 +186,7 @@ struct addr_location { struct symbol *sym; u64 addr; char level; - bool filtered; + u8 filtered; u8 cpumode; s32 cpu; }; -- cgit v0.10.2 From 466fa7647413665dcba46c4f7f2b4a9808426989 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Mar 2014 17:12:27 -0300 Subject: perf symbols: Apply all filters to an addr_location Instead of bailing out as soon as we find a filter that applies, go on checking all of them so that we can zoom in/out filters. We also need to make sure we only update al->filtered after thread__find_addr_map(), because there is where al->filtered gets initialized to zero. This will increase the cost of processing when all we don't need this toggling, but will provide flexibility for the TUI and GTK+ interfaces, that will incur in creating the hist_entries just once. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-fhv9lhzdjxgp9w3w3668lsfw@git.kernel.org [ yanked this out of a previous patch ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0da09db..ebb48a6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -793,11 +793,6 @@ int perf_event__preprocess_sample(const union perf_event *event, if (thread == NULL) return -1; - if (thread__is_filtered(thread)) { - al->filtered |= (1 << HIST_FILTER__THREAD); - goto out_filtered; - } - dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); /* * Have we already created the kernel maps for this machine? @@ -815,6 +810,10 @@ int perf_event__preprocess_sample(const union perf_event *event, dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : ""); + + if (thread__is_filtered(thread)) + al->filtered |= (1 << HIST_FILTER__THREAD); + al->sym = NULL; al->cpu = sample->cpu; @@ -828,7 +827,6 @@ int perf_event__preprocess_sample(const union perf_event *event, strlist__has_entry(symbol_conf.dso_list, dso->long_name))))) { al->filtered |= (1 << HIST_FILTER__DSO); - goto out_filtered; } al->sym = map__find_symbol(al->map, al->addr, @@ -839,11 +837,7 @@ int perf_event__preprocess_sample(const union perf_event *event, (!al->sym || !strlist__has_entry(symbol_conf.sym_list, al->sym->name))) { al->filtered |= (1 << HIST_FILTER__SYMBOL); - goto out_filtered; } return 0; - -out_filtered: - return 0; } -- cgit v0.10.2 From 2c86c7ca760634f09dcbd76069e5102b4de6f8f1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Mar 2014 18:18:54 -0300 Subject: perf report: Merge al->filtered with hist_entry->filtered I.e. don't drop al->filtered entries, create the hist_entries and use its ->filtered bitmap, that is kept with the same semantics for its bitmap, leaving the filtering to be done at the hist_entry level, i.e. in the UIs. This will allow zooming in/out the filters. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-xeyhkepu7plw716lrtb0zlnu@git.kernel.org [ yanked this out of a previous patch ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c47bf58..a74059f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -231,7 +231,7 @@ static int process_sample_event(struct perf_tool *tool, return -1; } - if (al.filtered || (rep->hide_unresolved && al.sym == NULL)) + if (rep->hide_unresolved && al.sym == NULL) return 0; if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9507f33..f38590d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -422,7 +422,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .weight = weight, }, .parent = sym_parent, - .filtered = symbol__parent_filter(sym_parent), + .filtered = symbol__parent_filter(sym_parent) | al->filtered, .hists = hists, .branch_info = bi, .mem_info = mi, -- cgit v0.10.2 From 11c9abf2270793bd1c1b8828edb4223f8010e56c Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Wed, 26 Feb 2014 10:45:27 -0500 Subject: perf tools: Use tid in mmap/mmap2 events to find maps Now that we can properly synthesize threads system-wide, make sure the mmap and mmap2 events use tids instead of pids to locate their maps. Signed-off-by: Don Zickus Cc: Jiri Olsa Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1393429527-167840-3-git-send-email-dzickus@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 2b6519e..7ccbc7b 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -101,6 +101,7 @@ static struct machine *setup_fake_machine(struct machines *machines) .mmap = { .header = { .misc = PERF_RECORD_MISC_USER, }, .pid = fake_mmap_info[i].pid, + .tid = fake_mmap_info[i].pid, .start = fake_mmap_info[i].start, .len = 0x1000ULL, .pgoff = 0ULL, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index d280bf2..a53cd0b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1027,7 +1027,7 @@ int machine__process_mmap2_event(struct machine *machine, } thread = machine__findnew_thread(machine, event->mmap2.pid, - event->mmap2.pid); + event->mmap2.tid); if (thread == NULL) goto out_problem; @@ -1075,7 +1075,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event } thread = machine__findnew_thread(machine, event->mmap.pid, - event->mmap.pid); + event->mmap.tid); if (thread == NULL) goto out_problem; -- cgit v0.10.2 From 574799bfdbc3a13e0b5e2f6af34b761bde743a9a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Mar 2014 10:45:49 +0900 Subject: perf tools: Fix memory leak when synthesizing thread records Checking default guest machine should be done before allocating event structures otherwise it'll leak memory. Signed-off-by: Namhyung Kim Cc: Don Zickus Cc: Jiri Olsa Cc: Joe Mario Link: http://lkml.kernel.org/r/87ob15tx6a.fsf@sejong.aot.lge.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index ebb48a6..9d12aa6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -446,6 +446,9 @@ int perf_event__synthesize_threads(struct perf_tool *tool, union perf_event *comm_event, *mmap_event, *fork_event; int err = -1; + if (machine__is_default_guest(machine)) + return 0; + comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size); if (comm_event == NULL) goto out; @@ -458,9 +461,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (fork_event == NULL) goto out_free_mmap; - if (machine__is_default_guest(machine)) - return 0; - snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); proc = opendir(proc_path); -- cgit v0.10.2 From b9ce0c99d820b7680fdb4dc39bc7b5ff79d6b5b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 18 Mar 2014 15:32:26 +0900 Subject: perf report: Use ui__has_annotation() Since we introduced the ui__has_annotation() for that, don't open code it. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1395124359-11744-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a74059f..c8f2113 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -928,7 +928,7 @@ repeat: * so don't allocate extra space that won't be used in the stdio * implementation. */ - if (use_browser == 1 && sort__has_sym) { + if (ui__has_annotation()) { symbol_conf.priv_size = sizeof(struct annotation); machines__set_symbol_filter(&session->machines, symbol__annotate_init); -- cgit v0.10.2 From 9cdbadceca31bf2ccd9e50fecdc5591d3fb8dae3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 18 Mar 2014 11:50:21 -0300 Subject: perf annotate: Print the evsel name in the stdio output So that when showing multiple events annotations, we can figure out which is which: # perf record -a -e instructions,cycles sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.826 MB perf.data (~36078 samples) ] # perf evlist instructions cycles # perf annotate intel_idle 2> /dev/null | head -1 Percent | Source code & Disassembly of vmlinux for instructions # Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-n1r51l329434js84qtb2c6l9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3aa555f..809b4c5 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1236,6 +1236,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, struct dso *dso = map->dso; char *filename; const char *d_filename; + const char *evsel_name = perf_evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct disasm_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); @@ -1243,7 +1244,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int more = 0; u64 len; int width = 8; - int namelen; + int namelen, evsel_name_len, graph_dotted_len; filename = strdup(dso->long_name); if (!filename) @@ -1256,14 +1257,17 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, len = symbol__size(sym); namelen = strlen(d_filename); + evsel_name_len = strlen(evsel_name); if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - printf(" %-*.*s| Source code & Disassembly of %s\n", - width, width, "Percent", d_filename); - printf("-%-*.*s-------------------------------------\n", - width+namelen, width+namelen, graph_dotted_line); + printf(" %-*.*s| Source code & Disassembly of %s for %s\n", + width, width, "Percent", d_filename, evsel_name); + + graph_dotted_len = width + namelen + evsel_name_len; + printf("-%-*.*s-----------------------------------------\n", + graph_dotted_len, graph_dotted_len, graph_dotted_line); if (verbose) symbol__annotate_hits(sym, evsel); -- cgit v0.10.2 From 8fffdb6821e8ca249f56fba796ef463ac74ab196 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 18 Mar 2014 15:46:52 +0100 Subject: perf tools: Remove thread__find_map function Because it's not used any more. Signed-off-by: Jiri Olsa Cc: Corey Ashford Cc: David Ahern Cc: Don Zickus Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1395154016-26709-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 9a07074..9b29f08 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -44,12 +44,6 @@ void thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); -static inline struct map *thread__find_map(struct thread *thread, - enum map_type type, u64 addr) -{ - return thread ? map_groups__find(&thread->mg, type, addr) : NULL; -} - void thread__find_addr_map(struct thread *thread, struct machine *machine, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); -- cgit v0.10.2 From a33f6efc43cb71795bbdaf0251544ff3edbfdb2f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 18 Mar 2014 15:10:42 -0400 Subject: perf evsel: Update function names in debug messages perf_event_open() was renamed to sys_perf_event_open(); update the debug messages to reflect this. Signed-off-by: Ramkumar Ramachandra Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1395169842-1399-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 26b67b1..5c28d82 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1023,7 +1023,7 @@ retry_sample_id: group_fd = get_group_fd(evsel, cpu, thread); retry_open: - pr_debug2("perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", + pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx\n", pid, cpus->map[cpu], group_fd, flags); FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, @@ -1032,7 +1032,7 @@ retry_open: group_fd, flags); if (FD(evsel, cpu, thread) < 0) { err = -errno; - pr_debug2("perf_event_open failed, error %d\n", + pr_debug2("sys_perf_event_open failed, error %d\n", err); goto try_fallback; } -- cgit v0.10.2 From b68eebd1c2a539256e373123cdefabfd1986bfe2 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 18 Mar 2014 15:10:04 -0400 Subject: perf tools: Update some code references in design.txt Update the names of some functions and enums in design.txt. The document still has some stale information, but the motivation behind this patch is to allow a developer to quickly grep and learn about the associated structures. Signed-off-by: Ramkumar Ramachandra Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1395169804-1293-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/design.txt b/tools/perf/design.txt index 63a0e6f..a28dca2 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -18,7 +18,7 @@ underlying hardware counters. Performance counters are accessed via special file descriptors. There's one file descriptor per virtual counter used. -The special file descriptor is opened via the perf_event_open() +The special file descriptor is opened via the sys_perf_event_open() system call: int sys_perf_event_open(struct perf_event_attr *hw_event_uptr, @@ -82,7 +82,7 @@ machine-specific. If 'raw_type' is 0, then the 'type' field says what kind of counter this is, with the following encoding: -enum perf_event_types { +enum perf_type_id { PERF_TYPE_HARDWARE = 0, PERF_TYPE_SOFTWARE = 1, PERF_TYPE_TRACEPOINT = 2, @@ -95,7 +95,7 @@ specified by 'event_id': * Generalized performance counter event types, used by the hw_event.event_id * parameter of the sys_perf_event_open() syscall: */ -enum hw_event_ids { +enum perf_hw_id { /* * Common hardware events, generalized by the kernel: */ @@ -129,7 +129,7 @@ software events, selected by 'event_id': * physical and sw events of the kernel (and allow the profiling of them as * well): */ -enum sw_event_ids { +enum perf_sw_ids { PERF_COUNT_SW_CPU_CLOCK = 0, PERF_COUNT_SW_TASK_CLOCK = 1, PERF_COUNT_SW_PAGE_FAULTS = 2, @@ -230,7 +230,7 @@ these events are recorded in the ring-buffer (see below). The 'comm' bit allows tracking of process comm data on process creation. This too is recorded in the ring-buffer (see below). -The 'pid' parameter to the perf_event_open() system call allows the +The 'pid' parameter to the sys_perf_event_open() system call allows the counter to be specific to a task: pid == 0: if the pid parameter is zero, the counter is attached to the @@ -260,7 +260,7 @@ The 'flags' parameter is currently unused and must be zero. The 'group_fd' parameter allows counter "groups" to be set up. A counter group has one counter which is the group "leader". The leader -is created first, with group_fd = -1 in the perf_event_open call +is created first, with group_fd = -1 in the sys_perf_event_open call that creates it. The rest of the group members are created subsequently, with group_fd giving the fd of the group leader. (A single counter on its own is created with group_fd = -1 and is -- cgit v0.10.2 From a51e87cb5a0fbebee15a3373d951dbf6f59a76c2 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 18 Mar 2014 17:05:15 -0400 Subject: perf tools: Remove unused simple_strtoul() function Moreover, the corresponding function in include/linux/kernel.h is marked obsolete. Signed-off-by: Ramkumar Ramachandra Cc: David Ahern Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1395176715-4465-1-git-send-email-artagnon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index d8c927c..9844c31 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -94,12 +94,6 @@ static inline int scnprintf(char * buf, size_t size, const char * fmt, ...) return (i >= ssize) ? (ssize - 1) : i; } -static inline unsigned long -simple_strtoul(const char *nptr, char **endptr, int base) -{ - return strtoul(nptr, endptr, base); -} - int eprintf(int level, const char *fmt, ...) __attribute__((format(printf, 2, 3))); -- cgit v0.10.2 From 825938307f812c7cbfe064c7884bd8bdb27c2144 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 17 Mar 2014 15:20:59 +0200 Subject: Revert "drm/i915: don't touch the VDD when disabling the panel" This reverts commit dff392dbd258381a6c3164f38420593f2d291e3b Author: Paulo Zanoni Date: Fri Dec 6 17:32:41 2013 -0200 drm/i915: don't touch the VDD when disabling the panel which didn't take into account commit 6cb49835da0426f69a2931bc2a0a8156344b0e41 Author: Daniel Vetter Date: Sun May 20 17:14:50 2012 +0200 drm/i915: enable vdd when switching off the eDP panel and commit 35a38556d900b9cb5dfa2529c93944b847f8a8a4 Author: Daniel Vetter Date: Sun Aug 12 22:17:14 2012 +0200 drm/i915: reorder edp disabling to fix ivb MacBook Air Unsurprisingly, various MacBooks failed. Effectively the same has already been done in drm-intel-next-queued. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74628 Tested-by: Patrik Jakobsson Cc: Paulo Zanoni Acked-by: Daniel Vetter Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e06b9e0..234ac5f 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1244,6 +1244,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder) if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + ironlake_edp_panel_vdd_on(intel_dp); ironlake_edp_panel_off(intel_dp); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 41bdac4..2688f6d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1249,17 +1249,24 @@ void ironlake_edp_panel_off(struct intel_dp *intel_dp) DRM_DEBUG_KMS("Turn eDP power off\n"); + WARN(!intel_dp->want_panel_vdd, "Need VDD to turn off panel\n"); + pp = ironlake_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some * panels get very unhappy and cease to work. */ - pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_BLC_ENABLE); + pp &= ~(POWER_TARGET_ON | EDP_FORCE_VDD | PANEL_POWER_RESET | EDP_BLC_ENABLE); pp_ctrl_reg = _pp_ctrl_reg(intel_dp); I915_WRITE(pp_ctrl_reg, pp); POSTING_READ(pp_ctrl_reg); + intel_dp->want_panel_vdd = false; + ironlake_wait_panel_off(intel_dp); + + /* We got a reference when we enabled the VDD. */ + intel_runtime_pm_put(dev_priv); } void ironlake_edp_backlight_on(struct intel_dp *intel_dp) @@ -1784,6 +1791,7 @@ static void intel_disable_dp(struct intel_encoder *encoder) /* Make sure the panel is off before trying to change the mode. But also * ensure that we have vdd while we switch off the panel. */ + ironlake_edp_panel_vdd_on(intel_dp); ironlake_edp_backlight_off(intel_dp); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); ironlake_edp_panel_off(intel_dp); -- cgit v0.10.2 From 0f4706d2740f2a221cd502922b22e522009041d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 18 Mar 2014 14:50:50 +0200 Subject: drm/i915: Disable stolen memory when DMAR is active We have reports of heavy screen corruption if we try to use the stolen memory reserved by the BIOS whilst the DMA-Remapper is active. This quirk may be only specific to a few machines or BIOSes, but first lets apply the big hammer and always disable use of stolen memory when DMAR is active. v2 by Jani: Rebase on -fixes, only look at intel_iommu_gfx_mapped. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68535 Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index d58b4e2..28d24ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -214,6 +214,13 @@ int i915_gem_init_stolen(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int bios_reserved = 0; +#ifdef CONFIG_INTEL_IOMMU + if (intel_iommu_gfx_mapped) { + DRM_INFO("DMAR active, disabling use of stolen memory\n"); + return 0; + } +#endif + if (dev_priv->gtt.stolen_size == 0) return 0; -- cgit v0.10.2 From 4fe2169acecb6e62821dfe14bc5c5852870b516f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 13 Feb 2014 17:48:12 +0100 Subject: MIPS: BCM47XX: Check all (32) GPIOs when looking for a pin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadcom boards support 32 GPIOs and NVRAM may have entires for higher ones too. Example: gpio23=wombo_reset Signed-off-by: Rafa? Mi?ecki Acked-by: Hauke Mehrtens Cc: linux-mips@linux-mips.org Cc: Rafał Miłecki Patchwork: https://patchwork.linux-mips.org/patch/6547/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c index 6decb27..2bed73a 100644 --- a/arch/mips/bcm47xx/nvram.c +++ b/arch/mips/bcm47xx/nvram.c @@ -196,7 +196,7 @@ int bcm47xx_nvram_gpio_pin(const char *name) char nvram_var[10]; char buf[30]; - for (i = 0; i < 16; i++) { + for (i = 0; i < 32; i++) { err = snprintf(nvram_var, sizeof(nvram_var), "gpio%i", i); if (err <= 0) continue; -- cgit v0.10.2 From 14b4319a44f2e0385e1794bf41a07d872908b539 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Mar 2014 11:17:40 +0100 Subject: m68k: amiga: Add linux/irq.h to make it compile again The removal of linux/irq.h from kernel_stat.h causes arch/m68k/amiga/cia.c:171: error: 'handle_simple_irq' undeclared Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/m68k/amiga/cia.c b/arch/m68k/amiga/cia.c index 18c0e29..2081b8c 100644 --- a/arch/m68k/amiga/cia.c +++ b/arch/m68k/amiga/cia.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include -- cgit v0.10.2 From d532676cc7329e1088702ccb0015942cc370b954 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Mar 2014 11:19:52 +0100 Subject: softirq: Add linux/irq.h to make it compile again On Sparc and S390 the removal of irq.h from kernel_stat.h causes: kernel/softirq.c:774:9: error: 'NR_IRQS_LEGACY' undeclared Reported-by: Peter Zijlstra Signed-off-by: Thomas Gleixner diff --git a/kernel/softirq.c b/kernel/softirq.c index 490fcbb..b50990a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -25,6 +25,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include -- cgit v0.10.2 From 74397174989e5f704bcefde7af02e12cc32cb164 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 11 Mar 2014 13:17:31 +0000 Subject: arm64: Fix duplicated Kconfig entries Probably due to rebasing over the lengthy time it took to get the patch merged commit addea9ef055b (cpufreq: enable ARM drivers on arm64) added a duplicate Power management options section. Add CPUfreq to the CPU power management section and remove a duplicate include of the main power section. Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4723fc1..516d8a7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -321,12 +321,6 @@ menu "CPU Power Management" source "drivers/cpuidle/Kconfig" -endmenu - -menu "Power management options" - -source "kernel/power/Kconfig" - source "drivers/cpufreq/Kconfig" endmenu -- cgit v0.10.2 From bab5c790cc64adb1ede54b4077444375108ac8da Mon Sep 17 00:00:00 2001 From: Chema Gonzalez Date: Thu, 13 Mar 2014 19:50:55 -0700 Subject: genirq: procfs: Make smp_affinity values go+r Includes: - /proc/irq/default_smp_affinity - /proc/irq/*/affinity_hint - /proc/irq/*/smp_affinity - /proc/irq/*/smp_affinity_list Users can distill the same information by reading /proc/interrupts. Signed-off-by: Chema Gonzalez Cc: Eric Dumazet Link: http://lkml.kernel.org/r/1394765455-1217-1-git-send-email-chema@google.com Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 36f6ee1..ac1ba2f 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -324,15 +324,15 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) #ifdef CONFIG_SMP /* create /proc/irq//smp_affinity */ - proc_create_data("smp_affinity", 0600, desc->dir, + proc_create_data("smp_affinity", 0644, desc->dir, &irq_affinity_proc_fops, (void *)(long)irq); /* create /proc/irq//affinity_hint */ - proc_create_data("affinity_hint", 0400, desc->dir, + proc_create_data("affinity_hint", 0444, desc->dir, &irq_affinity_hint_proc_fops, (void *)(long)irq); /* create /proc/irq//smp_affinity_list */ - proc_create_data("smp_affinity_list", 0600, desc->dir, + proc_create_data("smp_affinity_list", 0644, desc->dir, &irq_affinity_list_proc_fops, (void *)(long)irq); proc_create_data("node", 0444, desc->dir, @@ -372,7 +372,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action) static void register_default_affinity_proc(void) { #ifdef CONFIG_SMP - proc_create("irq/default_smp_affinity", 0600, NULL, + proc_create("irq/default_smp_affinity", 0644, NULL, &default_affinity_proc_fops); #endif } -- cgit v0.10.2 From 915b78ce8ef0178305cb100e830832a866b42faa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 15 Mar 2014 16:04:53 +0100 Subject: irqchip: sun4i: Use handle_fasteoi_irq for all interrupts Since the sun4i irq chip does not require any action and clears the interrupt when the level goes back to inactive, we don't need to mask / unmask for non oneshot IRQs, to achieve this we make sun4i_irq_ack a nop for all irqs except irq 0 and use handle_fasteoi_irq for all interrupts. Now there might be a case when the device reactivates the interrupt before the RETI. But that does not matter as we run the primary interrupt handlers with interrupts disabled. This also allows us to get rid of needing to use 2 irq_chip structs, this means that the IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED will now influence all interrupts rather then just irq 0, but that does not matter as the eoi is now a nop anyways for all interrupts but irq 0. Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Link: http://lkml.kernel.org/r/1394895894-8891-2-git-send-email-hdegoede@redhat.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 2029cc5..003a146 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -45,6 +45,9 @@ static void sun4i_irq_ack(struct irq_data *irqd) int reg = irq / 32; u32 val; + if (irq != 0) + return; /* Only IRQ 0 / the ENMI needs to be acked */ + val = readl(sun4i_irq_base + SUN4I_IRQ_PENDING_REG(reg)); writel(val | (1 << irq_off), sun4i_irq_base + SUN4I_IRQ_PENDING_REG(reg)); @@ -76,13 +79,6 @@ static void sun4i_irq_unmask(struct irq_data *irqd) static struct irq_chip sun4i_irq_chip = { .name = "sun4i_irq", - .irq_mask = sun4i_irq_mask, - .irq_unmask = sun4i_irq_unmask, -}; - -/* IRQ 0 / the ENMI needs a late eoi call */ -static struct irq_chip sun4i_irq_chip_enmi = { - .name = "sun4i_irq", .irq_eoi = sun4i_irq_ack, .irq_mask = sun4i_irq_mask, .irq_unmask = sun4i_irq_unmask, @@ -92,13 +88,7 @@ static struct irq_chip sun4i_irq_chip_enmi = { static int sun4i_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw) { - if (hw == 0) - irq_set_chip_and_handler(virq, &sun4i_irq_chip_enmi, - handle_fasteoi_irq); - else - irq_set_chip_and_handler(virq, &sun4i_irq_chip, - handle_level_irq); - + irq_set_chip_and_handler(virq, &sun4i_irq_chip, handle_fasteoi_irq); set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); return 0; -- cgit v0.10.2 From cc3b68fea29c3af018734501e166124c8eb04a6c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 15 Mar 2014 16:04:54 +0100 Subject: irqchip: sun4i: Simplify sun4i_irq_ack Now that we only ack irq 0 the code can be simplified a lot. Also switch from read / modify / write to a simple write clear: 1) This is what the android code does (it has a hack for acking irq 0 in its unmask code doing this) 2) read / modify / write simply does not make sense for an irq status register like this, if the other bits are writeable (and the data sheet says they are not) they should be write 1 to clear, since otherwise a read / modify / write can race with a device raising an interrupt and then clear the pending bit unintentionally Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Link: http://lkml.kernel.org/r/1394895894-8891-3-git-send-email-hdegoede@redhat.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c index 003a146..6fcef4a 100644 --- a/drivers/irqchip/irq-sun4i.c +++ b/drivers/irqchip/irq-sun4i.c @@ -41,16 +41,11 @@ static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs); static void sun4i_irq_ack(struct irq_data *irqd) { unsigned int irq = irqd_to_hwirq(irqd); - unsigned int irq_off = irq % 32; - int reg = irq / 32; - u32 val; if (irq != 0) return; /* Only IRQ 0 / the ENMI needs to be acked */ - val = readl(sun4i_irq_base + SUN4I_IRQ_PENDING_REG(reg)); - writel(val | (1 << irq_off), - sun4i_irq_base + SUN4I_IRQ_PENDING_REG(reg)); + writel(BIT(0), sun4i_irq_base + SUN4I_IRQ_PENDING_REG(0)); } static void sun4i_irq_mask(struct irq_data *irqd) -- cgit v0.10.2 From 749d32237bf39e6576dd95bfdf24e4378e51716c Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 19 Mar 2014 12:59:39 +0000 Subject: ALSA: compress: Pass through return value of open ops callback The snd_compr_open function would always return 0 even if the compressed ops open function failed, obviously this is incorrect. Looks like this was introduced by a small typo in: commit a0830dbd4e42b38aefdf3fb61ba5019a1a99ea85 ALSA: Add a reference counter to card instance This patch returns the value from the compressed op as it should. Signed-off-by: Charles Keepax Acked-by: Vinod Koul Cc: Signed-off-by: Takashi Iwai diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 7a20897..7403f34 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -133,7 +133,7 @@ static int snd_compr_open(struct inode *inode, struct file *f) kfree(data); } snd_card_unref(compr->card); - return 0; + return ret; } static int snd_compr_free(struct inode *inode, struct file *f) -- cgit v0.10.2 From a5a6569959fc55d4ebf1526f7855003596946c32 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 19 Mar 2014 10:46:25 -0700 Subject: libata.h: add stub for ata_sas_port_resume Fix build error when CONFIG_PM is not enabled by adding a stub function in . drivers/scsi/libsas/sas_ata.c: In function 'sas_resume_sata': drivers/scsi/libsas/sas_ata.c:756:3: error: implicit declaration of function 'ata_sas_port_resume' [-Werror=implicit-function-declaration] Signed-off-by: Randy Dunlap Reported-by: Jim Davis Signed-off-by: Tejun Heo Cc: Dan Williams diff --git a/include/linux/libata.h b/include/linux/libata.h index 5c09e86..5272378 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1148,6 +1148,9 @@ static inline void ata_sas_port_suspend(struct ata_port *ap) static inline void ata_sas_port_async_resume(struct ata_port *ap) { } +static inline void ata_sas_port_resume(struct ata_port *ap) +{ +} #endif extern int ata_ratelimit(void); extern void ata_msleep(struct ata_port *ap, unsigned int msecs); -- cgit v0.10.2 From ae996154f72fdd116291f69ec6c856be9c77042a Mon Sep 17 00:00:00 2001 From: Roger Luethi Date: Tue, 18 Mar 2014 18:14:01 +0100 Subject: via-rhine: Disable device in error path Currently, via-rhine fails to call pci_disable_device() for errors in rhine_init_one(). Reported-by: Huqiu Liu Signed-off-by: Roger Luethi Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index ef312bc..6ac20a6 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -923,7 +923,7 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) { dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card!?\n"); - goto err_out; + goto err_out_pci_disable; } /* sanity check */ @@ -931,7 +931,7 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) (pci_resource_len(pdev, 1) < io_size)) { rc = -EIO; dev_err(&pdev->dev, "Insufficient PCI resources, aborting\n"); - goto err_out; + goto err_out_pci_disable; } pioaddr = pci_resource_start(pdev, 0); @@ -942,7 +942,7 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev = alloc_etherdev(sizeof(struct rhine_private)); if (!dev) { rc = -ENOMEM; - goto err_out; + goto err_out_pci_disable; } SET_NETDEV_DEV(dev, &pdev->dev); @@ -1084,6 +1084,8 @@ err_out_free_res: pci_release_regions(pdev); err_out_free_netdev: free_netdev(dev); +err_out_pci_disable: + pci_disable_device(pdev); err_out: return rc; } -- cgit v0.10.2 From 0dd5d6f0e8763ff09939adf3e5b1465a3a414fea Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 19 Mar 2014 11:14:15 -0700 Subject: libata: remove unused ata_sas_port_async_resume() stub Commit bc6e7c4b0d1a "libata, libsas: kill pm_result and related cleanup" renamed ata_sas_port_async_resume() to ata_sas_port_resume(), but missed a CONFIG_PM=n stub conversion. Randy fixed that up in commit a5a6569959fc "libata.h: add stub for ata_sas_port_resume", but missed the deletion of the now unused ata_sas_port_async_resume() routine. Cc: Randy Dunlap Signed-off-by: Dan Williams Signed-off-by: Tejun Heo diff --git a/include/linux/libata.h b/include/linux/libata.h index 5272378..1de36be 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1145,9 +1145,6 @@ extern void ata_sas_port_resume(struct ata_port *ap); static inline void ata_sas_port_suspend(struct ata_port *ap) { } -static inline void ata_sas_port_async_resume(struct ata_port *ap) -{ -} static inline void ata_sas_port_resume(struct ata_port *ap) { } -- cgit v0.10.2 From b08ac66b4026f0151d712903695bf266042fbe2c Mon Sep 17 00:00:00 2001 From: Viller Hsiao Date: Tue, 18 Mar 2014 15:39:34 +0800 Subject: MIPS: ftrace: Tweak safe_load()/safe_store() macros Due to name collision in ftrace safe_load and safe_store macros, these macros cannot take expressions as operands. For example, compiler will complain for a macro call like the following: safe_store_code(new_code2, ip + 4, faulted); arch/mips/include/asm/ftrace.h:61:6: note: in definition of macro 'safe_store' : [dst] "r" (dst), [src] "r" (src)\ ^ arch/mips/kernel/ftrace.c:118:2: note: in expansion of macro 'safe_store_code' safe_store_code(new_code2, ip + 4, faulted); ^ arch/mips/kernel/ftrace.c:118:32: error: undefined named operand 'ip + 4' safe_store_code(new_code2, ip + 4, faulted); ^ arch/mips/include/asm/ftrace.h:61:6: note: in definition of macro 'safe_store' : [dst] "r" (dst), [src] "r" (src)\ ^ arch/mips/kernel/ftrace.c:118:2: note: in expansion of macro 'safe_store_code' safe_store_code(new_code2, ip + 4, faulted); ^ This build error is triggered by a4671094 [MIPS: ftrace: Fix icache flush range error]. Tweak variable naming in those macros to allow flexible operands. Signed-off-by: Viller Hsiao Cc: linux-mips@linux-mips.org Cc: rostedt@goodmis.org Cc: fweisbec@gmail.com Cc: mingo@redhat.com Cc: Qais.Yousef@imgtec.com Patchwork: https://patchwork.linux-mips.org/patch/6622/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h index ce35c9a..992aaba 100644 --- a/arch/mips/include/asm/ftrace.h +++ b/arch/mips/include/asm/ftrace.h @@ -22,12 +22,12 @@ extern void _mcount(void); #define safe_load(load, src, dst, error) \ do { \ asm volatile ( \ - "1: " load " %[" STR(dst) "], 0(%[" STR(src) "])\n"\ - " li %[" STR(error) "], 0\n" \ + "1: " load " %[tmp_dst], 0(%[tmp_src])\n" \ + " li %[tmp_err], 0\n" \ "2:\n" \ \ ".section .fixup, \"ax\"\n" \ - "3: li %[" STR(error) "], 1\n" \ + "3: li %[tmp_err], 1\n" \ " j 2b\n" \ ".previous\n" \ \ @@ -35,8 +35,8 @@ do { \ STR(PTR) "\t1b, 3b\n\t" \ ".previous\n" \ \ - : [dst] "=&r" (dst), [error] "=r" (error)\ - : [src] "r" (src) \ + : [tmp_dst] "=&r" (dst), [tmp_err] "=r" (error)\ + : [tmp_src] "r" (src) \ : "memory" \ ); \ } while (0) @@ -44,12 +44,12 @@ do { \ #define safe_store(store, src, dst, error) \ do { \ asm volatile ( \ - "1: " store " %[" STR(src) "], 0(%[" STR(dst) "])\n"\ - " li %[" STR(error) "], 0\n" \ + "1: " store " %[tmp_src], 0(%[tmp_dst])\n"\ + " li %[tmp_err], 0\n" \ "2:\n" \ \ ".section .fixup, \"ax\"\n" \ - "3: li %[" STR(error) "], 1\n" \ + "3: li %[tmp_err], 1\n" \ " j 2b\n" \ ".previous\n" \ \ @@ -57,8 +57,8 @@ do { \ STR(PTR) "\t1b, 3b\n\t" \ ".previous\n" \ \ - : [error] "=r" (error) \ - : [dst] "r" (dst), [src] "r" (src)\ + : [tmp_err] "=r" (error) \ + : [tmp_dst] "r" (dst), [tmp_src] "r" (src)\ : "memory" \ ); \ } while (0) -- cgit v0.10.2 From 2eddb708d83ead02b5d41c65bfb26bab5afc8210 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 19 Mar 2014 23:03:30 +0100 Subject: MIPS: Octeon: Fix warning in of_device_alloc on cn3xxx Starting with commit 3da5278727a895d49a601f67fd49dffa0b80f9a5 (of/irq: Rework of_irq_count()) the following warning is triggered on octeon cn3xxx: [ 0.887281] WARNING: CPU: 0 PID: 1 at drivers/of/platform.c:171 of_device_alloc+0x228/0x230() [ 0.895642] Modules linked in: [ 0.898689] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.14.0-rc7-00012-g9ae51f2-dirty #41 [ 0.906860] Stack : c8b439581166d96e ffffffff816b0000 0000000040808000 ffffffff81185ddc [ 0.906860] 0000000000000000 0000000000000000 0000000000000000 000000000000000b [ 0.906860] 000000000000000a 000000000000000a 0000000000000000 0000000000000000 [ 0.906860] ffffffff81740000 ffffffff81720000 ffffffff81615900 ffffffff816b0177 [ 0.906860] ffffffff81727d10 800000041f868fb0 0000000000000001 0000000000000000 [ 0.906860] 0000000000000000 0000000000000038 0000000000000001 ffffffff81568484 [ 0.906860] 800000041f86faa8 ffffffff81145ddc 0000000000000000 ffffffff811873f4 [ 0.906860] 800000041f868b88 800000041f86f9c0 0000000000000000 ffffffff81569c9c [ 0.906860] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 0.906860] 0000000000000000 ffffffff811205e0 0000000000000000 0000000000000000 [ 0.906860] ... [ 0.971695] Call Trace: [ 0.974139] [] show_stack+0x68/0x80 [ 0.979183] [] dump_stack+0x8c/0xe0 [ 0.984196] [] warn_slowpath_common+0x84/0xb8 [ 0.990110] [] of_device_alloc+0x228/0x230 [ 0.995726] [] of_platform_device_create_pdata+0x48/0xd0 [ 1.002593] [] of_platform_bus_create+0x134/0x1e8 [ 1.008837] [] of_platform_bus_create+0x198/0x1e8 [ 1.015064] [] of_platform_bus_probe+0xa4/0x100 [ 1.021149] [] do_one_initcall+0xd8/0x128 [ 1.026701] [] kernel_init_freeable+0x144/0x210 [ 1.032753] [] kernel_init+0x14/0x110 [ 1.037973] [] ret_from_kernel_thread+0x14/0x1c With this commit the kernel starts mapping the interrupts listed for gpio-controller node. irq_domain_ops for CIU (octeon_irq_ciu_map and octeon_irq_ciu_xlat) refuse to handle the GPIO lines (returning -EINVAL) and this is causing above warning in of_device_alloc(). Modify irq_domain_ops for CIU and CIU2 to "gracefully handle" GPIO lines (neither return error code nor call octeon_irq_set_ciu_mapping for it). This should avoid the warning. (As before the real setup for GPIO lines will happen using irq_domain_ops of gpio-controller.) This patch is based on Wei's patch v2 (see http://marc.info/?l=linux-mips&m=139511814813247). Signed-off-by: Andreas Herrmann Reported-by: Yang Wei Acked-by: David Daney Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6624/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 25fbfae..c2bb4f8 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -975,10 +975,6 @@ static int octeon_irq_ciu_xlat(struct irq_domain *d, if (ciu > 1 || bit > 63) return -EINVAL; - /* These are the GPIO lines */ - if (ciu == 0 && bit >= 16 && bit < 32) - return -EINVAL; - *out_hwirq = (ciu << 6) | bit; *out_type = 0; @@ -1007,6 +1003,10 @@ static int octeon_irq_ciu_map(struct irq_domain *d, if (!octeon_irq_virq_in_range(virq)) return -EINVAL; + /* Don't map irq if it is reserved for GPIO. */ + if (line == 0 && bit >= 16 && bit <32) + return 0; + if (line > 1 || octeon_irq_ciu_to_irq[line][bit] != 0) return -EINVAL; @@ -1525,10 +1525,6 @@ static int octeon_irq_ciu2_xlat(struct irq_domain *d, ciu = intspec[0]; bit = intspec[1]; - /* Line 7 are the GPIO lines */ - if (ciu > 6 || bit > 63) - return -EINVAL; - *out_hwirq = (ciu << 6) | bit; *out_type = 0; @@ -1570,8 +1566,14 @@ static int octeon_irq_ciu2_map(struct irq_domain *d, if (!octeon_irq_virq_in_range(virq)) return -EINVAL; - /* Line 7 are the GPIO lines */ - if (line > 6 || octeon_irq_ciu_to_irq[line][bit] != 0) + /* + * Don't map irq if it is reserved for GPIO. + * (Line 7 are the GPIO lines.) + */ + if (line == 7) + return 0; + + if (line > 7 || octeon_irq_ciu_to_irq[line][bit] != 0) return -EINVAL; if (octeon_irq_ciu2_is_edge(line, bit)) -- cgit v0.10.2 From 887843961c4b4681ee993c36d4997bf4b4aa8253 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 18 Mar 2014 17:38:38 -0700 Subject: mm: fix bad rss-counter if remap_file_pages raced migration Fix some "Bad rss-counter state" reports on exit, arising from the interaction between page migration and remap_file_pages(): zap_pte() must count a migration entry when zapping it. And yes, it is possible (though very unusual) to find an anon page or swap entry in a VM_SHARED nonlinear mapping: coming from that horrid get_user_pages(write, force) case which COWs even in a shared mapping. Signed-off-by: Hugh Dickins Tested-by: Sasha Levin sasha.levin@oracle.com> Tested-by: Dave Jones davej@redhat.com> Cc: Cyrill Gorcunov Cc: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/fremap.c b/mm/fremap.c index bbc4d66..34feba6 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -23,28 +23,44 @@ #include "internal.h" +static int mm_counter(struct page *page) +{ + return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES; +} + static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte = *ptep; + struct page *page; + swp_entry_t entry; if (pte_present(pte)) { - struct page *page; - flush_cache_page(vma, addr, pte_pfn(pte)); pte = ptep_clear_flush(vma, addr, ptep); page = vm_normal_page(vma, addr, pte); if (page) { if (pte_dirty(pte)) set_page_dirty(page); + update_hiwater_rss(mm); + dec_mm_counter(mm, mm_counter(page)); page_remove_rmap(page); page_cache_release(page); + } + } else { /* zap_pte() is not called when pte_none() */ + if (!pte_file(pte)) { update_hiwater_rss(mm); - dec_mm_counter(mm, MM_FILEPAGES); + entry = pte_to_swp_entry(pte); + if (non_swap_entry(entry)) { + if (is_migration_entry(entry)) { + page = migration_entry_to_page(entry); + dec_mm_counter(mm, mm_counter(page)); + } + } else { + free_swap_and_cache(entry); + dec_mm_counter(mm, MM_SWAPENTS); + } } - } else { - if (!pte_file(pte)) - free_swap_and_cache(pte_to_swp_entry(pte)); pte_clear_not_present_full(mm, addr, ptep, 0); } } -- cgit v0.10.2 From 06325190bd577e11429444d54f454b9d13f560c9 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 27 Feb 2014 08:47:02 +0000 Subject: x86, hash: Fix build failure with older binutils Just like for other ISA extension instruction uses we should check whether the assembler actually supports them. The fallback here simply is to encode an instruction with fixed operands (%eax and %ecx). [ hpa: tagging for -stable as a build fix ] Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/530F0996020000780011FBE7@nat28.tlf.novell.com Cc: Francesco Fusco Cc: Thomas Graf Cc: David S. Miller Acked-by: Daniel Borkmann Signed-off-by: H. Peter Anvin Cc: # v3.14 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index eeda43a..f8842c4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -152,6 +152,7 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI # does binutils support specific instructions? asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1) +asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c index 3056702..060cc44 100644 --- a/arch/x86/lib/hash.c +++ b/arch/x86/lib/hash.c @@ -39,7 +39,11 @@ static inline u32 crc32_u32(u32 crc, u32 val) { +#ifdef CONFIG_AS_CRC32 asm ("crc32l %1,%0\n" : "+r" (crc) : "rm" (val)); +#else + asm (".byte 0xf2, 0x0f, 0x38, 0xf1, 0xc1" : "+a" (crc) : "c" (val)); +#endif return crc; } -- cgit v0.10.2 From c5cdfdf90901c51363441365997eecd58efd9374 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 27 Feb 2014 08:47:34 +0000 Subject: x86, hash: Swap arguments passed to crc32_u32() ... to match the function's parameters. While reportedly commutative, using the proper order allows for leveraging the instruction permitting the source operand to be in memory. [ hpa: This code originated in the dpdk toolkit. This was a bug in dpdk which has recently been fixed in part due to an earlier version of this patch. ] Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/530F09B6020000780011FBEB@nat28.tlf.novell.com Acked-by: Daniel Borkmann Cc: Francesco Fusco Cc: Thomas Graf Cc: David S. Miller Signed-off-by: H. Peter Anvin diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c index 060cc44..ac628aa 100644 --- a/arch/x86/lib/hash.c +++ b/arch/x86/lib/hash.c @@ -53,7 +53,7 @@ static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) u32 i, tmp = 0; for (i = 0; i < len / 4; i++) - seed = crc32_u32(*p32++, seed); + seed = crc32_u32(seed, *p32++); switch (3 - (len & 0x03)) { case 0: @@ -64,7 +64,7 @@ static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) /* fallthrough */ case 2: tmp |= *((const u8 *) p32); - seed = crc32_u32(tmp, seed); + seed = crc32_u32(seed, tmp); default: break; } @@ -78,7 +78,7 @@ static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed) u32 i; for (i = 0; i < len; i++) - seed = crc32_u32(*p32++, seed); + seed = crc32_u32(seed, *p32++); return seed; } -- cgit v0.10.2 From 7a5917e9787dd73284f04e35c3cfdb39a67bf0d5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 27 Feb 2014 08:47:58 +0000 Subject: x86, hash: Simplify switch, add __init annotation Minor cleanups: - simplify switch statement - add __init annotation to setup_arch_fast_hash() Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/530F09CE020000780011FBEF@nat28.tlf.novell.com Cc: Francesco Fusco Cc: Thomas Graf Cc: David S. Miller Acked-by: Daniel Borkmann Signed-off-by: H. Peter Anvin diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c index ac628aa..ff4fa51 100644 --- a/arch/x86/lib/hash.c +++ b/arch/x86/lib/hash.c @@ -32,6 +32,7 @@ */ #include +#include #include #include @@ -55,17 +56,16 @@ static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) for (i = 0; i < len / 4; i++) seed = crc32_u32(seed, *p32++); - switch (3 - (len & 0x03)) { - case 0: + switch (len & 3) { + case 3: tmp |= *((const u8 *) p32 + 2) << 16; /* fallthrough */ - case 1: + case 2: tmp |= *((const u8 *) p32 + 1) << 8; /* fallthrough */ - case 2: + case 1: tmp |= *((const u8 *) p32); seed = crc32_u32(seed, tmp); - default: break; } @@ -83,7 +83,7 @@ static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed) return seed; } -void setup_arch_fast_hash(struct fast_hash_ops *ops) +void __init setup_arch_fast_hash(struct fast_hash_ops *ops) { if (cpu_has_xmm4_2) { ops->hash = intel_crc4_2_hash; -- cgit v0.10.2 From 85d898bf8f638b7a23af95dc7d32b4a72c178637 Mon Sep 17 00:00:00 2001 From: Daniel Kurtz Date: Mon, 17 Mar 2014 11:28:06 +0800 Subject: drm/exynos: Fix (more) freeing issues in exynos_drm_drv.c The following commit [0] fixed a use-after-free, but left the subdrv open in the error path. [0] commit 6ca605f7c70895a35737435f17ae9cc5e36f1466 drm/exynos: Fix freeing issues in exynos_drm_drv.c Signed-off-by: Daniel Kurtz Acked-by: Sachin Kamat Signed-off-by: Inki Dae diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 215131a..c204b4e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -172,20 +172,24 @@ static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) ret = exynos_drm_subdrv_open(dev, file); if (ret) - goto out; + goto err_file_priv_free; anon_filp = anon_inode_getfile("exynos_gem", &exynos_drm_gem_fops, NULL, 0); if (IS_ERR(anon_filp)) { ret = PTR_ERR(anon_filp); - goto out; + goto err_subdrv_close; } anon_filp->f_mode = FMODE_READ | FMODE_WRITE; file_priv->anon_filp = anon_filp; return ret; -out: + +err_subdrv_close: + exynos_drm_subdrv_close(dev, file); + +err_file_priv_free: kfree(file_priv); file->driver_priv = NULL; return ret; -- cgit v0.10.2 From c41eba7de133e43ea2c998ccd484059feab200f6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 18 Mar 2014 13:23:15 +0530 Subject: timer: Use variable head instead of &work_list in __run_timers() We already have a variable 'head' that points to '&work_list', and so we should use that instead wherever possible. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Link: http://lkml.kernel.org/r/0d8645a6efc8360c4196c9797d59343abbfdcc5e.1395129136.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/timer.c b/kernel/timer.c index 949d74e..8e503fe 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1192,7 +1192,7 @@ static inline void __run_timers(struct tvec_base *base) !cascade(base, &base->tv4, INDEX(2))) cascade(base, &base->tv5, INDEX(3)); ++base->timer_jiffies; - list_replace_init(base->tv1.vec + index, &work_list); + list_replace_init(base->tv1.vec + index, head); while (!list_empty(head)) { void (*fn)(unsigned long); unsigned long data; -- cgit v0.10.2 From e2e680fb7566880f7210cadf628c092c0433971c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Mar 2014 16:21:33 +0530 Subject: hrtimer: Rearrange comments in the order struct members are declared Rearrange kernel doc comments in the order members of struct hrtimer are declared. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: fweisbec@gmail.com Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1db1a3cfbe8a9ea49396af75c6ac04a2e67e3ab0.1395226248.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index d19a5c2..e7a8d3f 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -96,12 +96,12 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) + * @start_pid: timer statistics field to store the pid of the task which + * started the timer * @start_site: timer statistics field to store the site where the timer * was started * @start_comm: timer statistics field to store the name of the process which * started the timer - * @start_pid: timer statistics field to store the pid of the task which - * started the timer * * The hrtimer structure must be initialized by hrtimer_init() */ -- cgit v0.10.2 From 6201b4d61fbf194df6371fb3376c5026cb8f5eec Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 18 Mar 2014 16:26:07 +0530 Subject: timer: Remove code redundancy while calling get_nohz_timer_target() There are only two users of get_nohz_timer_target(): timer and hrtimer. Both call it under same circumstances, i.e. #ifdef CONFIG_NO_HZ_COMMON if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) return get_nohz_timer_target(); #endif So, it makes more sense to get all this as part of get_nohz_timer_target() instead of duplicating code at two places. For this another parameter is required to be passed to this routine, pinned. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: fweisbec@gmail.com Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/1e1b53537217d58d48c2d7a222a9c3ac47d5b64c.1395140107.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84..6f6c56f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -291,10 +291,14 @@ extern int runqueue_is_locked(int cpu); #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) extern void nohz_balance_enter_idle(int cpu); extern void set_cpu_sd_state_idle(void); -extern int get_nohz_timer_target(void); +extern int get_nohz_timer_target(int pinned); #else static inline void nohz_balance_enter_idle(int cpu) { } static inline void set_cpu_sd_state_idle(void) { } +static inline int get_nohz_timer_target(int pinned) +{ + return smp_processor_id(); +} #endif /* diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0909436..d55092c 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -168,19 +168,6 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } } - -/* - * Get the preferred target CPU for NOHZ - */ -static int hrtimer_get_target(int this_cpu, int pinned) -{ -#ifdef CONFIG_NO_HZ_COMMON - if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) - return get_nohz_timer_target(); -#endif - return this_cpu; -} - /* * With HIGHRES=y we do not migrate the timer when it is expiring * before the next event on the target cpu because we cannot reprogram @@ -214,7 +201,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, struct hrtimer_clock_base *new_base; struct hrtimer_cpu_base *new_cpu_base; int this_cpu = smp_processor_id(); - int cpu = hrtimer_get_target(this_cpu, pinned); + int cpu = get_nohz_timer_target(pinned); int basenum = base->index; again: diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131e..c0339e2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -555,12 +555,15 @@ void resched_cpu(int cpu) * selecting an idle cpu will add more delays to the timers than intended * (as that cpu's timer base may not be uptodate wrt jiffies etc). */ -int get_nohz_timer_target(void) +int get_nohz_timer_target(int pinned) { int cpu = smp_processor_id(); int i; struct sched_domain *sd; + if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu)) + return cpu; + rcu_read_lock(); for_each_domain(cpu, sd) { for_each_cpu(i, sched_domain_span(sd)) { diff --git a/kernel/timer.c b/kernel/timer.c index 8e503fe..1d35dda 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -760,12 +760,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, debug_activate(timer, expires); - cpu = smp_processor_id(); - -#if defined(CONFIG_NO_HZ_COMMON) && defined(CONFIG_SMP) - if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) - cpu = get_nohz_timer_target(); -#endif + cpu = get_nohz_timer_target(pinned); new_base = per_cpu(tvec_bases, cpu); if (base != new_base) { -- cgit v0.10.2 From b718102e7d4ce2f9640805251fbbb3619fbb9a46 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Mar 2014 12:40:30 +0100 Subject: m68k: atari: Fix the last kernel_stat.h fallout Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c index 3e73a63..3d2b63b 100644 --- a/arch/m68k/atari/ataints.c +++ b/arch/m68k/atari/ataints.c @@ -41,6 +41,7 @@ #include #include #include +#include #include -- cgit v0.10.2 From b524ca742ef304970f61c32af320e5c5bdb355ff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Mar 2014 12:44:02 +0100 Subject: arm: omap: Fix typo in ams-delta-fiq.c 8435cf757 (arm: Replace various irq_desc accesses) typoed irq_get_irq_chip() instead of irq_get_chip(). Reported-by: Fengguang Wu Signed-off-by: Thomas Gleixner diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index 2ebc514..d1f1209 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -47,7 +47,7 @@ static irqreturn_t deferred_fiq(int irq, void *dev_id) int gpio, irq_num, fiq_count; struct irq_chip *irq_chip; - irq_chip = irq_get_irq_chip(gpio_to_irq(AMS_DELTA_GPIO_PIN_KEYBRD_CLK)); + irq_chip = irq_get_chip(gpio_to_irq(AMS_DELTA_GPIO_PIN_KEYBRD_CLK)); /* * For each handled GPIO interrupt, keep calling its interrupt handler -- cgit v0.10.2 From 71ca75888953166b72cf7a65b4c2b6a50fc0ce3b Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Wed, 27 Nov 2013 15:34:50 -0500 Subject: MIPS: Make local_irq_disable macro safe for non-Mipsr2 For non-mipsr2 processors, the local_irq_disable contains an mfc0-mtc0 pair with instructions inbetween. With preemption enabled, this sequence may get preempted and effect a stale value of CP0_STATUS when executing the mtc0 instruction. This commit avoids this scenario by incrementing the preempt count before the mfc0 and decrementing it after the mtc9. [ralf@linux-mips.org: This patch is sorting out the part that were missed by e97c5b6098 [MIPS: Make irqflags.h functions preempt-safe for non-mipsr2 cpus.] I also re-enabled the inclusion of at the top of ]. Signed-off-by: Jim Quinlan Cc: linux-mips@linux-mips.org Cc: cernekee@gmail.com Patchwork: https://patchwork.linux-mips.org/patch/6164/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 69a9a22..4225e99 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -9,6 +9,7 @@ #define _ASM_ASMMACRO_H #include +#include #ifdef CONFIG_32BIT #include @@ -54,11 +55,21 @@ .endm .macro local_irq_disable reg=t0 +#ifdef CONFIG_PREEMPT + lw \reg, TI_PRE_COUNT($28) + addi \reg, \reg, 1 + sw \reg, TI_PRE_COUNT($28) +#endif mfc0 \reg, CP0_STATUS ori \reg, \reg, 1 xori \reg, \reg, 1 mtc0 \reg, CP0_STATUS irq_disable_hazard +#ifdef CONFIG_PREEMPT + lw \reg, TI_PRE_COUNT($28) + addi \reg, \reg, -1 + sw \reg, TI_PRE_COUNT($28) +#endif .endm #endif /* CONFIG_MIPS_MT_SMTC */ -- cgit v0.10.2 From f5b972e9fbd2e99a2abc3221783d089799b69394 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 20 Mar 2014 15:30:14 +0100 Subject: compat: include linux/unistd.h within linux/compat.h linux/compat.h does not include linux/unistd.h but the compat.h header file contains various conditional #ifdef __ARCH_WANT_COMPAT_... asmlinkage long compat...() #endif compat system call function declarations. If linux/unistd.h isn't included it depends on previous includes if those __ARCH_WANT_COMPAT_... defines are defined or not. So add an additional linux/unistd.h include. Should fix this compile error on tile: include/uapi/asm-generic/unistd.h:195:1: error: 'compat_sys_getdents64' undeclared make[3]: *** [arch/tile/kernel/compat.o] Error 1 Reported-by: Geert Uytterhoeven Acked-by: Geert Uytterhoeven Acked-by: Chris Metcalf Signed-off-by: Heiko Carstens diff --git a/include/linux/compat.h b/include/linux/compat.h index 7c76562..01c0aa5 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -14,6 +14,7 @@ #include #include #include /* for aio_context_t */ +#include #include #include -- cgit v0.10.2 From 87291347c49dc40aa339f587b209618201c2e527 Mon Sep 17 00:00:00 2001 From: Vaibhav Nagarnaik Date: Thu, 13 Feb 2014 19:51:48 -0800 Subject: tracing: Fix array size mismatch in format string In event format strings, the array size is reported in two locations. One in array subscript and then via the "size:" attribute. The values reported there have a mismatch. For e.g., in sched:sched_switch the prev_comm and next_comm character arrays have subscript values as [32] where as the actual field size is 16. name: sched_switch ID: 301 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1;signed:0; field:int common_pid; offset:4; size:4; signed:1; field:char prev_comm[32]; offset:8; size:16; signed:1; field:pid_t prev_pid; offset:24; size:4; signed:1; field:int prev_prio; offset:28; size:4; signed:1; field:long prev_state; offset:32; size:8; signed:1; field:char next_comm[32]; offset:40; size:16; signed:1; field:pid_t next_pid; offset:56; size:4; signed:1; field:int next_prio; offset:60; size:4; signed:1; After bisection, the following commit was blamed: 92edca0 tracing: Use direct field, type and system names This commit removes the duplication of strings for field->name and field->type assuming that all the strings passed in __trace_define_field() are immutable. This is not true for arrays, where the type string is created in event_storage variable and field->type for all array fields points to event_storage. Use __stringify() to create a string constant for the type string. Also, get rid of event_storage and event_storage_mutex that are not needed anymore. also, an added benefit is that this reduces the overhead of events a bit more: text data bss dec hex filename 8424787 2036472 1302528 11763787 b3804b vmlinux 8420814 2036408 1302528 11759750 b37086 vmlinux.patched Link: http://lkml.kernel.org/r/1392349908-29685-1-git-send-email-vnagarnaik@google.com Cc: Laurent Chavey Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Vaibhav Nagarnaik Signed-off-by: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4e4cc28..4cdb3a1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -495,10 +495,6 @@ enum { FILTER_TRACE_FN, }; -#define EVENT_STORAGE_SIZE 128 -extern struct mutex event_storage_mutex; -extern char event_storage[EVENT_STORAGE_SIZE]; - extern int trace_event_raw_init(struct ftrace_event_call *call); extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, int offset, int size, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1a8b28d..1ee19a2 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -310,15 +310,12 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = { \ #undef __array #define __array(type, item, len) \ do { \ - mutex_lock(&event_storage_mutex); \ + char *type_str = #type"["__stringify(len)"]"; \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - snprintf(event_storage, sizeof(event_storage), \ - "%s[%d]", #type, len); \ - ret = trace_define_field(event_call, event_storage, #item, \ + ret = trace_define_field(event_call, type_str, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), FILTER_OTHER); \ - mutex_unlock(&event_storage_mutex); \ if (ret) \ return ret; \ } while (0); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f3989ce..7b16d40 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,12 +27,6 @@ DEFINE_MUTEX(event_mutex); -DEFINE_MUTEX(event_storage_mutex); -EXPORT_SYMBOL_GPL(event_storage_mutex); - -char event_storage[EVENT_STORAGE_SIZE]; -EXPORT_SYMBOL_GPL(event_storage); - LIST_HEAD(ftrace_events); static LIST_HEAD(ftrace_common_fields); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 7c3e3e7..ee0a509 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void) \ #undef __array #define __array(type, item, len) \ do { \ + char *type_str = #type"["__stringify(len)"]"; \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ - mutex_lock(&event_storage_mutex); \ - snprintf(event_storage, sizeof(event_storage), \ - "%s[%d]", #type, len); \ - ret = trace_define_field(event_call, event_storage, #item, \ + ret = trace_define_field(event_call, type_str, #item, \ offsetof(typeof(field), item), \ sizeof(field.item), \ is_signed_type(type), filter_type); \ - mutex_unlock(&event_storage_mutex); \ if (ret) \ return ret; \ } while (0); -- cgit v0.10.2 From 31b1e940c5d47ee1a01baeccfb1b2b8890822d1a Mon Sep 17 00:00:00 2001 From: Christopher Covington Date: Wed, 19 Mar 2014 16:29:37 +0000 Subject: arm64: Fix __range_ok macro Without this, the following scenario is incorrectly determined to be invalid. addr 0x7f_ffffe000 size 8192 addr_limit 0x80_00000000 This behavior was observed while trying to vmsplice the stack as part of a CRIU dump of a process on a system started with the norandmaps kernel parameter. Signed-off-by: Christopher Covington Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 6c0f684..3bf8f4e 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -83,7 +83,7 @@ static inline void set_fs(mm_segment_t fs) * Returns 1 if the range is valid, 0 otherwise. * * This is equivalent to the following test: - * (u65)addr + (u65)size < (u65)current->addr_limit + * (u65)addr + (u65)size <= current->addr_limit * * This needs 65-bit arithmetic. */ @@ -91,7 +91,7 @@ static inline void set_fs(mm_segment_t fs) ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ - asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, cc" \ + asm("adds %1, %1, %3; ccmp %1, %4, #2, cc; cset %0, ls" \ : "=&r" (flag), "=&r" (roksum) \ : "1" (addr), "Ir" (size), \ "r" (current_thread_info()->addr_limit) \ -- cgit v0.10.2 From f9b8c4c8baded129535d82d74df8e87a7a369f54 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 20 Mar 2014 10:45:21 -0700 Subject: openvswitch: Correctly report flow used times for first 5 minutes after boot. The kernel starts out its "jiffies" timer as 5 minutes below zero, as shown in include/linux/jiffies.h: /* * Have the 32 bit jiffies value wrap 5 minutes after boot * so jiffies wrap bugs show up earlier. */ #define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) The loop in ovs_flow_stats_get() starts out with 'used' set to 0, then takes any "later" time. This means that for the first five minutes after boot, flows will always be reported as never used, since 0 is greater than any time already seen. Signed-off-by: Ben Pfaff Acked-by: Pravin B Shelar Signed-off-by: Jesse Gross diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index d71e60f..dda451f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -92,7 +92,7 @@ static void stats_read(struct flow_stats *stats, unsigned long *used, __be16 *tcp_flags) { spin_lock(&stats->lock); - if (time_after(stats->used, *used)) + if (!*used || time_after(stats->used, *used)) *used = stats->used; *tcp_flags |= stats->tcp_flags; ovs_stats->n_packets += stats->packet_count; -- cgit v0.10.2 From 88050049e7111f074e8887ba5a4cefc9f8fa8d41 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 19 Mar 2014 21:54:20 -0700 Subject: netlink: fix setsockopt in mmap examples in documentation The documentation for how to use netlink mmap interface is incorrect. The calls to setsockopt() require an additional argument. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt index b261229..c6af4ba 100644 --- a/Documentation/networking/netlink_mmap.txt +++ b/Documentation/networking/netlink_mmap.txt @@ -226,9 +226,9 @@ Ring setup: void *rx_ring, *tx_ring; /* Configure ring parameters */ - if (setsockopt(fd, NETLINK_RX_RING, &req, sizeof(req)) < 0) + if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0) exit(1); - if (setsockopt(fd, NETLINK_TX_RING, &req, sizeof(req)) < 0) + if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0) exit(1) /* Calculate size of each individual ring */ -- cgit v0.10.2 From 632623153196bf183a69686ed9c07eee98ff1bf8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 19 Mar 2014 21:02:21 -0700 Subject: tcp: syncookies: do not use getnstimeofday() While it is true that getnstimeofday() uses about 40 cycles if TSC is available, it can use 1600 cycles if hpet is the clocksource. Switch to get_jiffies_64(), as this is more than enough, and go back to 60 seconds periods. Fixes: 8c27bd75f04f ("tcp: syncookies: reduce cookie lifetime to 128 seconds") Signed-off-by: Eric Dumazet Cc: Florian Westphal Acked-by: Florian Westphal Signed-off-by: David S. Miller diff --git a/include/net/tcp.h b/include/net/tcp.h index 8c4dd63..743acce 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -480,20 +480,21 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_SYN_COOKIES #include -/* Syncookies use a monotonic timer which increments every 64 seconds. +/* Syncookies use a monotonic timer which increments every 60 seconds. * This counter is used both as a hash input and partially encoded into * the cookie value. A cookie is only validated further if the delta * between the current counter value and the encoded one is less than this, - * i.e. a sent cookie is valid only at most for 128 seconds (or less if + * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if * the counter advances immediately after a cookie is generated). */ #define MAX_SYNCOOKIE_AGE 2 static inline u32 tcp_cookie_time(void) { - struct timespec now; - getnstimeofday(&now); - return now.tv_sec >> 6; /* 64 seconds granularity */ + u64 val = get_jiffies_64(); + + do_div(val, 60 * HZ); + return val; } u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, -- cgit v0.10.2 From 1c104a6bebf3c16b6248408b84f91d09ac8a26b6 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:49 +0100 Subject: rtnetlink: fix fdb notification flags Commit 3ff661c38c84 ("net: rtnetlink notify events for FDB NTF_SELF adds and deletes") reuses the function nlmsg_populate_fdb_fill() to notify fdb events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1a0dac2..120eecc 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2121,12 +2121,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u32 pid, u32 seq, - int type, unsigned int flags) + int type, unsigned int flags, + int nlflags) { struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI); + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags); if (!nlh) return -EMSGSIZE; @@ -2164,7 +2165,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) if (!skb) goto errout; - err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF); + err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2389,7 +2390,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, portid, seq, - RTM_NEWNEIGH, NTF_SELF); + RTM_NEWNEIGH, NTF_SELF, + NLM_F_MULTI); if (err < 0) return err; skip: -- cgit v0.10.2 From 65886f439ab0fdc2dff20d1fa87afb98c6717472 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:50 +0100 Subject: ipmr: fix mfc notification flags Commit 8cd3ac9f9b7b ("ipmr: advertise new mfc entries via rtnl") reuses the function ipmr_fill_mroute() to notify mfc events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b9b3472..28863570 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2255,13 +2255,14 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc_cache *c, int cmd) + u32 portid, u32 seq, struct mfc_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2329,7 +2330,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, if (skb == NULL) goto errout; - err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2368,7 +2369,8 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ipmr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2382,7 +2384,8 @@ next_entry: if (ipmr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } -- cgit v0.10.2 From f518338b16038beeb73e155e60d0f70beb9379f4 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 19 Mar 2014 17:47:51 +0100 Subject: ip6mr: fix mfc notification flags Commit 812e44dd1829 ("ip6mr: advertise new mfc entries via rtnl") reuses the function ip6mr_fill_mroute() to notify mfc events. But this function was used only for dump and thus was always setting the flag NLM_F_MULTI, which is wrong in case of a single notification. Libraries like libnl will wait forever for NLMSG_DONE. CC: Thomas Graf Signed-off-by: Nicolas Dichtel Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0eb4038..8737400 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2349,13 +2349,14 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc6_cache *c, int cmd) + u32 portid, u32 seq, struct mfc6_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2423,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, if (skb == NULL) goto errout; - err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2462,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2476,7 +2478,8 @@ next_entry: if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } -- cgit v0.10.2 From 8c90487cdc64847b4fdd812ab3047f426fec4d13 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 26 Feb 2014 10:49:49 -0500 Subject: Rename TAINT_UNSAFE_SMP to TAINT_CPU_OUT_OF_SPEC Rename TAINT_UNSAFE_SMP to TAINT_CPU_OUT_OF_SPEC, so we can repurpose the flag to encompass a wider range of pushing the CPU beyond its warrany. Signed-off-by: Dave Jones Link: http://lkml.kernel.org/r/20140226154949.GA770@redhat.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b85e43a..ce8b8ff 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -218,7 +218,7 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c) */ WARN_ONCE(1, "WARNING: This combination of AMD" " processors is not suitable for SMP.\n"); - add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); } static void init_amd_k7(struct cpuinfo_x86 *c) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 196d1ea..08fb024 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -458,7 +458,7 @@ extern enum system_states { #define TAINT_PROPRIETARY_MODULE 0 #define TAINT_FORCED_MODULE 1 -#define TAINT_UNSAFE_SMP 2 +#define TAINT_CPU_OUT_OF_SPEC 2 #define TAINT_FORCED_RMMOD 3 #define TAINT_MACHINE_CHECK 4 #define TAINT_BAD_PAGE 5 diff --git a/kernel/module.c b/kernel/module.c index d24fcf2..ca2c1ad 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1015,7 +1015,7 @@ static size_t module_flags_taint(struct module *mod, char *buf) buf[l++] = 'C'; /* * TAINT_FORCED_RMMOD: could be added. - * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't + * TAINT_CPU_OUT_OF_SPEC, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't * apply to modules. */ return l; diff --git a/kernel/panic.c b/kernel/panic.c index 6d63003..2270cfd 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -199,7 +199,7 @@ struct tnt { static const struct tnt tnts[] = { { TAINT_PROPRIETARY_MODULE, 'P', 'G' }, { TAINT_FORCED_MODULE, 'F', ' ' }, - { TAINT_UNSAFE_SMP, 'S', ' ' }, + { TAINT_CPU_OUT_OF_SPEC, 'S', ' ' }, { TAINT_FORCED_RMMOD, 'R', ' ' }, { TAINT_MACHINE_CHECK, 'M', ' ' }, { TAINT_BAD_PAGE, 'B', ' ' }, -- cgit v0.10.2 From 69f2366c9456d0ce784cf5aba87ee77eeadc1d5e Mon Sep 17 00:00:00 2001 From: Chris Bainbridge Date: Fri, 7 Mar 2014 18:40:42 +0700 Subject: x86, cpu: Add forcepae parameter for booting PAE kernels on PAE-disabled Pentium M Many Pentium M systems disable PAE but may have a functionally usable PAE implementation. This adds the "forcepae" parameter which bypasses the boot check for PAE, and sets the CPU as being PAE capable. Using this parameter will taint the kernel with TAINT_CPU_OUT_OF_SPEC. Signed-off-by: Chris Bainbridge Link: http://lkml.kernel.org/r/20140307114040.GA4997@localhost Acked-by: Borislav Petkov Signed-off-by: H. Peter Anvin diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7116fda..06600cc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1011,6 +1011,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. parameter will force ia64_sal_cache_flush to call ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. + forcepae [X86-32] + Forcefully enable Physical Address Extension (PAE). + Many Pentium M systems disable PAE but may have a + functionally usable PAE implementation. + Warning: use of this parameter will taint the kernel + and may cause unknown problems. + ftrace=[tracer] [FTRACE] will set and start the specified tracer as early as possible in order to facilitate early diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 100a9a1..f0d0b20 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -67,6 +67,13 @@ static int is_transmeta(void) cpu_vendor[2] == A32('M', 'x', '8', '6'); } +static int is_intel(void) +{ + return cpu_vendor[0] == A32('G', 'e', 'n', 'u') && + cpu_vendor[1] == A32('i', 'n', 'e', 'I') && + cpu_vendor[2] == A32('n', 't', 'e', 'l'); +} + /* Returns a bitmask of which words we have error bits in */ static int check_cpuflags(void) { @@ -153,6 +160,19 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); err = check_cpuflags(); + } else if (err == 0x01 && + !(err_flags[0] & ~(1 << X86_FEATURE_PAE)) && + is_intel() && cpu.level == 6 && + (cpu.model == 9 || cpu.model == 13)) { + /* PAE is disabled on this Pentium M but can be forced */ + if (cmdline_find_option_bool("forcepae")) { + puts("WARNING: Forcing PAE in CPU flags\n"); + set_bit(X86_FEATURE_PAE, cpu.flags); + err = check_cpuflags(); + } + else { + puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n"); + } } if (err_flags_ptr) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 34bbb55..897d620 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -186,6 +186,14 @@ static void intel_smp_check(struct cpuinfo_x86 *c) } } +static int forcepae; +static int __init forcepae_setup(char *__unused) +{ + forcepae = 1; + return 1; +} +__setup("forcepae", forcepae_setup); + static void intel_workarounds(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_F00F_BUG @@ -214,6 +222,17 @@ static void intel_workarounds(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_SEP); /* + * PAE CPUID issue: many Pentium M report no PAE but may have a + * functionally usable PAE implementation. + * Forcefully enable PAE if kernel parameter "forcepae" is present. + */ + if (forcepae) { + printk(KERN_WARNING "PAE forced!\n"); + set_cpu_cap(c, X86_FEATURE_PAE); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); + } + + /* * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ -- cgit v0.10.2 From 765a3f4fed708ae429ee095914a7897acb3a65bd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Mar 2014 16:37:08 -0700 Subject: rcu: Provide grace-period piggybacking API The following pattern is currently not well supported by RCU: 1. Make data element inaccessible to RCU readers. 2. Do work that probably lasts for more than one grace period. 3. Do something to make sure RCU readers in flight before #1 above have completed. Here are some things that could currently be done: a. Do a synchronize_rcu() unconditionally at either #1 or #3 above. This works, but imposes needless work and latency. b. Post an RCU callback at #1 above that does a wakeup, then wait for the wakeup at #3. This works well, but likely results in an extra unneeded grace period. Open-coding this is also a bit more semi-tricky code than would be good. This commit therefore adds get_state_synchronize_rcu() and cond_synchronize_rcu() APIs. Call get_state_synchronize_rcu() at #1 above and pass its return value to cond_synchronize_rcu() at #3 above. This results in a call to synchronize_rcu() if no grace period has elapsed between #1 and #3, but requires only a load, comparison, and memory barrier if a full grace period did elapse. Requested-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e8cb6e3..425c659 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,6 +27,16 @@ #include +static inline unsigned long get_state_synchronize_rcu(void) +{ + return 0; +} + +static inline void cond_synchronize_rcu(unsigned long oldstate) +{ + might_sleep(); +} + static inline void rcu_barrier_bh(void) { wait_rcu_gp(call_rcu_bh); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e9c6388..a59ca05 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -76,6 +76,8 @@ static inline void synchronize_rcu_bh_expedited(void) void rcu_barrier(void); void rcu_barrier_bh(void); void rcu_barrier_sched(void); +unsigned long get_state_synchronize_rcu(void); +void cond_synchronize_rcu(unsigned long oldstate); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 351faba..0c47e30 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1421,13 +1421,14 @@ static int rcu_gp_init(struct rcu_state *rsp) /* Advance to a new grace period and initialize state. */ record_gp_stall_check_time(rsp); - smp_wmb(); /* Record GP times before starting GP. */ - rsp->gpnum++; + /* Record GP times before starting GP, hence smp_store_release(). */ + smp_store_release(&rsp->gpnum, rsp->gpnum + 1); trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); raw_spin_unlock_irq(&rnp->lock); /* Exclude any concurrent CPU-hotplug operations. */ mutex_lock(&rsp->onoff_mutex); + smp_mb__after_unlock_lock(); /* ->gpnum increment before GP! */ /* * Set the quiescent-state-needed bits in all the rcu_node @@ -1555,10 +1556,11 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); - smp_mb__after_unlock_lock(); + smp_mb__after_unlock_lock(); /* Order GP before ->completed update. */ rcu_nocb_gp_set(rnp, nocb); - rsp->completed = rsp->gpnum; /* Declare grace period done. */ + /* Declare grace period done. */ + ACCESS_ONCE(rsp->completed) = rsp->gpnum; trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); @@ -2637,6 +2639,58 @@ void synchronize_rcu_bh(void) } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); +/** + * get_state_synchronize_rcu - Snapshot current RCU state + * + * Returns a cookie that is used by a later call to cond_synchronize_rcu() + * to determine whether or not a full grace period has elapsed in the + * meantime. + */ +unsigned long get_state_synchronize_rcu(void) +{ + /* + * Any prior manipulation of RCU-protected data must happen + * before the load from ->gpnum. + */ + smp_mb(); /* ^^^ */ + + /* + * Make sure this load happens before the purportedly + * time-consuming work between get_state_synchronize_rcu() + * and cond_synchronize_rcu(). + */ + return smp_load_acquire(&rcu_state->gpnum); +} +EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); + +/** + * cond_synchronize_rcu - Conditionally wait for an RCU grace period + * + * @oldstate: return value from earlier call to get_state_synchronize_rcu() + * + * If a full RCU grace period has elapsed since the earlier call to + * get_state_synchronize_rcu(), just return. Otherwise, invoke + * synchronize_rcu() to wait for a full grace period. + * + * Yes, this function does not take counter wrap into account. But + * counter wrap is harmless. If the counter wraps, we have waited for + * more than 2 billion grace periods (and way more on a 64-bit system!), + * so waiting for one additional grace period should be just fine. + */ +void cond_synchronize_rcu(unsigned long oldstate) +{ + unsigned long newstate; + + /* + * Ensure that this load happens before any RCU-destructive + * actions the caller might carry out after we return. + */ + newstate = smp_load_acquire(&rcu_state->completed); + if (ULONG_CMP_GE(oldstate, newstate)) + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(cond_synchronize_rcu); + static int synchronize_sched_expedited_cpu_stop(void *data) { /* -- cgit v0.10.2 From 7e09e738afd21ef99f047425fc0b0c9be8b03254 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 20 Mar 2014 21:52:17 -0700 Subject: mm: fix swapops.h:131 bug if remap_file_pages raced migration Add remove_linear_migration_ptes_from_nonlinear(), to fix an interesting little include/linux/swapops.h:131 BUG_ON(!PageLocked) found by trinity: indicating that remove_migration_ptes() failed to find one of the migration entries that was temporarily inserted. The problem comes from remap_file_pages()'s switch from vma_interval_tree (good for inserting the migration entry) to i_mmap_nonlinear list (no good for locating it again); but can only be a problem if the remap_file_pages() range does not cover the whole of the vma (zap_pte() clears the range). remove_migration_ptes() needs a file_nonlinear method to go down the i_mmap_nonlinear list, applying linear location to look for migration entries in those vmas too, just in case there was this race. The file_nonlinear method does need rmap_walk_control.arg to do this; but it never needed vma passed in - vma comes from its own iteration. Reported-and-tested-by: Dave Jones Reported-and-tested-by: Sasha Levin Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1da693d..b66c211 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -250,8 +250,7 @@ struct rmap_walk_control { int (*rmap_one)(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct page *page); - int (*file_nonlinear)(struct page *, struct address_space *, - struct vm_area_struct *vma); + int (*file_nonlinear)(struct page *, struct address_space *, void *arg); struct anon_vma *(*anon_lock)(struct page *page); bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; diff --git a/mm/migrate.c b/mm/migrate.c index b494fdb..bed4880 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -178,6 +178,37 @@ out: } /* + * Congratulations to trinity for discovering this bug. + * mm/fremap.c's remap_file_pages() accepts any range within a single vma to + * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then + * replace the specified range by file ptes throughout (maybe populated after). + * If page migration finds a page within that range, while it's still located + * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem: + * zap_pte() clears the temporary migration entry before mmap_sem is dropped. + * But if the migrating page is in a part of the vma outside the range to be + * remapped, then it will not be cleared, and remove_migration_ptes() needs to + * deal with it. Fortunately, this part of the vma is of course still linear, + * so we just need to use linear location on the nonlinear list. + */ +static int remove_linear_migration_ptes_from_nonlinear(struct page *page, + struct address_space *mapping, void *arg) +{ + struct vm_area_struct *vma; + /* hugetlbfs does not support remap_pages, so no huge pgoff worries */ + pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + unsigned long addr; + + list_for_each_entry(vma, + &mapping->i_mmap_nonlinear, shared.nonlinear) { + + addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + if (addr >= vma->vm_start && addr < vma->vm_end) + remove_migration_pte(page, vma, addr, arg); + } + return SWAP_AGAIN; +} + +/* * Get rid of all migration entries and replace them by * references to the indicated page. */ @@ -186,6 +217,7 @@ static void remove_migration_ptes(struct page *old, struct page *new) struct rmap_walk_control rwc = { .rmap_one = remove_migration_pte, .arg = old, + .file_nonlinear = remove_linear_migration_ptes_from_nonlinear, }; rmap_walk(new, &rwc); diff --git a/mm/rmap.c b/mm/rmap.c index d9d4231..8fc049f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1360,8 +1360,9 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, } static int try_to_unmap_nonlinear(struct page *page, - struct address_space *mapping, struct vm_area_struct *vma) + struct address_space *mapping, void *arg) { + struct vm_area_struct *vma; int ret = SWAP_AGAIN; unsigned long cursor; unsigned long max_nl_cursor = 0; @@ -1663,7 +1664,7 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) if (list_empty(&mapping->i_mmap_nonlinear)) goto done; - ret = rwc->file_nonlinear(page, mapping, vma); + ret = rwc->file_nonlinear(page, mapping, rwc->arg); done: mutex_unlock(&mapping->i_mmap_mutex); -- cgit v0.10.2 From 11d4616bd07f38d496bd489ed8fad1dc4d928823 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 20 Mar 2014 22:11:17 -0700 Subject: futex: revert back to the explicit waiter counting code Srikar Dronamraju reports that commit b0c29f79ecea ("futexes: Avoid taking the hb->lock if there's nothing to wake up") causes java threads getting stuck on futexes when runing specjbb on a power7 numa box. The cause appears to be that the powerpc spinlocks aren't using the same ticket lock model that we use on x86 (and other) architectures, which in turn result in the "spin_is_locked()" test in hb_waiters_pending() occasionally reporting an unlocked spinlock even when there are pending waiters. So this reinstates Davidlohr Bueso's original explicit waiter counting code, which I had convinced Davidlohr to drop in favor of figuring out the pending waiters by just using the existing state of the spinlock and the wait queue. Reported-and-tested-by: Srikar Dronamraju Original-code-by: Davidlohr Bueso Signed-off-by: Linus Torvalds diff --git a/kernel/futex.c b/kernel/futex.c index 44a1261..08ec814 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -234,6 +234,7 @@ static const struct futex_q futex_q_init = { * waiting on a futex. */ struct futex_hash_bucket { + atomic_t waiters; spinlock_t lock; struct plist_head chain; } ____cacheline_aligned_in_smp; @@ -253,22 +254,37 @@ static inline void futex_get_mm(union futex_key *key) smp_mb__after_atomic_inc(); } -static inline bool hb_waiters_pending(struct futex_hash_bucket *hb) +/* + * Reflects a new waiter being added to the waitqueue. + */ +static inline void hb_waiters_inc(struct futex_hash_bucket *hb) { #ifdef CONFIG_SMP + atomic_inc(&hb->waiters); /* - * Tasks trying to enter the critical region are most likely - * potential waiters that will be added to the plist. Ensure - * that wakers won't miss to-be-slept tasks in the window between - * the wait call and the actual plist_add. + * Full barrier (A), see the ordering comment above. */ - if (spin_is_locked(&hb->lock)) - return true; - smp_rmb(); /* Make sure we check the lock state first */ + smp_mb__after_atomic_inc(); +#endif +} + +/* + * Reflects a waiter being removed from the waitqueue by wakeup + * paths. + */ +static inline void hb_waiters_dec(struct futex_hash_bucket *hb) +{ +#ifdef CONFIG_SMP + atomic_dec(&hb->waiters); +#endif +} - return !plist_head_empty(&hb->chain); +static inline int hb_waiters_pending(struct futex_hash_bucket *hb) +{ +#ifdef CONFIG_SMP + return atomic_read(&hb->waiters); #else - return true; + return 1; #endif } @@ -954,6 +970,7 @@ static void __unqueue_futex(struct futex_q *q) hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); plist_del(&q->list, &hb->chain); + hb_waiters_dec(hb); } /* @@ -1257,7 +1274,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, */ if (likely(&hb1->chain != &hb2->chain)) { plist_del(&q->list, &hb1->chain); + hb_waiters_dec(hb1); plist_add(&q->list, &hb2->chain); + hb_waiters_inc(hb2); q->lock_ptr = &hb2->lock; } get_futex_key_refs(key2); @@ -1600,6 +1619,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q) struct futex_hash_bucket *hb; hb = hash_futex(&q->key); + + /* + * Increment the counter before taking the lock so that + * a potential waker won't miss a to-be-slept task that is + * waiting for the spinlock. This is safe as all queue_lock() + * users end up calling queue_me(). Similarly, for housekeeping, + * decrement the counter at queue_unlock() when some error has + * occurred and we don't end up adding the task to the list. + */ + hb_waiters_inc(hb); + q->lock_ptr = &hb->lock; spin_lock(&hb->lock); /* implies MB (A) */ @@ -1611,6 +1641,7 @@ queue_unlock(struct futex_hash_bucket *hb) __releases(&hb->lock) { spin_unlock(&hb->lock); + hb_waiters_dec(hb); } /** @@ -2342,6 +2373,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * Unqueue the futex_q and determine which it was. */ plist_del(&q->list, &hb->chain); + hb_waiters_dec(hb); /* Handle spurious wakeups gracefully */ ret = -EWOULDBLOCK; @@ -2875,6 +2907,7 @@ static int __init futex_init(void) futex_cmpxchg_enabled = 1; for (i = 0; i < futex_hashsize; i++) { + atomic_set(&futex_queues[i].waiters, 0); plist_head_init(&futex_queues[i].chain); spin_lock_init(&futex_queues[i].lock); } -- cgit v0.10.2 From 708f04d2abf4e90abee61d9ffb1f165038017ecf Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 20 Mar 2014 15:03:58 -0600 Subject: block: free q->flush_rq in blk_init_allocated_queue error paths Commit 7982e90c3a57 ("block: fix q->flush_rq NULL pointer crash on dm-mpath flush") moved an allocation to blk_init_allocated_queue(), but neglected to free that allocation on the error paths that follow. Signed-off-by: Dave Jones Acked-by: Mike Snitzer Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds diff --git a/block/blk-core.c b/block/blk-core.c index 4cd5ffc..bfe16d5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -713,7 +713,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, return NULL; if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) - return NULL; + goto fail; q->request_fn = rfn; q->prep_rq_fn = NULL; @@ -737,12 +737,16 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, /* init elevator */ if (elevator_init(q, NULL)) { mutex_unlock(&q->sysfs_lock); - return NULL; + goto fail; } mutex_unlock(&q->sysfs_lock); return q; + +fail: + kfree(q->flush_rq); + return NULL; } EXPORT_SYMBOL(blk_init_allocated_queue); -- cgit v0.10.2 From c7c5be73ccc05da9899b313b9fa0042aae56502f Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Wed, 19 Mar 2014 10:13:22 +0100 Subject: s390/mm: fixing comment so that parameter name match Signed-off-by: Dominik Dingel Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index f8b58a7..27a1b93 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -748,7 +748,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); /** * gmap_ipte_notify - mark a range of ptes for invalidation notification * @gmap: pointer to guest mapping meta data structure - * @address: virtual address in the guest address space + * @start: virtual address in the guest address space * @len: size of area * * Returns 0 if for each page in the given range a gmap mapping exists and -- cgit v0.10.2 From aaeff84a2dfa224611fc9fee89cb20277469c454 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Wed, 19 Mar 2014 10:18:49 +0100 Subject: s390/mm: remove unnecessary parameter from gmap_do_ipte_notify Signed-off-by: Dominik Dingel Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a7dd672..9602154 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -841,7 +841,7 @@ void __gmap_zap(unsigned long address, struct gmap *); void gmap_register_ipte_notifier(struct gmap_notifier *); void gmap_unregister_ipte_notifier(struct gmap_notifier *); int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); -void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); +void gmap_do_ipte_notify(struct mm_struct *, pte_t *); static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, unsigned long addr, @@ -850,7 +850,7 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, #ifdef CONFIG_PGSTE if (pgste_val(pgste) & PGSTE_IN_BIT) { pgste_val(pgste) &= ~PGSTE_IN_BIT; - gmap_do_ipte_notify(mm, addr, ptep); + gmap_do_ipte_notify(mm, ptep); } #endif return pgste; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 27a1b93..796c932 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -802,13 +802,12 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify); /** * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. * @mm: pointer to the process mm_struct - * @addr: virtual address in the process address space * @pte: pointer to the page table entry * * This function is assumed to be called with the page table lock held * for the pte to notify. */ -void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte) +void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte) { unsigned long segment_offset; struct gmap_notifier *nb; -- cgit v0.10.2 From 6e5a40a49fe16f8032c4dcd4fa5ff866da09d445 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Wed, 19 Mar 2014 10:19:35 +0100 Subject: s390/mm: remove unecessary parameter from pgste_ipte_notify Signed-off-by: Dominik Dingel Signed-off-by: Martin Schwidefsky diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9602154..1ab75ea 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -844,7 +844,6 @@ int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); void gmap_do_ipte_notify(struct mm_struct *, pte_t *); static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE @@ -1104,7 +1103,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); } pte = *ptep; @@ -1150,7 +1149,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); + pgste = pgste_ipte_notify(mm, ptep, pgste); } pte = *ptep; @@ -1174,7 +1173,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste_ipte_notify(mm, address, ptep, pgste); + pgste_ipte_notify(mm, ptep, pgste); } pte = *ptep; @@ -1211,7 +1210,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); } pte = *ptep; @@ -1245,7 +1244,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, if (!full && mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); + pgste = pgste_ipte_notify(mm, ptep, pgste); } pte = *ptep; @@ -1270,7 +1269,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, if (pte_write(pte)) { if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); + pgste = pgste_ipte_notify(mm, ptep, pgste); } ptep_flush_lazy(mm, address, ptep); @@ -1296,7 +1295,7 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, return 0; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); + pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste); } ptep_flush_direct(vma->vm_mm, address, ptep); -- cgit v0.10.2 From 87291a9267ecc0a8efceb4d9cbd4da870f958fc1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Mar 2014 15:05:50 +0100 Subject: clocksource: CMT, MTU2, TMU and STI should depend on GENERIC_CLOCKEVENTS If GENERIC_CLOCKEVENTS=n: drivers/clocksource/sh_cmt.c:54:28: error: field 'ced' has incomplete type drivers/clocksource/sh_cmt.c: In function 'sh_cmt_interrupt': drivers/clocksource/sh_cmt.c:407:23: error: 'CLOCK_EVT_MODE_ONESHOT' undeclared (first use in this function) drivers/clocksource/sh_mtu2.c:44:28: error: field 'ced' has incomplete type drivers/clocksource/sh_mtu2.c: In function 'ced_to_sh_mtu2': drivers/clocksource/sh_mtu2.c:184:70: warning: initialization from incompatible pointer type [enabled by default] drivers/clocksource/sh_mtu2.c: At top level: drivers/clocksource/sh_mtu2.c:188:16: warning: 'enum clock_event_mode' declared inside parameter list [enabled by default] drivers/clocksource/sh_tmu.c:45:28: error: field 'ced' has incomplete type drivers/clocksource/sh_tmu.c: In function 'sh_tmu_interrupt': drivers/clocksource/sh_tmu.c:207:21: error: 'CLOCK_EVT_MODE_ONESHOT' undeclared (first use in this function) drivers/clocksource/em_sti.c:44:28: error: field 'ced' has incomplete type drivers/clocksource/em_sti.c: In function 'ced_to_em_sti': drivers/clocksource/em_sti.c:251:69: warning: initialization from incompatible pointer type [enabled by default] drivers/clocksource/em_sti.c: At top level: drivers/clocksource/em_sti.c:255:16: warning: 'enum clock_event_mode' declared inside parameter list [enabled by default] Signed-off-by: Geert Uytterhoeven Cc: Magnus Damm Cc: Daniel Lezcano Link: http://lkml.kernel.org/r/1395324352-9146-1-git-send-email-geert@linux-m68k.org Signed-off-by: Thomas Gleixner diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 4f754a9..52e9329 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -155,6 +155,7 @@ config SYS_SUPPORTS_EM_STI config SH_TIMER_CMT bool "Renesas CMT timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS default SYS_SUPPORTS_SH_CMT help This enables build of a clocksource and clockevent driver for @@ -163,6 +164,7 @@ config SH_TIMER_CMT config SH_TIMER_MTU2 bool "Renesas MTU2 timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS default SYS_SUPPORTS_SH_MTU2 help This enables build of a clockevent driver for the Multi-Function @@ -171,6 +173,7 @@ config SH_TIMER_MTU2 config SH_TIMER_TMU bool "Renesas TMU timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS default SYS_SUPPORTS_SH_TMU help This enables build of a clocksource and clockevent driver for @@ -179,6 +182,7 @@ config SH_TIMER_TMU config EM_TIMER_STI bool "Renesas STI timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS default SYS_SUPPORTS_EM_STI help This enables build of a clocksource and clockevent driver for -- cgit v0.10.2 From d6ee6d2325faeec3fb0122a4840678a2ba62b04b Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sat, 22 Mar 2014 12:20:31 +0400 Subject: genirq: Export symbol no_action() This will allow to use the dummy IRQ handler no_action() from drivers compiled as module. Drivers which use ARM FIQ interrupts can use this to request the interrupt via the normal request_irq() mechanism w/o having to copy the dummy handler to their own code. Signed-off-by: Alexander Shiyan Link: http://lkml.kernel.org/r/1395476431-16070-1-git-send-email-shc_work@mail.ru Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index bfec453..6354802 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -41,6 +41,7 @@ irqreturn_t no_action(int cpl, void *dev_id) { return IRQ_NONE; } +EXPORT_SYMBOL_GPL(no_action); static void warn_no_thread(unsigned int irq, struct irqaction *action) { -- cgit v0.10.2 From d7a15f8d0777955986a2ab00ab181795cab14b01 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Mar 2014 14:24:08 -0500 Subject: vfs: atomic f_pos access in llseek() Commit 9c225f2655e36a4 ("vfs: atomic f_pos accesses as per POSIX") changed several system calls to use fdget_pos() instead of fdget(), but missed sys_llseek(). Fix it. Signed-off-by: Eric Biggers Signed-off-by: Al Viro diff --git a/fs/read_write.c b/fs/read_write.c index 54e19b9..28cc9c8 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -307,7 +307,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned int, whence) { int retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); loff_t offset; if (!f.file) @@ -327,7 +327,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, retval = 0; } out_putf: - fdput(f); + fdput_pos(f); return retval; } #endif -- cgit v0.10.2 From 99aea68134f3c2a27b4d463c91cfa298c3efaccf Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 16 Mar 2014 15:47:48 -0500 Subject: vfs: Don't let __fdget_pos() get FMODE_PATH files Commit bd2a31d522344 ("get rid of fget_light()") introduced the __fdget_pos() function, which returns the resulting file pointer and fdput flags combined in an 'unsigned long'. However, it also changed the behavior to return files with FMODE_PATH set, which shouldn't happen because read(), write(), lseek(), etc. aren't allowed on such files. This commit restores the old behavior. This regression actually had no effect on read() and write() since FMODE_READ and FMODE_WRITE are not set on file descriptors opened with O_PATH, but it did cause lseek() on a file descriptor opened with O_PATH to fail with ESPIPE rather than EBADF. Signed-off-by: Eric Biggers Signed-off-by: Al Viro diff --git a/fs/file.c b/fs/file.c index 60a45e9..eb56a13 100644 --- a/fs/file.c +++ b/fs/file.c @@ -713,27 +713,16 @@ unsigned long __fdget_raw(unsigned int fd) unsigned long __fdget_pos(unsigned int fd) { - struct files_struct *files = current->files; - struct file *file; - unsigned long v; - - if (atomic_read(&files->count) == 1) { - file = __fcheck_files(files, fd); - v = 0; - } else { - file = __fget(fd, 0); - v = FDPUT_FPUT; - } - if (!file) - return 0; + unsigned long v = __fdget(fd); + struct file *file = (struct file *)(v & ~3); - if (file->f_mode & FMODE_ATOMIC_POS) { + if (file && (file->f_mode & FMODE_ATOMIC_POS)) { if (file_count(file) > 1) { v |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); } } - return v | (unsigned long)file; + return v; } /* -- cgit v0.10.2 From e825196d48d2b89a6ec3a8eff280098d2a78207e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 23 Mar 2014 00:28:40 -0400 Subject: make prepend_name() work correctly when called with negative *buflen In all callchains leading to prepend_name(), the value left in *buflen is eventually discarded unused if prepend_name() has returned a negative. So we are free to do what prepend() does, and subtract from *buflen *before* checking for underflow (which turns into checking the sign of subtraction result, of course). Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 265e0ce..ca02c13 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2833,9 +2833,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; - if (*buflen < dlen + 1) - return -ENAMETOOLONG; *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; p = *buffer -= dlen + 1; *p++ = '/'; while (dlen--) { -- cgit v0.10.2 From b37199e626b31e1175fb06764c5d1d687723aac2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Mar 2014 15:18:22 -0400 Subject: rcuwalk: recheck mount_lock after mountpoint crossing attempts We can get false negative from __lookup_mnt() if an unrelated vfsmount gets moved. In that case legitimize_mnt() is guaranteed to fail, and we will fall back to non-RCU walk... unless we end up running into a hard error on a filesystem object we wouldn't have reached if not for that false negative. IOW, delaying that check until the end of pathname resolution is wrong - we should recheck right after we attempt to cross the mountpoint. We don't need to recheck unless we see d_mountpoint() being true - in that case even if we have just raced with mount/umount, we can simply go on as if we'd come at the moment when the sucker wasn't a mountpoint; if we run into a hard error as the result, it was a legitimate outcome. __lookup_mnt() returning NULL is different in that respect, since it might've happened due to operation on completely unrelated mountpoint. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 2f730ef..4b491b4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1109,7 +1109,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, return false; if (!d_mountpoint(path->dentry)) - break; + return true; mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) @@ -1125,20 +1125,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, */ *inode = path->dentry->d_inode; } - return true; -} - -static void follow_mount_rcu(struct nameidata *nd) -{ - while (d_mountpoint(nd->path.dentry)) { - struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); - if (!mounted) - break; - nd->path.mnt = &mounted->mnt; - nd->path.dentry = mounted->mnt.mnt_root; - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - } + return read_seqretry(&mount_lock, nd->m_seq); } static int follow_dotdot_rcu(struct nameidata *nd) @@ -1166,7 +1153,17 @@ static int follow_dotdot_rcu(struct nameidata *nd) break; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } - follow_mount_rcu(nd); + while (d_mountpoint(nd->path.dentry)) { + struct mount *mounted; + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + if (!mounted) + break; + nd->path.mnt = &mounted->mnt; + nd->path.dentry = mounted->mnt.mnt_root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + if (!read_seqretry(&mount_lock, nd->m_seq)) + goto failed; + } nd->inode = nd->path.dentry->d_inode; return 0; -- cgit v0.10.2 From a2fb4d782c61f77480e586578eeb4dfd27d134ea Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 23 Mar 2014 16:39:52 +0100 Subject: partly revert commit 8a10bc9: parisc/sti_console: prefer Linux fonts over built-in ROM fonts STI console is used on parisc and m68k HP machines. This patch partly reverts my previous commit and as such restores the fonts for the m68k machines. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v3.13 diff --git a/lib/fonts/Kconfig b/lib/fonts/Kconfig index 4dc1b99..34fd931 100644 --- a/lib/fonts/Kconfig +++ b/lib/fonts/Kconfig @@ -9,7 +9,7 @@ if FONT_SUPPORT config FONTS bool "Select compiled-in fonts" - depends on FRAMEBUFFER_CONSOLE + depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE help Say Y here if you would like to use fonts other than the default your frame buffer console usually use. @@ -22,7 +22,7 @@ config FONTS config FONT_8x8 bool "VGA 8x8 font" if FONTS - depends on FRAMEBUFFER_CONSOLE + depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE default y if !SPARC && !FONTS help This is the "high resolution" font for the VGA frame buffer (the one @@ -45,7 +45,7 @@ config FONT_8x16 config FONT_6x11 bool "Mac console 6x11 font (not supported by all drivers)" if FONTS - depends on FRAMEBUFFER_CONSOLE + depends on FRAMEBUFFER_CONSOLE || STI_CONSOLE default y if !SPARC && !FONTS && MAC help Small console font with Macintosh-style high-half glyphs. Some Mac -- cgit v0.10.2 From 4b02a72a266b58268979cb6f9c5d4dd002148634 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 1 Mar 2014 17:41:22 -0500 Subject: parisc: Remove unused CONFIG_PARISC_TMPALIAS code The attached change removes the unused and experimental CONFIG_PARISC_TMPALIAS code. It doesn't work and I don't believe it will ever be used. Signed-off-by: John David Anglin Signed-off-by: Helge Deller diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index 637fe03..60d5d17 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -32,17 +32,6 @@ void copy_page_asm(void *to, void *from); void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, struct page *pg); -/* #define CONFIG_PARISC_TMPALIAS */ - -#ifdef CONFIG_PARISC_TMPALIAS -void clear_user_highpage(struct page *page, unsigned long vaddr); -#define clear_user_highpage clear_user_highpage -struct vm_area_struct; -void copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr, struct vm_area_struct *vma); -#define __HAVE_ARCH_COPY_USER_HIGHPAGE -#endif - /* * These are used to make use of C type-checking.. */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index ac87a40..a6ffc77 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -581,67 +581,3 @@ flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } } - -#ifdef CONFIG_PARISC_TMPALIAS - -void clear_user_highpage(struct page *page, unsigned long vaddr) -{ - void *vto; - unsigned long flags; - - /* Clear using TMPALIAS region. The page doesn't need to - be flushed but the kernel mapping needs to be purged. */ - - vto = kmap_atomic(page); - - /* The PA-RISC 2.0 Architecture book states on page F-6: - "Before a write-capable translation is enabled, *all* - non-equivalently-aliased translations must be removed - from the page table and purged from the TLB. (Note - that the caches are not required to be flushed at this - time.) Before any non-equivalent aliased translation - is re-enabled, the virtual address range for the writeable - page (the entire page) must be flushed from the cache, - and the write-capable translation removed from the page - table and purged from the TLB." */ - - purge_kernel_dcache_page_asm((unsigned long)vto); - purge_tlb_start(flags); - pdtlb_kernel(vto); - purge_tlb_end(flags); - preempt_disable(); - clear_user_page_asm(vto, vaddr); - preempt_enable(); - - pagefault_enable(); /* kunmap_atomic(addr, KM_USER0); */ -} - -void copy_user_highpage(struct page *to, struct page *from, - unsigned long vaddr, struct vm_area_struct *vma) -{ - void *vfrom, *vto; - unsigned long flags; - - /* Copy using TMPALIAS region. This has the advantage - that the `from' page doesn't need to be flushed. However, - the `to' page must be flushed in copy_user_page_asm since - it can be used to bring in executable code. */ - - vfrom = kmap_atomic(from); - vto = kmap_atomic(to); - - purge_kernel_dcache_page_asm((unsigned long)vto); - purge_tlb_start(flags); - pdtlb_kernel(vto); - pdtlb_kernel(vfrom); - purge_tlb_end(flags); - preempt_disable(); - copy_user_page_asm(vto, vfrom, vaddr); - flush_dcache_page_asm(__pa(vto), vaddr); - preempt_enable(); - - pagefault_enable(); /* kunmap_atomic(addr, KM_USER1); */ - pagefault_enable(); /* kunmap_atomic(addr, KM_USER0); */ -} - -#endif /* CONFIG_PARISC_TMPALIAS */ -- cgit v0.10.2 From e9af8b7aba1fd5f2175ec552ec36d5efe57f6c1d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 23 Mar 2014 15:24:40 +0100 Subject: parisc: wire up sys_utimes We seem to be nearly the only platform which does not provide the sys_utimes syscall. Adding it now makes our life much easier with userspace applications (like dietlibc and e2fsprogs) since we then behave like all other platforms too and don't need extra patches which are hard to get upstream anyway because we are not a mainstream architecture. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v3.13 diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 4270679..265ae51 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -828,13 +828,13 @@ #define __NR_finit_module (__NR_Linux + 333) #define __NR_sched_setattr (__NR_Linux + 334) #define __NR_sched_getattr (__NR_Linux + 335) +#define __NR_utimes (__NR_Linux + 336) -#define __NR_Linux_syscalls (__NR_sched_getattr + 1) +#define __NR_Linux_syscalls (__NR_utimes + 1) #define __IGNORE_select /* newselect */ #define __IGNORE_fadvise64 /* fadvise64_64 */ -#define __IGNORE_utimes /* utime */ #define HPUX_GATEWAY_ADDR 0xC0000004 diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 8fa3fbb..80e5dd2 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -431,6 +431,7 @@ ENTRY_SAME(finit_module) ENTRY_SAME(sched_setattr) ENTRY_SAME(sched_getattr) /* 335 */ + ENTRY_COMP(utimes) /* Nothing yet */ -- cgit v0.10.2 From a34fe10750ebe524a39f97bd78ab4d232a554edb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 21 Feb 2014 17:34:57 +0000 Subject: parisc: locks: remove redundant arch_*_relax operations Now that the arch_{spin,read,write}_relax macros default to cpu_relax(), remove the redundant definitions for parisc. Cc: Helge Deller Signed-off-by: Will Deacon Signed-off-by: Helge Deller diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 3516e0b..64f2992 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -191,8 +191,4 @@ static __inline__ int arch_write_can_lock(arch_rwlock_t *rw) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SPINLOCK_H */ -- cgit v0.10.2 From ebf4ad955d3e26d4d2a33709624fc7b5b9d3b969 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 21 Mar 2014 01:52:48 -0500 Subject: net: micrel : ks8851-ml: add vdd-supply support Few platforms use external regulator to keep the ethernet MAC supplied. So, request and enable the regulator for driver functionality. Fixes: 66fda75f47dc (regulator: core: Replace direct ops->disable usage) Reported-by: Russell King Suggested-by: Markus Pargmann Signed-off-by: Nishanth Menon Signed-off-by: David S. Miller diff --git a/Documentation/devicetree/bindings/net/micrel-ks8851.txt b/Documentation/devicetree/bindings/net/micrel-ks8851.txt index 11ace3c..4fc3927 100644 --- a/Documentation/devicetree/bindings/net/micrel-ks8851.txt +++ b/Documentation/devicetree/bindings/net/micrel-ks8851.txt @@ -7,3 +7,4 @@ Required properties: Optional properties: - local-mac-address : Ethernet mac address to use +- vdd-supply: supply for Ethernet mac diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 727b546a..e0c92e0 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -83,6 +84,7 @@ union ks8851_tx_hdr { * @rc_rxqcr: Cached copy of KS_RXQCR. * @eeprom_size: Companion eeprom size in Bytes, 0 if no eeprom * @eeprom: 93CX6 EEPROM state for accessing on-board EEPROM. + * @vdd_reg: Optional regulator supplying the chip * * The @lock ensures that the chip is protected when certain operations are * in progress. When the read or write packet transfer is in progress, most @@ -130,6 +132,7 @@ struct ks8851_net { struct spi_transfer spi_xfer2[2]; struct eeprom_93cx6 eeprom; + struct regulator *vdd_reg; }; static int msg_enable; @@ -1414,6 +1417,21 @@ static int ks8851_probe(struct spi_device *spi) ks->spidev = spi; ks->tx_space = 6144; + ks->vdd_reg = regulator_get_optional(&spi->dev, "vdd"); + if (IS_ERR(ks->vdd_reg)) { + ret = PTR_ERR(ks->vdd_reg); + if (ret == -EPROBE_DEFER) + goto err_reg; + } else { + ret = regulator_enable(ks->vdd_reg); + if (ret) { + dev_err(&spi->dev, "regulator enable fail: %d\n", + ret); + goto err_reg_en; + } + } + + mutex_init(&ks->lock); spin_lock_init(&ks->statelock); @@ -1508,8 +1526,14 @@ static int ks8851_probe(struct spi_device *spi) err_netdev: free_irq(ndev->irq, ks); -err_id: err_irq: +err_id: + if (!IS_ERR(ks->vdd_reg)) + regulator_disable(ks->vdd_reg); +err_reg_en: + if (!IS_ERR(ks->vdd_reg)) + regulator_put(ks->vdd_reg); +err_reg: free_netdev(ndev); return ret; } @@ -1523,6 +1547,10 @@ static int ks8851_remove(struct spi_device *spi) unregister_netdev(priv->netdev); free_irq(spi->irq, priv); + if (!IS_ERR(priv->vdd_reg)) { + regulator_disable(priv->vdd_reg); + regulator_put(priv->vdd_reg); + } free_netdev(priv->netdev); return 0; -- cgit v0.10.2 From 6e8d7968e92f7668a2a615773ad3940f0219dcbd Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 14 Mar 2014 19:52:23 +0000 Subject: arm64: Implement custom mmap functions for dma mapping The current dma_ops do not specify an mmap function so maping falls back to the default implementation. There are at least two issues with using the default implementation: 1) The pgprot is always pgprot_noncached (strongly ordered) memory even with coherent operations 2) dma_common_mmap calls virt_to_page on the remapped non-coherent address which leads to invalid memory being mapped. Fix both these issue by implementing a custom mmap function which correctly accounts for remapped addresses and sets vm_pg_prot appropriately. Signed-off-by: Laura Abbott [catalin.marinas@arm.com: replaced "arm64_" with "__" prefix for consistency] Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 88fbc5e..81eea3d 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -221,9 +221,52 @@ static void __swiotlb_sync_sg_for_device(struct device *dev, sg->length, dir); } +/* vma->vm_page_prot must be set appropriately before calling this function */ +static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + int ret = -ENXIO; + unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> + PAGE_SHIFT; + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); + } + + return ret; +} + +static int __swiotlb_mmap_noncoherent(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); + return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + +static int __swiotlb_mmap_coherent(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + /* Just use whatever page_prot attributes were specified */ + return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} + struct dma_map_ops noncoherent_swiotlb_dma_ops = { .alloc = __dma_alloc_noncoherent, .free = __dma_free_noncoherent, + .mmap = __swiotlb_mmap_noncoherent, .map_page = __swiotlb_map_page, .unmap_page = __swiotlb_unmap_page, .map_sg = __swiotlb_map_sg_attrs, @@ -240,6 +283,7 @@ EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops); struct dma_map_ops coherent_swiotlb_dma_ops = { .alloc = __dma_alloc_coherent, .free = __dma_free_coherent, + .mmap = __swiotlb_mmap_coherent, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, .map_sg = swiotlb_map_sg_attrs, -- cgit v0.10.2 From 214fdbe74a096c3aeb7af81d7900e2ab966b10d6 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 14 Mar 2014 19:52:24 +0000 Subject: arm64: Support DMA_ATTR_WRITE_COMBINE DMA_ATTR_WRITE_COMBINE is currently ignored. Set the pgprot appropriately for non coherent opperations. Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 81eea3d..5bba6be 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -30,6 +30,16 @@ struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); +static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, + bool coherent) +{ + if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) + return pgprot_writecombine(prot); + else if (!coherent) + return pgprot_dmacoherent(prot); + return prot; +} + static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) @@ -104,7 +114,7 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, for (i = 0; i < (size >> PAGE_SHIFT); i++) map[i] = page + i; coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP, - pgprot_dmacoherent(pgprot_default)); + __get_dma_pgprot(attrs, pgprot_default, false)); kfree(map); if (!coherent_ptr) goto no_map; @@ -250,7 +260,7 @@ static int __swiotlb_mmap_noncoherent(struct device *dev, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { - vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false); return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); } -- cgit v0.10.2 From 196adf2f3015eacac0567278ba538e3ffdd16d0e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 24 Mar 2014 10:35:35 +0000 Subject: arm64: Remove pgprot_dmacoherent() Since this macro is identical to pgprot_writecombine() and is only used in a single place, remove it completely to avoid confusion. On ARMv7+ processors, the coherent DMA mapping must be Normal NonCacheable (a.k.a. writecombine) to avoid mismatched hardware attribute aliases (with the kernel linear mapping as Normal Cacheable). Signed-off-by: Catalin Marinas diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index ae10350..da92265 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -280,8 +280,6 @@ static inline int has_transparent_hugepage(void) __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRnE) | PTE_PXN | PTE_UXN) #define pgprot_writecombine(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) -#define pgprot_dmacoherent(prot) \ - __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 5bba6be..0ba347e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -33,10 +33,8 @@ EXPORT_SYMBOL(dma_ops); static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot, bool coherent) { - if (dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) + if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs)) return pgprot_writecombine(prot); - else if (!coherent) - return pgprot_dmacoherent(prot); return prot; } -- cgit v0.10.2 From 5f12c5eca6e6b7aeb4b2028d579f614b4fe7a81f Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 18 Mar 2014 16:10:24 -0500 Subject: i2c: cpm: Fix build by adding of_address.h and of_irq.h Fixes a build break due to the undeclared use of irq_of_parse_and_map() and of_iomap(). This build break was apparently introduced while the driver was unbuildable due to the bug fixed by 62c19c9d29e65086e5ae76df371ed2e6b23f00cd ("i2c: Remove usage of orphaned symbol OF_I2C"). When 62c19c was added in v3.14-rc7, the driver was enabled again, breaking the powerpc mpc85xx_defconfig and mpc85xx_smp_defconfig. 62c19c is marked for stable, so this should go there as well. Reported-by: Geert Uytterhoeven Signed-off-by: Scott Wood Signed-off-by: Wolfram Sang Cc: stable@kernel.org diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index be7f0a2..f3b89a4 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c @@ -39,7 +39,9 @@ #include #include #include +#include #include +#include #include #include #include -- cgit v0.10.2 From 9dd721c6dbfc310f94306902611f86dda87a45fa Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Mar 2014 13:42:48 -0700 Subject: x86, kaslr: fix module lock ordering problem There was a potential lock ordering problem with the module kASLR patch ("x86, kaslr: randomize module base load address"). This patch removes the usage of the module_mutex and creates a new mutex to protect the module base address offset value. Chain exists of: text_mutex --> kprobe_insn_slots.mutex --> module_mutex [ 0.515561] Possible unsafe locking scenario: [ 0.515561] [ 0.515561] CPU0 CPU1 [ 0.515561] ---- ---- [ 0.515561] lock(module_mutex); [ 0.515561] lock(kprobe_insn_slots.mutex); [ 0.515561] lock(module_mutex); [ 0.515561] lock(text_mutex); [ 0.515561] [ 0.515561] *** DEADLOCK *** Reported-by: Fengguang Wu Signed-off-by: Andy Honig Signed-off-by: Kees Cook Reviewed-by: Masami Hiramatsu Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 4948313..e69f988 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -48,6 +48,9 @@ do { \ static unsigned long module_load_offset; static int randomize_modules = 1; +/* Mutex protects the module_load_offset. */ +static DEFINE_MUTEX(module_kaslr_mutex); + static int __init parse_nokaslr(char *p) { randomize_modules = 0; @@ -58,7 +61,7 @@ early_param("nokaslr", parse_nokaslr); static unsigned long int get_module_load_offset(void) { if (randomize_modules) { - mutex_lock(&module_mutex); + mutex_lock(&module_kaslr_mutex); /* * Calculate the module_load_offset the first time this * code is called. Once calculated it stays the same until @@ -67,7 +70,7 @@ static unsigned long int get_module_load_offset(void) if (module_load_offset == 0) module_load_offset = (get_random_int() % 1024 + 1) * PAGE_SIZE; - mutex_unlock(&module_mutex); + mutex_unlock(&module_kaslr_mutex); } return module_load_offset; } -- cgit v0.10.2 From cb3042d609e30e6144024801c89be3925106752b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Mar 2014 14:45:12 -0400 Subject: sparc64: Make sure %pil interrupts are enabled during hypervisor yield. In arch_cpu_idle() we must enable %pil based interrupts before potentially invoking the hypervisor cpu yield call. As per the Hypervisor API documentation for cpu_yield: Interrupts which are blocked by some mechanism other that pstate.ie (for example %pil) are not guaranteed to cause a return from this service. It seems that only first generation Niagara chips are hit by this bug. My best guess is that later chips implement this in hardware and wake up anyways from %pil events, whereas in first generation chips the yield is implemented completely in hypervisor code and requires %pil to be enabled in order to wake properly from this call. Fixes: 87fa05aeb3a5 ("sparc: Use generic idle loop") Reported-by: Fabio M. Di Nitto Reported-by: Jan Engelhardt Tested-by: Jan Engelhardt Signed-off-by: David S. Miller diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 32a280e..d7b4967 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -58,9 +58,12 @@ void arch_cpu_idle(void) { if (tlb_type != hypervisor) { touch_nmi_watchdog(); + local_irq_enable(); } else { unsigned long pstate; + local_irq_enable(); + /* The sun4v sleeping code requires that we have PSTATE.IE cleared over * the cpu sleep hypervisor call. */ @@ -82,7 +85,6 @@ void arch_cpu_idle(void) : "=&r" (pstate) : "i" (PSTATE_IE)); } - local_irq_enable(); } #ifdef CONFIG_HOTPLUG_CPU -- cgit v0.10.2 From c27f0872a3448c46e561e226b5b97f77187b06d2 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 21 Mar 2014 20:53:57 +0800 Subject: netpoll: fix the skb check in pkt_is_ns Neighbor Solicitation is ipv6 protocol, so we should check skb->protocol with ETH_P_IPV6 Signed-off-by: Li RongQing Cc: WANG Cong Signed-off-by: David S. Miller diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a664f78..df9e6b1 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -742,7 +742,7 @@ static bool pkt_is_ns(struct sk_buff *skb) struct nd_msg *msg; struct ipv6hdr *hdr; - if (skb->protocol != htons(ETH_P_ARP)) + if (skb->protocol != htons(ETH_P_IPV6)) return false; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) return false; -- cgit v0.10.2 From 33b7107f59a61236d94ecd6b45e20283cd5abcc8 Mon Sep 17 00:00:00 2001 From: Christian Riesch Date: Mon, 24 Mar 2014 13:46:26 +0100 Subject: net: davinci_emac: Replace devm_request_irq with request_irq In commit 6892b41d9701283085b655c6086fb57a5d63fa47 Author: Lad, Prabhakar Date: Tue Jun 25 21:24:51 2013 +0530 net: davinci: emac: Convert to devm_* api the call of request_irq is replaced by devm_request_irq and the call of free_irq is removed. But since interrupts are requested in emac_dev_open, doing ifconfig up/down on the board requests the interrupts again each time, causing devm_request_irq to fail. The interface is dead until the device is rebooted. This patch reverts said commit partially: It changes the driver back to use request_irq instead of devm_request_irq, puts free_irq back in place, but keeps the remaining changes of the original patch. Reported-by: Jon Ringle Signed-off-by: Christian Riesch Cc: Lad, Prabhakar Cc: Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index cd9b164..2514304 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1532,7 +1532,7 @@ static int emac_dev_open(struct net_device *ndev) struct device *emac_dev = &ndev->dev; u32 cnt; struct resource *res; - int ret; + int q, m, ret; int i = 0; int k = 0; struct emac_priv *priv = netdev_priv(ndev); @@ -1567,8 +1567,7 @@ static int emac_dev_open(struct net_device *ndev) while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k))) { for (i = res->start; i <= res->end; i++) { - if (devm_request_irq(&priv->pdev->dev, i, emac_irq, - 0, ndev->name, ndev)) + if (request_irq(i, emac_irq, 0, ndev->name, ndev)) goto rollback; } k++; @@ -1641,7 +1640,15 @@ static int emac_dev_open(struct net_device *ndev) rollback: - dev_err(emac_dev, "DaVinci EMAC: devm_request_irq() failed"); + dev_err(emac_dev, "DaVinci EMAC: request_irq() failed"); + + for (q = k; k >= 0; k--) { + for (m = i; m >= res->start; m--) + free_irq(m, ndev); + res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k-1); + m = res->end; + } + ret = -EBUSY; err: pm_runtime_put(&priv->pdev->dev); @@ -1659,6 +1666,9 @@ err: */ static int emac_dev_stop(struct net_device *ndev) { + struct resource *res; + int i = 0; + int irq_num; struct emac_priv *priv = netdev_priv(ndev); struct device *emac_dev = &ndev->dev; @@ -1674,6 +1684,13 @@ static int emac_dev_stop(struct net_device *ndev) if (priv->phydev) phy_disconnect(priv->phydev); + /* Free IRQ */ + while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, i))) { + for (irq_num = res->start; irq_num <= res->end; irq_num++) + free_irq(irq_num, priv->ndev); + i++; + } + if (netif_msg_drv(priv)) dev_notice(emac_dev, "DaVinci EMAC: %s stopped\n", ndev->name); -- cgit v0.10.2 From cd11cf505318ff24e42f35145f9cdf8596fa1958 Mon Sep 17 00:00:00 2001 From: Christian Riesch Date: Mon, 24 Mar 2014 13:46:27 +0100 Subject: net: davinci_emac: Fix rollback of emac_dev_open() If an error occurs during the initialization in emac_dev_open() (the driver's ndo_open function), interrupts, DMA descriptors etc. must be freed. The current rollback code is buggy in several ways. 1) Freeing the interrupts. The current code will not free all interrupts that were requested by the driver. Furthermore, the code tries to do a platform_get_resource(priv->pdev, IORESOURCE_IRQ, -1) in its last iteration. This patch fixes these bugs. 2) Wrong order of err: and rollback: labels. If the setup of the PHY in the code fails, the interrupts that have been requested before are not freed: request irq if requesting irqs fails, goto rollback setup phy if phy setup fails, goto err return 0 rollback: free irqs err: This patch brings the code into the correct order. 3) The code calls napi_enable() and emac_int_enable(), but does not undo both in case of an error. This patch adds calls of emac_int_disable() and napi_disable() to the rollback code. 4) RX DMA descriptors are not freed in case of an error: Right before requesting the irqs, the function creates DMA descriptors for the RX channel. These RX descriptors are never freed when we jump to either rollback or err. This patch adds code for freeing the DMA descriptors in the case of an initialization error. This required a modification of cpdma_ctrl_stop() in davinci_cpdma.c: We must be able to call this function to free the DMA descriptors while the DMA channels are in IDLE state (before cpdma_ctlr_start() was called). Tested on a custom board with the Texas Instruments AM1808. Signed-off-by: Christian Riesch Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 364d0c7..88ef270 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -355,7 +355,7 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr) int i; spin_lock_irqsave(&ctlr->lock, flags); - if (ctlr->state != CPDMA_STATE_ACTIVE) { + if (ctlr->state == CPDMA_STATE_TEARDOWN) { spin_unlock_irqrestore(&ctlr->lock, flags); return -EINVAL; } @@ -891,7 +891,7 @@ int cpdma_chan_stop(struct cpdma_chan *chan) unsigned timeout; spin_lock_irqsave(&chan->lock, flags); - if (chan->state != CPDMA_STATE_ACTIVE) { + if (chan->state == CPDMA_STATE_TEARDOWN) { spin_unlock_irqrestore(&chan->lock, flags); return -EINVAL; } diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 2514304..8f0e69c 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1533,8 +1533,8 @@ static int emac_dev_open(struct net_device *ndev) u32 cnt; struct resource *res; int q, m, ret; + int res_num = 0, irq_num = 0; int i = 0; - int k = 0; struct emac_priv *priv = netdev_priv(ndev); pm_runtime_get(&priv->pdev->dev); @@ -1564,14 +1564,24 @@ static int emac_dev_open(struct net_device *ndev) } /* Request IRQ */ + while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, + res_num))) { + for (irq_num = res->start; irq_num <= res->end; irq_num++) { + dev_err(emac_dev, "Request IRQ %d\n", irq_num); + if (request_irq(irq_num, emac_irq, 0, ndev->name, + ndev)) { + dev_err(emac_dev, + "DaVinci EMAC: request_irq() failed\n"); + ret = -EBUSY; - while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k))) { - for (i = res->start; i <= res->end; i++) { - if (request_irq(i, emac_irq, 0, ndev->name, ndev)) goto rollback; + } } - k++; + res_num++; } + /* prepare counters for rollback in case of an error */ + res_num--; + irq_num--; /* Start/Enable EMAC hardware */ emac_hw_enable(priv); @@ -1638,19 +1648,23 @@ static int emac_dev_open(struct net_device *ndev) return 0; -rollback: - - dev_err(emac_dev, "DaVinci EMAC: request_irq() failed"); +err: + emac_int_disable(priv); + napi_disable(&priv->napi); - for (q = k; k >= 0; k--) { - for (m = i; m >= res->start; m--) +rollback: + for (q = res_num; q >= 0; q--) { + res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, q); + /* at the first iteration, irq_num is already set to the + * right value + */ + if (q != res_num) + irq_num = res->end; + + for (m = irq_num; m >= res->start; m--) free_irq(m, ndev); - res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k-1); - m = res->end; } - - ret = -EBUSY; -err: + cpdma_ctlr_stop(priv->dma); pm_runtime_put(&priv->pdev->dev); return ret; } -- cgit v0.10.2 From 4b29dba9c085a4fb79058fb1c45a2f6257ca3dfa Mon Sep 17 00:00:00 2001 From: David Stevens Date: Mon, 24 Mar 2014 10:39:58 -0400 Subject: vxlan: fix nonfunctional neigh_reduce() The VXLAN neigh_reduce() code is completely non-functional since check-in. Specific errors: 1) The original code drops all packets with a multicast destination address, even though neighbor solicitations are sent to the solicited-node address, a multicast address. The code after this check was never run. 2) The neighbor table lookup used the IPv6 header destination, which is the solicited node address, rather than the target address from the neighbor solicitation. So neighbor lookups would always fail if it got this far. Also for L3MISSes. 3) The code calls ndisc_send_na(), which does a send on the tunnel device. The context for neigh_reduce() is the transmit path, vxlan_xmit(), where the host or a bridge-attached neighbor is trying to transmit a neighbor solicitation. To respond to it, the tunnel endpoint needs to do a *receive* of the appropriate neighbor advertisement. Doing a send, would only try to send the advertisement, encapsulated, to the remote destinations in the fdb -- hosts that definitely did not do the corresponding solicitation. 4) The code uses the tunnel endpoint IPv6 forwarding flag to determine the isrouter flag in the advertisement. This has nothing to do with whether or not the target is a router, and generally won't be set since the tunnel endpoint is bridging, not routing, traffic. The patch below creates a proxy neighbor advertisement to respond to neighbor solicitions as intended, providing proper IPv6 support for neighbor reduction. Signed-off-by: David L Stevens Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a7eb3f2..1236812 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1342,15 +1342,103 @@ out: } #if IS_ENABLED(CONFIG_IPV6) + +static struct sk_buff *vxlan_na_create(struct sk_buff *request, + struct neighbour *n, bool isrouter) +{ + struct net_device *dev = request->dev; + struct sk_buff *reply; + struct nd_msg *ns, *na; + struct ipv6hdr *pip6; + u8 *daddr; + int na_olen = 8; /* opt hdr + ETH_ALEN for target */ + int ns_olen; + int i, len; + + if (dev == NULL) + return NULL; + + len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) + + sizeof(*na) + na_olen + dev->needed_tailroom; + reply = alloc_skb(len, GFP_ATOMIC); + if (reply == NULL) + return NULL; + + reply->protocol = htons(ETH_P_IPV6); + reply->dev = dev; + skb_reserve(reply, LL_RESERVED_SPACE(request->dev)); + skb_push(reply, sizeof(struct ethhdr)); + skb_set_mac_header(reply, 0); + + ns = (struct nd_msg *)skb_transport_header(request); + + daddr = eth_hdr(request)->h_source; + ns_olen = request->len - skb_transport_offset(request) - sizeof(*ns); + for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) { + if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + break; + } + } + + /* Ethernet header */ + ether_addr_copy(eth_hdr(reply)->h_dest, daddr); + ether_addr_copy(eth_hdr(reply)->h_source, n->ha); + eth_hdr(reply)->h_proto = htons(ETH_P_IPV6); + reply->protocol = htons(ETH_P_IPV6); + + skb_pull(reply, sizeof(struct ethhdr)); + skb_set_network_header(reply, 0); + skb_put(reply, sizeof(struct ipv6hdr)); + + /* IPv6 header */ + + pip6 = ipv6_hdr(reply); + memset(pip6, 0, sizeof(struct ipv6hdr)); + pip6->version = 6; + pip6->priority = ipv6_hdr(request)->priority; + pip6->nexthdr = IPPROTO_ICMPV6; + pip6->hop_limit = 255; + pip6->daddr = ipv6_hdr(request)->saddr; + pip6->saddr = *(struct in6_addr *)n->primary_key; + + skb_pull(reply, sizeof(struct ipv6hdr)); + skb_set_transport_header(reply, 0); + + na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen); + + /* Neighbor Advertisement */ + memset(na, 0, sizeof(*na)+na_olen); + na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + na->icmph.icmp6_router = isrouter; + na->icmph.icmp6_override = 1; + na->icmph.icmp6_solicited = 1; + na->target = ns->target; + ether_addr_copy(&na->opt[2], n->ha); + na->opt[0] = ND_OPT_TARGET_LL_ADDR; + na->opt[1] = na_olen >> 3; + + na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr, + &pip6->daddr, sizeof(*na)+na_olen, IPPROTO_ICMPV6, + csum_partial(na, sizeof(*na)+na_olen, 0)); + + pip6->payload_len = htons(sizeof(*na)+na_olen); + + skb_push(reply, sizeof(struct ipv6hdr)); + + reply->ip_summed = CHECKSUM_UNNECESSARY; + + return reply; +} + static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) { struct vxlan_dev *vxlan = netdev_priv(dev); - struct neighbour *n; - union vxlan_addr ipa; + struct nd_msg *msg; const struct ipv6hdr *iphdr; const struct in6_addr *saddr, *daddr; - struct nd_msg *msg; - struct inet6_dev *in6_dev = NULL; + struct neighbour *n; + struct inet6_dev *in6_dev; in6_dev = __in6_dev_get(dev); if (!in6_dev) @@ -1363,19 +1451,20 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) saddr = &iphdr->saddr; daddr = &iphdr->daddr; - if (ipv6_addr_loopback(daddr) || - ipv6_addr_is_multicast(daddr)) - goto out; - msg = (struct nd_msg *)skb_transport_header(skb); if (msg->icmph.icmp6_code != 0 || msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) goto out; - n = neigh_lookup(ipv6_stub->nd_tbl, daddr, dev); + if (ipv6_addr_loopback(daddr) || + ipv6_addr_is_multicast(&msg->target)) + goto out; + + n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, dev); if (n) { struct vxlan_fdb *f; + struct sk_buff *reply; if (!(n->nud_state & NUD_CONNECTED)) { neigh_release(n); @@ -1389,13 +1478,23 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) goto out; } - ipv6_stub->ndisc_send_na(dev, n, saddr, &msg->target, - !!in6_dev->cnf.forwarding, - true, false, false); + reply = vxlan_na_create(skb, n, + !!(f ? f->flags & NTF_ROUTER : 0)); + neigh_release(n); + + if (reply == NULL) + goto out; + + if (netif_rx_ni(reply) == NET_RX_DROP) + dev->stats.rx_dropped++; + } else if (vxlan->flags & VXLAN_F_L3MISS) { - ipa.sin6.sin6_addr = *daddr; - ipa.sa.sa_family = AF_INET6; + union vxlan_addr ipa = { + .sin6.sin6_addr = msg->target, + .sa.sa_family = AF_INET6, + }; + vxlan_ip_miss(dev, &ipa); } -- cgit v0.10.2 From a5d0e7c037119484a7006b883618bfa87996cb41 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Mon, 24 Mar 2014 16:56:38 +0100 Subject: tipc: fix spinlock recursion bug for failed subscriptions If a topology event subscription fails for any reason, such as out of memory, max number reached or because we received an invalid request the correct behavior is to terminate the subscribers connection to the topology server. This is currently broken and produces the following oops: [27.953662] tipc: Subscription rejected, illegal request [27.955329] BUG: spinlock recursion on CPU#1, kworker/u4:0/6 [27.957066] lock: 0xffff88003c67f408, .magic: dead4ead, .owner: kworker/u4:0/6, .owner_cpu: 1 [27.958054] CPU: 1 PID: 6 Comm: kworker/u4:0 Not tainted 3.14.0-rc6+ #5 [27.960230] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [27.960874] Workqueue: tipc_rcv tipc_recv_work [tipc] [27.961430] ffff88003c67f408 ffff88003de27c18 ffffffff815c0207 ffff88003de1c050 [27.962292] ffff88003de27c38 ffffffff815beec5 ffff88003c67f408 ffffffff817f0a8a [27.963152] ffff88003de27c58 ffffffff815beeeb ffff88003c67f408 ffffffffa0013520 [27.964023] Call Trace: [27.964292] [] dump_stack+0x45/0x56 [27.964874] [] spin_dump+0x8c/0x91 [27.965420] [] spin_bug+0x21/0x26 [27.965995] [] do_raw_spin_lock+0x116/0x140 [27.966631] [] _raw_spin_lock_bh+0x15/0x20 [27.967256] [] subscr_conn_shutdown_event+0x20/0xa0 [tipc] [27.968051] [] tipc_close_conn+0xa4/0xb0 [tipc] [27.968722] [] tipc_conn_terminate+0x1a/0x30 [tipc] [27.969436] [] subscr_conn_msg_event+0x1f2/0x2f0 [tipc] [27.970209] [] tipc_receive_from_sock+0x90/0xf0 [tipc] [27.970972] [] tipc_recv_work+0x29/0x50 [tipc] [27.971633] [] process_one_work+0x165/0x3e0 [27.972267] [] worker_thread+0x119/0x3a0 [27.972896] [] ? manage_workers.isra.25+0x2a0/0x2a0 [27.973622] [] kthread+0xdf/0x100 [27.974168] [] ? kthread_create_on_node+0x1a0/0x1a0 [27.974893] [] ret_from_fork+0x7c/0xb0 [27.975466] [] ? kthread_create_on_node+0x1a0/0x1a0 The recursion occurs when subscr_terminate tries to grab the subscriber lock, which is already taken by subscr_conn_msg_event. We fix this by checking if the request to establish a new subscription was successful, and if not we initiate termination of the subscriber after we have released the subscriber lock. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 11c9ae0..6424372 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -263,9 +263,9 @@ static void subscr_cancel(struct tipc_subscr *s, * * Called with subscriber lock held. */ -static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, - struct tipc_subscriber *subscriber) -{ +static int subscr_subscribe(struct tipc_subscr *s, + struct tipc_subscriber *subscriber, + struct tipc_subscription **sub_p) { struct tipc_subscription *sub; int swap; @@ -276,23 +276,21 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); subscr_cancel(s, subscriber); - return NULL; + return 0; } /* Refuse subscription if global limit exceeded */ if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); - subscr_terminate(subscriber); - return NULL; + return -EINVAL; } /* Allocate subscription object */ sub = kmalloc(sizeof(*sub), GFP_ATOMIC); if (!sub) { pr_warn("Subscription rejected, no memory\n"); - subscr_terminate(subscriber); - return NULL; + return -ENOMEM; } /* Initialize subscription object */ @@ -306,8 +304,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, (sub->seq.lower > sub->seq.upper)) { pr_warn("Subscription rejected, illegal request\n"); kfree(sub); - subscr_terminate(subscriber); - return NULL; + return -EINVAL; } INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); @@ -320,8 +317,8 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, (Handler)subscr_timeout, (unsigned long)sub); k_start_timer(&sub->timer, sub->timeout); } - - return sub; + *sub_p = sub; + return 0; } /* Handle one termination request for the subscriber */ @@ -335,10 +332,14 @@ static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; - struct tipc_subscription *sub; + struct tipc_subscription *sub = NULL; spin_lock_bh(&subscriber->lock); - sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber); + if (subscr_subscribe((struct tipc_subscr *)buf, subscriber, &sub) < 0) { + spin_unlock_bh(&subscriber->lock); + subscr_terminate(subscriber); + return; + } if (sub) tipc_nametbl_subscribe(sub); spin_unlock_bh(&subscriber->lock); -- cgit v0.10.2 From 41f50094b2ab4e37673e41a084ea61b907447159 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 24 Mar 2014 21:37:02 +0100 Subject: workqueue: Spelling s/instensive/intensive/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Tejun Heo diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 29da9e7..0523eab 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -294,7 +294,7 @@ enum { WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ - WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_CPU_INTENSIVE = 1 << 5, /* cpu intensive workqueue */ WQ_SYSFS = 1 << 6, /* visible in sysfs, see wq_sysfs_register() */ /* -- cgit v0.10.2 From b098d6726bbfb94c06d6e1097466187afddae61f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 Mar 2014 19:31:17 -0700 Subject: Linux 3.14-rc8 diff --git a/Makefile b/Makefile index ef779ec..c10b734 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From 09ed3d5ba06137913960f9c9385f71fc384193ab Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Sat, 15 Mar 2014 16:11:47 +0000 Subject: xen/balloon: flush persistent kmaps in correct position Xen balloon driver will update ballooned out pages' P2M entries to point to scratch page for PV guests. In 24f69373e2 ("xen/balloon: don't alloc page while non-preemptible", kmap_flush_unused was moved after updating P2M table. In that case for 32 bit PV guest we might end up with P2M X -----> S (S is mfn of balloon scratch page) M2P Y -----> X (Y is mfn in persistent kmap entry) kmap_flush_unused() iterates through all the PTEs in the kmap address space, using pte_to_page() to obtain the page. If the p2m and the m2p are inconsistent the incorrect page is returned. This will clear page->address on the wrong page which may cause subsequent oopses if that page is currently kmap'ed. Move the flush back between get_page and __set_phys_to_machine to fix this. Signed-off-by: Wei Liu Signed-off-by: David Vrabel Cc: stable@vger.kernel.org # 3.12+ diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 37d06ea..61a6ac8 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -399,11 +399,25 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) state = BP_EAGAIN; break; } + scrub_page(page); - pfn = page_to_pfn(page); - frame_list[i] = pfn_to_mfn(pfn); + frame_list[i] = page_to_pfn(page); + } - scrub_page(page); + /* + * Ensure that ballooned highmem pages don't have kmaps. + * + * Do this before changing the p2m as kmap_flush_unused() + * reads PTEs to obtain pages (and hence needs the original + * p2m entry). + */ + kmap_flush_unused(); + + /* Update direct mapping, invalidate P2M, and add to balloon. */ + for (i = 0; i < nr_pages; i++) { + pfn = frame_list[i]; + frame_list[i] = pfn_to_mfn(pfn); + page = pfn_to_page(pfn); #ifdef CONFIG_XEN_HAVE_PVMMU /* @@ -429,11 +443,9 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } #endif - balloon_append(pfn_to_page(pfn)); + balloon_append(page); } - /* Ensure that ballooned highmem pages don't have kmaps. */ - kmap_flush_unused(); flush_tlb_all(); set_xen_guest_handle(reservation.extent_start, frame_list); -- cgit v0.10.2 From 5926f87fdaad4be3ed10cec563bf357915e55a86 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 25 Mar 2014 10:38:37 +0000 Subject: Revert "xen: properly account for _PAGE_NUMA during xen pte translations" This reverts commit a9c8e4beeeb64c22b84c803747487857fe424b68. PTEs in Xen PV guests must contain machine addresses if _PAGE_PRESENT is set and pseudo-physical addresses is _PAGE_PRESENT is clear. This is because during a domain save/restore (migration) the page table entries are "canonicalised" and uncanonicalised". i.e., MFNs are converted to PFNs during domain save so that on a restore the page table entries may be rewritten with the new MFNs on the destination. This canonicalisation is only done for PTEs that are present. This change resulted in writing PTEs with MFNs if _PAGE_PROTNONE (or _PAGE_NUMA) was set but _PAGE_PRESENT was clear. These PTEs would be migrated as-is which would result in unexpected behaviour in the destination domain. Either a) the MFN would be translated to the wrong PFN/page; b) setting the _PAGE_PRESENT bit would clear the PTE because the MFN is no longer owned by the domain; or c) the present bit would not get set. Symptoms include "Bad page" reports when munmapping after migrating a domain. Signed-off-by: David Vrabel Acked-by: Konrad Rzeszutek Wilk Cc: [3.12+] diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5ad38ad..bbc8b12 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -445,20 +445,10 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte == b.pte; } -static inline int pteval_present(pteval_t pteval) -{ - /* - * Yes Linus, _PAGE_PROTNONE == _PAGE_NUMA. Expressing it this - * way clearly states that the intent is that protnone and numa - * hinting ptes are considered present for the purposes of - * pagetable operations like zapping, protection changes, gup etc. - */ - return pteval & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_NUMA); -} - static inline int pte_present(pte_t a) { - return pteval_present(pte_flags(a)); + return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE | + _PAGE_NUMA); } #define pte_accessible pte_accessible diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 256282e..2423ef0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -365,7 +365,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, /* Assume pteval_t is equivalent to all the other *val_t types. */ static pteval_t pte_mfn_to_pfn(pteval_t val) { - if (pteval_present(val)) { + if (val & _PAGE_PRESENT) { unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; unsigned long pfn = mfn_to_pfn(mfn); @@ -381,7 +381,7 @@ static pteval_t pte_mfn_to_pfn(pteval_t val) static pteval_t pte_pfn_to_mfn(pteval_t val) { - if (pteval_present(val)) { + if (val & _PAGE_PRESENT) { unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; pteval_t flags = val & PTE_FLAGS_MASK; unsigned long mfn; -- cgit v0.10.2 From ea2e64f280d2a34a8ed9ae3d783cd770d14b70ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Mar 2014 14:20:44 +0000 Subject: workqueue: Provide destroy_delayed_work_on_stack() If a delayed or deferrable work is on stack we need to tell debug objects that we are destroying the timer and the work. Otherwise we leak the tracking object. Signed-off-by: Thomas Gleixner Cc: Vince Weaver Acked-by: Tejun Heo Link: http://lkml.kernel.org/r/20140323141939.911487677@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 594521b..abdbe5a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -191,6 +191,7 @@ struct execute_work { #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); +extern void destroy_delayed_work_on_stack(struct delayed_work *work); static inline unsigned int work_static(struct work_struct *work) { return *work_data_bits(work) & WORK_STRUCT_STATIC; @@ -198,6 +199,7 @@ static inline unsigned int work_static(struct work_struct *work) #else static inline void __init_work(struct work_struct *work, int onstack) { } static inline void destroy_work_on_stack(struct work_struct *work) { } +static inline void destroy_delayed_work_on_stack(struct delayed_work *work) { } static inline unsigned int work_static(struct work_struct *work) { return 0; } #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 82ef9f3..5b690b5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -516,6 +516,13 @@ void destroy_work_on_stack(struct work_struct *work) } EXPORT_SYMBOL_GPL(destroy_work_on_stack); +void destroy_delayed_work_on_stack(struct delayed_work *work) +{ + destroy_timer_on_stack(&work->timer); + debug_object_free(&work->work, &work_debug_descr); +} +EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack); + #else static inline void debug_work_activate(struct work_struct *work) { } static inline void debug_work_deactivate(struct work_struct *work) { } -- cgit v0.10.2 From b712c8dae05931a76b6c17a4254f403798e6caef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 23 Mar 2014 14:20:45 +0000 Subject: x86: hpet: Use proper destructor for delayed work destroy_timer_on_stack() is hardly the right thing for a delayed work. We leak a tracking object for the work itself when DEBUG_OBJECTS is enabled. Signed-off-by: Thomas Gleixner Cc: Vince Weaver Cc: x86@kernel.org Link: http://lkml.kernel.org/r/20140323141940.034005322@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index da85a8e..b91abfd 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -699,7 +699,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); wait_for_completion(&work.complete); - destroy_timer_on_stack(&work.work.timer); + destroy_delayed_work_on_stack(&work.work); break; case CPU_DEAD: if (hdev) { -- cgit v0.10.2 From 83b03fd67b9b3fa3795871169f3c08c35b3d6ea8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 25 Mar 2014 19:51:38 +0100 Subject: ata: ahci_platform: fix ahci_platform_data->suspend method handling Looking at ST SPEAr1340 AHCI code (the only user of the deprecated pdata->suspend and pdata->resume) it is obvious the we should return after calling pdata->suspend() only if the function have returned non-zero return value. The code has been broken since commit 1e70c2 ("ata/ahci_platform: Add clock framework support"). Fix it. Cc: Viresh Kumar Cc: Shiraz Hashim Acked-by: Hans de Goede Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 70fbf66..7bd6adf 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -521,12 +521,19 @@ int ahci_platform_suspend(struct device *dev) if (rc) return rc; - if (pdata && pdata->suspend) - return pdata->suspend(dev); + if (pdata && pdata->suspend) { + rc = pdata->suspend(dev); + if (rc) + goto resume_host; + } ahci_platform_disable_resources(hpriv); return 0; + +resume_host: + ahci_platform_resume_host(dev); + return rc; } EXPORT_SYMBOL_GPL(ahci_platform_suspend); -- cgit v0.10.2 From fd990556f0fa25446c6bfa9cf4c9e49d387d4472 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 25 Mar 2014 19:51:39 +0100 Subject: ata: move library code from ahci_platform.c to libahci_platform.c Move AHCI platform library code from ahci_platform.c to libahci_platform.c and fix dependencies for ahci_st, ahci_imx and ahci_sunxi drivers. Acked-by: Hans de Goede Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 9f3e326..12767ad 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -99,7 +99,6 @@ config SATA_AHCI_PLATFORM config AHCI_ST tristate "ST AHCI SATA support" - depends on SATA_AHCI_PLATFORM depends on ARCH_STI help This option enables support for ST AHCI SATA controller. @@ -108,7 +107,7 @@ config AHCI_ST config AHCI_IMX tristate "Freescale i.MX AHCI SATA support" - depends on SATA_AHCI_PLATFORM && MFD_SYSCON + depends on MFD_SYSCON help This option enables support for the Freescale i.MX SoC's onboard AHCI SATA. @@ -117,7 +116,7 @@ config AHCI_IMX config AHCI_SUNXI tristate "Allwinner sunxi AHCI SATA support" - depends on ARCH_SUNXI && SATA_AHCI_PLATFORM + depends on ARCH_SUNXI help This option enables support for the Allwinner sunxi SoC's onboard AHCI SATA. @@ -126,7 +125,7 @@ config AHCI_SUNXI config AHCI_XGENE tristate "APM X-Gene 6.0Gbps AHCI SATA host controller support" - depends on SATA_AHCI_PLATFORM && (ARM64 || COMPILE_TEST) + depends on ARM64 || COMPILE_TEST select PHY_XGENE help This option enables support for APM X-Gene SoC SATA host controller. diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 095d461..5fa79ab 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -4,16 +4,16 @@ obj-$(CONFIG_ATA) += libata.o # non-SFF interface obj-$(CONFIG_SATA_AHCI) += ahci.o libahci.o obj-$(CONFIG_SATA_ACARD_AHCI) += acard-ahci.o libahci.o -obj-$(CONFIG_SATA_AHCI_PLATFORM) += ahci_platform.o libahci.o +obj-$(CONFIG_SATA_AHCI_PLATFORM) += ahci_platform.o libahci.o libahci_platform.o obj-$(CONFIG_SATA_FSL) += sata_fsl.o obj-$(CONFIG_SATA_INIC162X) += sata_inic162x.o obj-$(CONFIG_SATA_SIL24) += sata_sil24.o obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o -obj-$(CONFIG_AHCI_IMX) += ahci_imx.o -obj-$(CONFIG_AHCI_SUNXI) += ahci_sunxi.o -obj-$(CONFIG_AHCI_ST) += ahci_st.o -obj-$(CONFIG_AHCI_XGENE) += ahci_xgene.o +obj-$(CONFIG_AHCI_IMX) += ahci_imx.o libahci.o libahci_platform.o +obj-$(CONFIG_AHCI_SUNXI) += ahci_sunxi.o libahci.o libahci_platform.o +obj-$(CONFIG_AHCI_ST) += ahci_st.o libahci.o libahci_platform.o +obj-$(CONFIG_AHCI_XGENE) += ahci_xgene.o libahci.o libahci_platform.o # SFF w/ custom DMA obj-$(CONFIG_PDC_ADMA) += pdc_adma.o diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 7bd6adf..ef67e79 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -12,28 +12,15 @@ * any later version. */ -#include #include -#include #include #include -#include #include #include #include #include -#include -#include #include "ahci.h" -static void ahci_host_stop(struct ata_host *host); - -struct ata_port_operations ahci_platform_ops = { - .inherits = &ahci_ops, - .host_stop = ahci_host_stop, -}; -EXPORT_SYMBOL_GPL(ahci_platform_ops); - static const struct ata_port_info ahci_port_info = { .flags = AHCI_FLAG_COMMON, .pio_mask = ATA_PIO4, @@ -41,345 +28,6 @@ static const struct ata_port_info ahci_port_info = { .port_ops = &ahci_platform_ops, }; -static struct scsi_host_template ahci_platform_sht = { - AHCI_SHT("ahci_platform"), -}; - -/** - * ahci_platform_enable_clks - Enable platform clocks - * @hpriv: host private area to store config values - * - * This function enables all the clks found in hpriv->clks, starting at - * index 0. If any clk fails to enable it disables all the clks already - * enabled in reverse order, and then returns an error. - * - * RETURNS: - * 0 on success otherwise a negative error code - */ -int ahci_platform_enable_clks(struct ahci_host_priv *hpriv) -{ - int c, rc; - - for (c = 0; c < AHCI_MAX_CLKS && hpriv->clks[c]; c++) { - rc = clk_prepare_enable(hpriv->clks[c]); - if (rc) - goto disable_unprepare_clk; - } - return 0; - -disable_unprepare_clk: - while (--c >= 0) - clk_disable_unprepare(hpriv->clks[c]); - return rc; -} -EXPORT_SYMBOL_GPL(ahci_platform_enable_clks); - -/** - * ahci_platform_disable_clks - Disable platform clocks - * @hpriv: host private area to store config values - * - * This function disables all the clks found in hpriv->clks, in reverse - * order of ahci_platform_enable_clks (starting at the end of the array). - */ -void ahci_platform_disable_clks(struct ahci_host_priv *hpriv) -{ - int c; - - for (c = AHCI_MAX_CLKS - 1; c >= 0; c--) - if (hpriv->clks[c]) - clk_disable_unprepare(hpriv->clks[c]); -} -EXPORT_SYMBOL_GPL(ahci_platform_disable_clks); - -/** - * ahci_platform_enable_resources - Enable platform resources - * @hpriv: host private area to store config values - * - * This function enables all ahci_platform managed resources in the - * following order: - * 1) Regulator - * 2) Clocks (through ahci_platform_enable_clks) - * 3) Phy - * - * If resource enabling fails at any point the previous enabled resources - * are disabled in reverse order. - * - * RETURNS: - * 0 on success otherwise a negative error code - */ -int ahci_platform_enable_resources(struct ahci_host_priv *hpriv) -{ - int rc; - - if (hpriv->target_pwr) { - rc = regulator_enable(hpriv->target_pwr); - if (rc) - return rc; - } - - rc = ahci_platform_enable_clks(hpriv); - if (rc) - goto disable_regulator; - - if (hpriv->phy) { - rc = phy_init(hpriv->phy); - if (rc) - goto disable_clks; - - rc = phy_power_on(hpriv->phy); - if (rc) { - phy_exit(hpriv->phy); - goto disable_clks; - } - } - - return 0; - -disable_clks: - ahci_platform_disable_clks(hpriv); - -disable_regulator: - if (hpriv->target_pwr) - regulator_disable(hpriv->target_pwr); - return rc; -} -EXPORT_SYMBOL_GPL(ahci_platform_enable_resources); - -/** - * ahci_platform_disable_resources - Disable platform resources - * @hpriv: host private area to store config values - * - * This function disables all ahci_platform managed resources in the - * following order: - * 1) Phy - * 2) Clocks (through ahci_platform_disable_clks) - * 3) Regulator - */ -void ahci_platform_disable_resources(struct ahci_host_priv *hpriv) -{ - if (hpriv->phy) { - phy_power_off(hpriv->phy); - phy_exit(hpriv->phy); - } - - ahci_platform_disable_clks(hpriv); - - if (hpriv->target_pwr) - regulator_disable(hpriv->target_pwr); -} -EXPORT_SYMBOL_GPL(ahci_platform_disable_resources); - -static void ahci_platform_put_resources(struct device *dev, void *res) -{ - struct ahci_host_priv *hpriv = res; - int c; - - if (hpriv->got_runtime_pm) { - pm_runtime_put_sync(dev); - pm_runtime_disable(dev); - } - - for (c = 0; c < AHCI_MAX_CLKS && hpriv->clks[c]; c++) - clk_put(hpriv->clks[c]); -} - -/** - * ahci_platform_get_resources - Get platform resources - * @pdev: platform device to get resources for - * - * This function allocates an ahci_host_priv struct, and gets the following - * resources, storing a reference to them inside the returned struct: - * - * 1) mmio registers (IORESOURCE_MEM 0, mandatory) - * 2) regulator for controlling the targets power (optional) - * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node, - * or for non devicetree enabled platforms a single clock - * 4) phy (optional) - * - * RETURNS: - * The allocated ahci_host_priv on success, otherwise an ERR_PTR value - */ -struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct ahci_host_priv *hpriv; - struct clk *clk; - int i, rc = -ENOMEM; - - if (!devres_open_group(dev, NULL, GFP_KERNEL)) - return ERR_PTR(-ENOMEM); - - hpriv = devres_alloc(ahci_platform_put_resources, sizeof(*hpriv), - GFP_KERNEL); - if (!hpriv) - goto err_out; - - devres_add(dev, hpriv); - - hpriv->mmio = devm_ioremap_resource(dev, - platform_get_resource(pdev, IORESOURCE_MEM, 0)); - if (IS_ERR(hpriv->mmio)) { - dev_err(dev, "no mmio space\n"); - rc = PTR_ERR(hpriv->mmio); - goto err_out; - } - - hpriv->target_pwr = devm_regulator_get_optional(dev, "target"); - if (IS_ERR(hpriv->target_pwr)) { - rc = PTR_ERR(hpriv->target_pwr); - if (rc == -EPROBE_DEFER) - goto err_out; - hpriv->target_pwr = NULL; - } - - for (i = 0; i < AHCI_MAX_CLKS; i++) { - /* - * For now we must use clk_get(dev, NULL) for the first clock, - * because some platforms (da850, spear13xx) are not yet - * converted to use devicetree for clocks. For new platforms - * this is equivalent to of_clk_get(dev->of_node, 0). - */ - if (i == 0) - clk = clk_get(dev, NULL); - else - clk = of_clk_get(dev->of_node, i); - - if (IS_ERR(clk)) { - rc = PTR_ERR(clk); - if (rc == -EPROBE_DEFER) - goto err_out; - break; - } - hpriv->clks[i] = clk; - } - - hpriv->phy = devm_phy_get(dev, "sata-phy"); - if (IS_ERR(hpriv->phy)) { - rc = PTR_ERR(hpriv->phy); - switch (rc) { - case -ENODEV: - case -ENOSYS: - /* continue normally */ - hpriv->phy = NULL; - break; - - case -EPROBE_DEFER: - goto err_out; - - default: - dev_err(dev, "couldn't get sata-phy\n"); - goto err_out; - } - } - - pm_runtime_enable(dev); - pm_runtime_get_sync(dev); - hpriv->got_runtime_pm = true; - - devres_remove_group(dev, NULL); - return hpriv; - -err_out: - devres_release_group(dev, NULL); - return ERR_PTR(rc); -} -EXPORT_SYMBOL_GPL(ahci_platform_get_resources); - -/** - * ahci_platform_init_host - Bring up an ahci-platform host - * @pdev: platform device pointer for the host - * @hpriv: ahci-host private data for the host - * @pi_template: template for the ata_port_info to use - * @force_port_map: param passed to ahci_save_initial_config - * @mask_port_map: param passed to ahci_save_initial_config - * - * This function does all the usual steps needed to bring up an - * ahci-platform host, note any necessary resources (ie clks, phy, etc.) - * must be initialized / enabled before calling this. - * - * RETURNS: - * 0 on success otherwise a negative error code - */ -int ahci_platform_init_host(struct platform_device *pdev, - struct ahci_host_priv *hpriv, - const struct ata_port_info *pi_template, - unsigned int force_port_map, - unsigned int mask_port_map) -{ - struct device *dev = &pdev->dev; - struct ata_port_info pi = *pi_template; - const struct ata_port_info *ppi[] = { &pi, NULL }; - struct ata_host *host; - int i, irq, n_ports, rc; - - irq = platform_get_irq(pdev, 0); - if (irq <= 0) { - dev_err(dev, "no irq\n"); - return -EINVAL; - } - - /* prepare host */ - hpriv->flags |= (unsigned long)pi.private_data; - - ahci_save_initial_config(dev, hpriv, force_port_map, mask_port_map); - - if (hpriv->cap & HOST_CAP_NCQ) - pi.flags |= ATA_FLAG_NCQ; - - if (hpriv->cap & HOST_CAP_PMP) - pi.flags |= ATA_FLAG_PMP; - - ahci_set_em_messages(hpriv, &pi); - - /* CAP.NP sometimes indicate the index of the last enabled - * port, at other times, that of the last possible port, so - * determining the maximum port number requires looking at - * both CAP.NP and port_map. - */ - n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map)); - - host = ata_host_alloc_pinfo(dev, ppi, n_ports); - if (!host) - return -ENOMEM; - - host->private_data = hpriv; - - if (!(hpriv->cap & HOST_CAP_SSS) || ahci_ignore_sss) - host->flags |= ATA_HOST_PARALLEL_SCAN; - else - dev_info(dev, "SSS flag set, parallel bus scan disabled\n"); - - if (pi.flags & ATA_FLAG_EM) - ahci_reset_em(host); - - for (i = 0; i < host->n_ports; i++) { - struct ata_port *ap = host->ports[i]; - - ata_port_desc(ap, "mmio %pR", - platform_get_resource(pdev, IORESOURCE_MEM, 0)); - ata_port_desc(ap, "port 0x%x", 0x100 + ap->port_no * 0x80); - - /* set enclosure management message type */ - if (ap->flags & ATA_FLAG_EM) - ap->em_message_type = hpriv->em_msg_type; - - /* disabled/not-implemented port */ - if (!(hpriv->port_map & (1 << i))) - ap->ops = &ata_dummy_port_ops; - } - - rc = ahci_reset_controller(host); - if (rc) - return rc; - - ahci_init_controller(host); - ahci_print_info(host, "platform"); - - return ata_host_activate(host, irq, ahci_interrupt, IRQF_SHARED, - &ahci_platform_sht); -} -EXPORT_SYMBOL_GPL(ahci_platform_init_host); - static int ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -420,169 +68,6 @@ disable_resources: return rc; } -static void ahci_host_stop(struct ata_host *host) -{ - struct device *dev = host->dev; - struct ahci_platform_data *pdata = dev_get_platdata(dev); - struct ahci_host_priv *hpriv = host->private_data; - - if (pdata && pdata->exit) - pdata->exit(dev); - - ahci_platform_disable_resources(hpriv); -} - -#ifdef CONFIG_PM_SLEEP -/** - * ahci_platform_suspend_host - Suspend an ahci-platform host - * @dev: device pointer for the host - * - * This function does all the usual steps needed to suspend an - * ahci-platform host, note any necessary resources (ie clks, phy, etc.) - * must be disabled after calling this. - * - * RETURNS: - * 0 on success otherwise a negative error code - */ -int ahci_platform_suspend_host(struct device *dev) -{ - struct ata_host *host = dev_get_drvdata(dev); - struct ahci_host_priv *hpriv = host->private_data; - void __iomem *mmio = hpriv->mmio; - u32 ctl; - - if (hpriv->flags & AHCI_HFLAG_NO_SUSPEND) { - dev_err(dev, "firmware update required for suspend/resume\n"); - return -EIO; - } - - /* - * AHCI spec rev1.1 section 8.3.3: - * Software must disable interrupts prior to requesting a - * transition of the HBA to D3 state. - */ - ctl = readl(mmio + HOST_CTL); - ctl &= ~HOST_IRQ_EN; - writel(ctl, mmio + HOST_CTL); - readl(mmio + HOST_CTL); /* flush */ - - return ata_host_suspend(host, PMSG_SUSPEND); -} -EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); - -/** - * ahci_platform_resume_host - Resume an ahci-platform host - * @dev: device pointer for the host - * - * This function does all the usual steps needed to resume an ahci-platform - * host, note any necessary resources (ie clks, phy, etc.) must be - * initialized / enabled before calling this. - * - * RETURNS: - * 0 on success otherwise a negative error code - */ -int ahci_platform_resume_host(struct device *dev) -{ - struct ata_host *host = dev_get_drvdata(dev); - int rc; - - if (dev->power.power_state.event == PM_EVENT_SUSPEND) { - rc = ahci_reset_controller(host); - if (rc) - return rc; - - ahci_init_controller(host); - } - - ata_host_resume(host); - - return 0; -} -EXPORT_SYMBOL_GPL(ahci_platform_resume_host); - -/** - * ahci_platform_suspend - Suspend an ahci-platform device - * @dev: the platform device to suspend - * - * This function suspends the host associated with the device, followed by - * disabling all the resources of the device. - * - * RETURNS: - * 0 on success otherwise a negative error code - */ -int ahci_platform_suspend(struct device *dev) -{ - struct ahci_platform_data *pdata = dev_get_platdata(dev); - struct ata_host *host = dev_get_drvdata(dev); - struct ahci_host_priv *hpriv = host->private_data; - int rc; - - rc = ahci_platform_suspend_host(dev); - if (rc) - return rc; - - if (pdata && pdata->suspend) { - rc = pdata->suspend(dev); - if (rc) - goto resume_host; - } - - ahci_platform_disable_resources(hpriv); - - return 0; - -resume_host: - ahci_platform_resume_host(dev); - return rc; -} -EXPORT_SYMBOL_GPL(ahci_platform_suspend); - -/** - * ahci_platform_resume - Resume an ahci-platform device - * @dev: the platform device to resume - * - * This function enables all the resources of the device followed by - * resuming the host associated with the device. - * - * RETURNS: - * 0 on success otherwise a negative error code - */ -int ahci_platform_resume(struct device *dev) -{ - struct ahci_platform_data *pdata = dev_get_platdata(dev); - struct ata_host *host = dev_get_drvdata(dev); - struct ahci_host_priv *hpriv = host->private_data; - int rc; - - rc = ahci_platform_enable_resources(hpriv); - if (rc) - return rc; - - if (pdata && pdata->resume) { - rc = pdata->resume(dev); - if (rc) - goto disable_resources; - } - - rc = ahci_platform_resume_host(dev); - if (rc) - goto disable_resources; - - /* We resumed so update PM runtime state */ - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - - return 0; - -disable_resources: - ahci_platform_disable_resources(hpriv); - - return rc; -} -EXPORT_SYMBOL_GPL(ahci_platform_resume); -#endif - static SIMPLE_DEV_PM_OPS(ahci_pm_ops, ahci_platform_suspend, ahci_platform_resume); diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c new file mode 100644 index 0000000..7cb3a85 --- /dev/null +++ b/drivers/ata/libahci_platform.c @@ -0,0 +1,541 @@ +/* + * AHCI SATA platform library + * + * Copyright 2004-2005 Red Hat, Inc. + * Jeff Garzik + * Copyright 2010 MontaVista Software, LLC. + * Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ahci.h" + +static void ahci_host_stop(struct ata_host *host); + +struct ata_port_operations ahci_platform_ops = { + .inherits = &ahci_ops, + .host_stop = ahci_host_stop, +}; +EXPORT_SYMBOL_GPL(ahci_platform_ops); + +static struct scsi_host_template ahci_platform_sht = { + AHCI_SHT("ahci_platform"), +}; + +/** + * ahci_platform_enable_clks - Enable platform clocks + * @hpriv: host private area to store config values + * + * This function enables all the clks found in hpriv->clks, starting at + * index 0. If any clk fails to enable it disables all the clks already + * enabled in reverse order, and then returns an error. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_enable_clks(struct ahci_host_priv *hpriv) +{ + int c, rc; + + for (c = 0; c < AHCI_MAX_CLKS && hpriv->clks[c]; c++) { + rc = clk_prepare_enable(hpriv->clks[c]); + if (rc) + goto disable_unprepare_clk; + } + return 0; + +disable_unprepare_clk: + while (--c >= 0) + clk_disable_unprepare(hpriv->clks[c]); + return rc; +} +EXPORT_SYMBOL_GPL(ahci_platform_enable_clks); + +/** + * ahci_platform_disable_clks - Disable platform clocks + * @hpriv: host private area to store config values + * + * This function disables all the clks found in hpriv->clks, in reverse + * order of ahci_platform_enable_clks (starting at the end of the array). + */ +void ahci_platform_disable_clks(struct ahci_host_priv *hpriv) +{ + int c; + + for (c = AHCI_MAX_CLKS - 1; c >= 0; c--) + if (hpriv->clks[c]) + clk_disable_unprepare(hpriv->clks[c]); +} +EXPORT_SYMBOL_GPL(ahci_platform_disable_clks); + +/** + * ahci_platform_enable_resources - Enable platform resources + * @hpriv: host private area to store config values + * + * This function enables all ahci_platform managed resources in the + * following order: + * 1) Regulator + * 2) Clocks (through ahci_platform_enable_clks) + * 3) Phy + * + * If resource enabling fails at any point the previous enabled resources + * are disabled in reverse order. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_enable_resources(struct ahci_host_priv *hpriv) +{ + int rc; + + if (hpriv->target_pwr) { + rc = regulator_enable(hpriv->target_pwr); + if (rc) + return rc; + } + + rc = ahci_platform_enable_clks(hpriv); + if (rc) + goto disable_regulator; + + if (hpriv->phy) { + rc = phy_init(hpriv->phy); + if (rc) + goto disable_clks; + + rc = phy_power_on(hpriv->phy); + if (rc) { + phy_exit(hpriv->phy); + goto disable_clks; + } + } + + return 0; + +disable_clks: + ahci_platform_disable_clks(hpriv); + +disable_regulator: + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); + return rc; +} +EXPORT_SYMBOL_GPL(ahci_platform_enable_resources); + +/** + * ahci_platform_disable_resources - Disable platform resources + * @hpriv: host private area to store config values + * + * This function disables all ahci_platform managed resources in the + * following order: + * 1) Phy + * 2) Clocks (through ahci_platform_disable_clks) + * 3) Regulator + */ +void ahci_platform_disable_resources(struct ahci_host_priv *hpriv) +{ + if (hpriv->phy) { + phy_power_off(hpriv->phy); + phy_exit(hpriv->phy); + } + + ahci_platform_disable_clks(hpriv); + + if (hpriv->target_pwr) + regulator_disable(hpriv->target_pwr); +} +EXPORT_SYMBOL_GPL(ahci_platform_disable_resources); + +static void ahci_platform_put_resources(struct device *dev, void *res) +{ + struct ahci_host_priv *hpriv = res; + int c; + + if (hpriv->got_runtime_pm) { + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + } + + for (c = 0; c < AHCI_MAX_CLKS && hpriv->clks[c]; c++) + clk_put(hpriv->clks[c]); +} + +/** + * ahci_platform_get_resources - Get platform resources + * @pdev: platform device to get resources for + * + * This function allocates an ahci_host_priv struct, and gets the following + * resources, storing a reference to them inside the returned struct: + * + * 1) mmio registers (IORESOURCE_MEM 0, mandatory) + * 2) regulator for controlling the targets power (optional) + * 3) 0 - AHCI_MAX_CLKS clocks, as specified in the devs devicetree node, + * or for non devicetree enabled platforms a single clock + * 4) phy (optional) + * + * RETURNS: + * The allocated ahci_host_priv on success, otherwise an ERR_PTR value + */ +struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ahci_host_priv *hpriv; + struct clk *clk; + int i, rc = -ENOMEM; + + if (!devres_open_group(dev, NULL, GFP_KERNEL)) + return ERR_PTR(-ENOMEM); + + hpriv = devres_alloc(ahci_platform_put_resources, sizeof(*hpriv), + GFP_KERNEL); + if (!hpriv) + goto err_out; + + devres_add(dev, hpriv); + + hpriv->mmio = devm_ioremap_resource(dev, + platform_get_resource(pdev, IORESOURCE_MEM, 0)); + if (IS_ERR(hpriv->mmio)) { + dev_err(dev, "no mmio space\n"); + rc = PTR_ERR(hpriv->mmio); + goto err_out; + } + + hpriv->target_pwr = devm_regulator_get_optional(dev, "target"); + if (IS_ERR(hpriv->target_pwr)) { + rc = PTR_ERR(hpriv->target_pwr); + if (rc == -EPROBE_DEFER) + goto err_out; + hpriv->target_pwr = NULL; + } + + for (i = 0; i < AHCI_MAX_CLKS; i++) { + /* + * For now we must use clk_get(dev, NULL) for the first clock, + * because some platforms (da850, spear13xx) are not yet + * converted to use devicetree for clocks. For new platforms + * this is equivalent to of_clk_get(dev->of_node, 0). + */ + if (i == 0) + clk = clk_get(dev, NULL); + else + clk = of_clk_get(dev->of_node, i); + + if (IS_ERR(clk)) { + rc = PTR_ERR(clk); + if (rc == -EPROBE_DEFER) + goto err_out; + break; + } + hpriv->clks[i] = clk; + } + + hpriv->phy = devm_phy_get(dev, "sata-phy"); + if (IS_ERR(hpriv->phy)) { + rc = PTR_ERR(hpriv->phy); + switch (rc) { + case -ENODEV: + case -ENOSYS: + /* continue normally */ + hpriv->phy = NULL; + break; + + case -EPROBE_DEFER: + goto err_out; + + default: + dev_err(dev, "couldn't get sata-phy\n"); + goto err_out; + } + } + + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + hpriv->got_runtime_pm = true; + + devres_remove_group(dev, NULL); + return hpriv; + +err_out: + devres_release_group(dev, NULL); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_GPL(ahci_platform_get_resources); + +/** + * ahci_platform_init_host - Bring up an ahci-platform host + * @pdev: platform device pointer for the host + * @hpriv: ahci-host private data for the host + * @pi_template: template for the ata_port_info to use + * @force_port_map: param passed to ahci_save_initial_config + * @mask_port_map: param passed to ahci_save_initial_config + * + * This function does all the usual steps needed to bring up an + * ahci-platform host, note any necessary resources (ie clks, phy, etc.) + * must be initialized / enabled before calling this. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_init_host(struct platform_device *pdev, + struct ahci_host_priv *hpriv, + const struct ata_port_info *pi_template, + unsigned int force_port_map, + unsigned int mask_port_map) +{ + struct device *dev = &pdev->dev; + struct ata_port_info pi = *pi_template; + const struct ata_port_info *ppi[] = { &pi, NULL }; + struct ata_host *host; + int i, irq, n_ports, rc; + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) { + dev_err(dev, "no irq\n"); + return -EINVAL; + } + + /* prepare host */ + hpriv->flags |= (unsigned long)pi.private_data; + + ahci_save_initial_config(dev, hpriv, force_port_map, mask_port_map); + + if (hpriv->cap & HOST_CAP_NCQ) + pi.flags |= ATA_FLAG_NCQ; + + if (hpriv->cap & HOST_CAP_PMP) + pi.flags |= ATA_FLAG_PMP; + + ahci_set_em_messages(hpriv, &pi); + + /* CAP.NP sometimes indicate the index of the last enabled + * port, at other times, that of the last possible port, so + * determining the maximum port number requires looking at + * both CAP.NP and port_map. + */ + n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map)); + + host = ata_host_alloc_pinfo(dev, ppi, n_ports); + if (!host) + return -ENOMEM; + + host->private_data = hpriv; + + if (!(hpriv->cap & HOST_CAP_SSS) || ahci_ignore_sss) + host->flags |= ATA_HOST_PARALLEL_SCAN; + else + dev_info(dev, "SSS flag set, parallel bus scan disabled\n"); + + if (pi.flags & ATA_FLAG_EM) + ahci_reset_em(host); + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; + + ata_port_desc(ap, "mmio %pR", + platform_get_resource(pdev, IORESOURCE_MEM, 0)); + ata_port_desc(ap, "port 0x%x", 0x100 + ap->port_no * 0x80); + + /* set enclosure management message type */ + if (ap->flags & ATA_FLAG_EM) + ap->em_message_type = hpriv->em_msg_type; + + /* disabled/not-implemented port */ + if (!(hpriv->port_map & (1 << i))) + ap->ops = &ata_dummy_port_ops; + } + + rc = ahci_reset_controller(host); + if (rc) + return rc; + + ahci_init_controller(host); + ahci_print_info(host, "platform"); + + return ata_host_activate(host, irq, ahci_interrupt, IRQF_SHARED, + &ahci_platform_sht); +} +EXPORT_SYMBOL_GPL(ahci_platform_init_host); + +static void ahci_host_stop(struct ata_host *host) +{ + struct device *dev = host->dev; + struct ahci_platform_data *pdata = dev_get_platdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + + if (pdata && pdata->exit) + pdata->exit(dev); + + ahci_platform_disable_resources(hpriv); +} + +#ifdef CONFIG_PM_SLEEP +/** + * ahci_platform_suspend_host - Suspend an ahci-platform host + * @dev: device pointer for the host + * + * This function does all the usual steps needed to suspend an + * ahci-platform host, note any necessary resources (ie clks, phy, etc.) + * must be disabled after calling this. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_suspend_host(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + void __iomem *mmio = hpriv->mmio; + u32 ctl; + + if (hpriv->flags & AHCI_HFLAG_NO_SUSPEND) { + dev_err(dev, "firmware update required for suspend/resume\n"); + return -EIO; + } + + /* + * AHCI spec rev1.1 section 8.3.3: + * Software must disable interrupts prior to requesting a + * transition of the HBA to D3 state. + */ + ctl = readl(mmio + HOST_CTL); + ctl &= ~HOST_IRQ_EN; + writel(ctl, mmio + HOST_CTL); + readl(mmio + HOST_CTL); /* flush */ + + return ata_host_suspend(host, PMSG_SUSPEND); +} +EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); + +/** + * ahci_platform_resume_host - Resume an ahci-platform host + * @dev: device pointer for the host + * + * This function does all the usual steps needed to resume an ahci-platform + * host, note any necessary resources (ie clks, phy, etc.) must be + * initialized / enabled before calling this. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_resume_host(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + int rc; + + if (dev->power.power_state.event == PM_EVENT_SUSPEND) { + rc = ahci_reset_controller(host); + if (rc) + return rc; + + ahci_init_controller(host); + } + + ata_host_resume(host); + + return 0; +} +EXPORT_SYMBOL_GPL(ahci_platform_resume_host); + +/** + * ahci_platform_suspend - Suspend an ahci-platform device + * @dev: the platform device to suspend + * + * This function suspends the host associated with the device, followed by + * disabling all the resources of the device. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_suspend(struct device *dev) +{ + struct ahci_platform_data *pdata = dev_get_platdata(dev); + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + int rc; + + rc = ahci_platform_suspend_host(dev); + if (rc) + return rc; + + if (pdata && pdata->suspend) { + rc = pdata->suspend(dev); + if (rc) + goto resume_host; + } + + ahci_platform_disable_resources(hpriv); + + return 0; + +resume_host: + ahci_platform_resume_host(dev); + return rc; +} +EXPORT_SYMBOL_GPL(ahci_platform_suspend); + +/** + * ahci_platform_resume - Resume an ahci-platform device + * @dev: the platform device to resume + * + * This function enables all the resources of the device followed by + * resuming the host associated with the device. + * + * RETURNS: + * 0 on success otherwise a negative error code + */ +int ahci_platform_resume(struct device *dev) +{ + struct ahci_platform_data *pdata = dev_get_platdata(dev); + struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; + int rc; + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + return rc; + + if (pdata && pdata->resume) { + rc = pdata->resume(dev); + if (rc) + goto disable_resources; + } + + rc = ahci_platform_resume_host(dev); + if (rc) + goto disable_resources; + + /* We resumed so update PM runtime state */ + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + return 0; + +disable_resources: + ahci_platform_disable_resources(hpriv); + + return rc; +} +EXPORT_SYMBOL_GPL(ahci_platform_resume); +#endif + +MODULE_DESCRIPTION("AHCI SATA platform library"); +MODULE_AUTHOR("Anton Vorontsov "); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From ae8723f8a9c8e804c2b906074d7d5f1265a385bb Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 25 Mar 2014 19:51:40 +0100 Subject: ata: add new-style AHCI platform driver for DaVinci DA850 AHCI controller Add the new ahci_da850 host driver. Platform changes needed to make DaVinci DA850 SATA AHCI support fully functional are in the separate "ARM: davinci: da850: update SATA AHCI support" commit. Please note that this driver doesn't have the superfluous clock control code as clock is already handled by the generic AHCI platform library code. Cc: Sekhar Nori Cc: Kevin Hilman Cc: Hans de Goede Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 12767ad..6e3be7d 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -97,6 +97,15 @@ config SATA_AHCI_PLATFORM If unsure, say N. +config AHCI_DA850 + tristate "DaVinci DA850 AHCI SATA support" + depends on ARCH_DAVINCI_DA850 + help + This option enables support for the DaVinci DA850 SoC's + onboard AHCI SATA. + + If unsure, say N. + config AHCI_ST tristate "ST AHCI SATA support" depends on ARCH_STI diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 5fa79ab..44c8016 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_SATA_INIC162X) += sata_inic162x.o obj-$(CONFIG_SATA_SIL24) += sata_sil24.o obj-$(CONFIG_SATA_DWC) += sata_dwc_460ex.o obj-$(CONFIG_SATA_HIGHBANK) += sata_highbank.o libahci.o +obj-$(CONFIG_AHCI_DA850) += ahci_da850.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_IMX) += ahci_imx.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_SUNXI) += ahci_sunxi.o libahci.o libahci_platform.o obj-$(CONFIG_AHCI_ST) += ahci_st.o libahci.o libahci_platform.o diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c new file mode 100644 index 0000000..2c83613 --- /dev/null +++ b/drivers/ata/ahci_da850.c @@ -0,0 +1,114 @@ +/* + * DaVinci DA850 AHCI SATA platform driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "ahci.h" + +/* SATA PHY Control Register offset from AHCI base */ +#define SATA_P0PHYCR_REG 0x178 + +#define SATA_PHY_MPY(x) ((x) << 0) +#define SATA_PHY_LOS(x) ((x) << 6) +#define SATA_PHY_RXCDR(x) ((x) << 10) +#define SATA_PHY_RXEQ(x) ((x) << 13) +#define SATA_PHY_TXSWING(x) ((x) << 19) +#define SATA_PHY_ENPLL(x) ((x) << 31) + +/* + * The multiplier needed for 1.5GHz PLL output. + * + * NOTE: This is currently hardcoded to be suitable for 100MHz crystal + * frequency (which is used by DA850 EVM board) and may need to be changed + * if you would like to use this driver on some other board. + */ +#define DA850_SATA_CLK_MULTIPLIER 7 + +static void da850_sata_init(struct device *dev, void __iomem *pwrdn_reg, + void __iomem *ahci_base) +{ + unsigned int val; + + /* Enable SATA clock receiver */ + val = readl(pwrdn_reg); + val &= ~BIT(0); + writel(val, pwrdn_reg); + + val = SATA_PHY_MPY(DA850_SATA_CLK_MULTIPLIER + 1) | SATA_PHY_LOS(1) | + SATA_PHY_RXCDR(4) | SATA_PHY_RXEQ(1) | SATA_PHY_TXSWING(3) | + SATA_PHY_ENPLL(1); + + writel(val, ahci_base + SATA_P0PHYCR_REG); +} + +static const struct ata_port_info ahci_da850_port_info = { + .flags = AHCI_FLAG_COMMON, + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA6, + .port_ops = &ahci_platform_ops, +}; + +static int ahci_da850_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ahci_host_priv *hpriv; + struct resource *res; + void __iomem *pwrdn_reg; + int rc; + + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) + return PTR_ERR(hpriv); + + rc = ahci_platform_enable_resources(hpriv); + if (rc) + return rc; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + goto disable_resources; + + pwrdn_reg = devm_ioremap(dev, res->start, resource_size(res)); + if (!pwrdn_reg) + goto disable_resources; + + da850_sata_init(dev, pwrdn_reg, hpriv->mmio); + + rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info, 0, 0); + if (rc) + goto disable_resources; + + return 0; +disable_resources: + ahci_platform_disable_resources(hpriv); + return rc; +} + +static SIMPLE_DEV_PM_OPS(ahci_da850_pm_ops, ahci_platform_suspend, + ahci_platform_resume); + +static struct platform_driver ahci_da850_driver = { + .probe = ahci_da850_probe, + .remove = ata_platform_remove_one, + .driver = { + .name = "ahci_da850", + .owner = THIS_MODULE, + .pm = &ahci_da850_pm_ops, + }, +}; +module_platform_driver(ahci_da850_driver); + +MODULE_DESCRIPTION("DaVinci DA850 AHCI SATA platform driver"); +MODULE_AUTHOR("Bartlomiej Zolnierkiewicz "); +MODULE_LICENSE("GPL"); -- cgit v0.10.2 From 080c492df02bd1171963d70daf7305a72443873d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 25 Mar 2014 19:51:41 +0100 Subject: ARM: davinci: da850: update SATA AHCI support Update SATA AHCI support to make use of the new ahci_da850 host driver (instead of the generic ahci_platform one) and remove deprecated ahci_platform_data code. Cc: Sekhar Nori Cc: Kevin Hilman Cc: Hans de Goede Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index 2ab0043..85399c9 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -472,7 +472,7 @@ static struct clk_lookup da850_clks[] = { CLK("spi_davinci.0", NULL, &spi0_clk), CLK("spi_davinci.1", NULL, &spi1_clk), CLK("vpif", NULL, &vpif_clk), - CLK("ahci", NULL, &sata_clk), + CLK("ahci_da850", NULL, &sata_clk), CLK("davinci-rproc.0", NULL, &dsp_clk), CLK("ehrpwm", "fck", &ehrpwm_clk), CLK("ehrpwm", "tbclk", &ehrpwm_tbclk), diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 0486cdf..56ea41d 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -1020,7 +1020,6 @@ int __init da8xx_register_spi_bus(int instance, unsigned num_chipselect) } #ifdef CONFIG_ARCH_DAVINCI_DA850 - static struct resource da850_sata_resources[] = { { .start = DA850_SATA_BASE, @@ -1028,103 +1027,22 @@ static struct resource da850_sata_resources[] = { .flags = IORESOURCE_MEM, }, { + .start = DA8XX_SYSCFG1_BASE + DA8XX_PWRDN_REG, + .end = DA8XX_SYSCFG1_BASE + DA8XX_PWRDN_REG + 0x3, + .flags = IORESOURCE_MEM, + }, + { .start = IRQ_DA850_SATAINT, .flags = IORESOURCE_IRQ, }, }; -/* SATA PHY Control Register offset from AHCI base */ -#define SATA_P0PHYCR_REG 0x178 - -#define SATA_PHY_MPY(x) ((x) << 0) -#define SATA_PHY_LOS(x) ((x) << 6) -#define SATA_PHY_RXCDR(x) ((x) << 10) -#define SATA_PHY_RXEQ(x) ((x) << 13) -#define SATA_PHY_TXSWING(x) ((x) << 19) -#define SATA_PHY_ENPLL(x) ((x) << 31) - -static struct clk *da850_sata_clk; -static unsigned long da850_sata_refclkpn; - -/* Supported DA850 SATA crystal frequencies */ -#define KHZ_TO_HZ(freq) ((freq) * 1000) -static unsigned long da850_sata_xtal[] = { - KHZ_TO_HZ(300000), - KHZ_TO_HZ(250000), - 0, /* Reserved */ - KHZ_TO_HZ(187500), - KHZ_TO_HZ(150000), - KHZ_TO_HZ(125000), - KHZ_TO_HZ(120000), - KHZ_TO_HZ(100000), - KHZ_TO_HZ(75000), - KHZ_TO_HZ(60000), -}; - -static int da850_sata_init(struct device *dev, void __iomem *addr) -{ - int i, ret; - unsigned int val; - - da850_sata_clk = clk_get(dev, NULL); - if (IS_ERR(da850_sata_clk)) - return PTR_ERR(da850_sata_clk); - - ret = clk_prepare_enable(da850_sata_clk); - if (ret) - goto err0; - - /* Enable SATA clock receiver */ - val = __raw_readl(DA8XX_SYSCFG1_VIRT(DA8XX_PWRDN_REG)); - val &= ~BIT(0); - __raw_writel(val, DA8XX_SYSCFG1_VIRT(DA8XX_PWRDN_REG)); - - /* Get the multiplier needed for 1.5GHz PLL output */ - for (i = 0; i < ARRAY_SIZE(da850_sata_xtal); i++) - if (da850_sata_xtal[i] == da850_sata_refclkpn) - break; - - if (i == ARRAY_SIZE(da850_sata_xtal)) { - ret = -EINVAL; - goto err1; - } - - val = SATA_PHY_MPY(i + 1) | - SATA_PHY_LOS(1) | - SATA_PHY_RXCDR(4) | - SATA_PHY_RXEQ(1) | - SATA_PHY_TXSWING(3) | - SATA_PHY_ENPLL(1); - - __raw_writel(val, addr + SATA_P0PHYCR_REG); - - return 0; - -err1: - clk_disable_unprepare(da850_sata_clk); -err0: - clk_put(da850_sata_clk); - return ret; -} - -static void da850_sata_exit(struct device *dev) -{ - clk_disable_unprepare(da850_sata_clk); - clk_put(da850_sata_clk); -} - -static struct ahci_platform_data da850_sata_pdata = { - .init = da850_sata_init, - .exit = da850_sata_exit, -}; - static u64 da850_sata_dmamask = DMA_BIT_MASK(32); static struct platform_device da850_sata_device = { - .name = "ahci", + .name = "ahci_da850", .id = -1, .dev = { - .platform_data = &da850_sata_pdata, .dma_mask = &da850_sata_dmamask, .coherent_dma_mask = DMA_BIT_MASK(32), }, @@ -1134,9 +1052,8 @@ static struct platform_device da850_sata_device = { int __init da850_register_sata(unsigned long refclkpn) { - da850_sata_refclkpn = refclkpn; - if (!da850_sata_refclkpn) - return -EINVAL; + /* please see comment in drivers/ata/ahci_da850.c */ + BUG_ON(refclkpn != 100 * 1000 * 1000); return platform_device_register(&da850_sata_device); } -- cgit v0.10.2 From cacb3c76c2012ade52124e8c6fdc5cb125625772 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Mar 2014 16:09:18 +0530 Subject: tick: Fix spelling mistake in tick_handle_periodic() One of the comments in tick_handle_periodic() had 'when' instead of 'which' (My guess :)). Fix it. Also fix spelling mistake in 'Possible'. Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: skarafotis@gmail.com Link: http://lkml.kernel.org/r/2b29ca4230c163e44179941d7c7a16c1474385c2.1395743878.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 20b2fe3..0fec634 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -118,7 +118,7 @@ void tick_handle_periodic(struct clock_event_device *dev) * to be sure we're using a real hardware clocksource. * Otherwise we could get trapped in an infinite * loop, as the tick_periodic() increments jiffies, - * when then will increment time, posibly causing + * which then will increment time, possibly causing * the loop to trigger again and again. */ if (timekeeping_valid_for_hres()) -- cgit v0.10.2 From b97f0291a2504291aef850077f98cab68a5a2f33 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 25 Mar 2014 13:56:23 +0530 Subject: tick: Remove code duplication in tick_handle_periodic() tick_handle_periodic() is calling ktime_add() at two places, first before the infinite loop and then at the end of infinite loop. We can rearrange code a bit to fix code duplication here. It looks quite simple and shouldn't break anything, I guess :) Signed-off-by: Viresh Kumar Cc: linaro-kernel@lists.linaro.org Cc: fweisbec@gmail.com Link: http://lkml.kernel.org/r/be3481e8f3f71df694a4b43623254fc93ca51b59.1395735873.git.viresh.kumar@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 0fec634..0156612 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -98,18 +98,19 @@ static void tick_periodic(int cpu) void tick_handle_periodic(struct clock_event_device *dev) { int cpu = smp_processor_id(); - ktime_t next; + ktime_t next = dev->next_event; tick_periodic(cpu); if (dev->mode != CLOCK_EVT_MODE_ONESHOT) return; - /* - * Setup the next period for devices, which do not have - * periodic mode: - */ - next = ktime_add(dev->next_event, tick_period); for (;;) { + /* + * Setup the next period for devices, which do not have + * periodic mode: + */ + next = ktime_add(next, tick_period); + if (!clockevents_program_event(dev, next, false)) return; /* @@ -123,7 +124,6 @@ void tick_handle_periodic(struct clock_event_device *dev) */ if (timekeeping_valid_for_hres()) tick_periodic(cpu); - next = ktime_add(next, tick_period); } } -- cgit v0.10.2 From 6058bb362818e09990de722e983a7f2874e7f61c Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Wed, 19 Mar 2014 20:21:17 +0100 Subject: ARM: sun7i/sun6i: irqchip: Add irqchip driver for NMI controller Allwinner A20/A31 SoCs have special registers to control / (un)mask / acknowledge NMI. This NMI controller is separated and independent from GIC. This patch adds a new irqchip to manage NMI. Signed-off-by: Carlo Caione Acked-by: Maxime Ripard Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Cc: mark.rutland@arm.com Cc: hdegoede@redhat.com Link: http://lkml.kernel.org/r/1395256879-8475-2-git-send-email-carlo@caione.org Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 5194afb..1c0c151 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_METAG_PERFCOUNTER_IRQS) += irq-metag.o obj-$(CONFIG_ARCH_MOXART) += irq-moxart.o obj-$(CONFIG_ORION_IRQCHIP) += irq-orion.o obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o +obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o obj-$(CONFIG_ARM_NVIC) += irq-nvic.o diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c new file mode 100644 index 0000000..1c8566c --- /dev/null +++ b/drivers/irqchip/irq-sunxi-nmi.c @@ -0,0 +1,208 @@ +/* + * Allwinner A20/A31 SoCs NMI IRQ chip driver. + * + * Carlo Caione + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "irqchip.h" + +#define SUNXI_NMI_SRC_TYPE_MASK 0x00000003 + +enum { + SUNXI_SRC_TYPE_LEVEL_LOW = 0, + SUNXI_SRC_TYPE_EDGE_FALLING, + SUNXI_SRC_TYPE_LEVEL_HIGH, + SUNXI_SRC_TYPE_EDGE_RISING, +}; + +struct sunxi_sc_nmi_reg_offs { + u32 ctrl; + u32 pend; + u32 enable; +}; + +static struct sunxi_sc_nmi_reg_offs sun7i_reg_offs = { + .ctrl = 0x00, + .pend = 0x04, + .enable = 0x08, +}; + +static struct sunxi_sc_nmi_reg_offs sun6i_reg_offs = { + .ctrl = 0x00, + .pend = 0x04, + .enable = 0x34, +}; + +static inline void sunxi_sc_nmi_write(struct irq_chip_generic *gc, u32 off, + u32 val) +{ + irq_reg_writel(val, gc->reg_base + off); +} + +static inline u32 sunxi_sc_nmi_read(struct irq_chip_generic *gc, u32 off) +{ + return irq_reg_readl(gc->reg_base + off); +} + +static void sunxi_sc_nmi_handle_irq(unsigned int irq, struct irq_desc *desc) +{ + struct irq_domain *domain = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_get_chip(irq); + unsigned int virq = irq_find_mapping(domain, 0); + + chained_irq_enter(chip, desc); + generic_handle_irq(virq); + chained_irq_exit(chip, desc); +} + +static int sunxi_sc_nmi_set_type(struct irq_data *data, unsigned int flow_type) +{ + struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); + struct irq_chip_type *ct = gc->chip_types; + u32 src_type_reg; + u32 ctrl_off = ct->regs.type; + unsigned int src_type; + unsigned int i; + + irq_gc_lock(gc); + + switch (flow_type & IRQF_TRIGGER_MASK) { + case IRQ_TYPE_EDGE_FALLING: + src_type = SUNXI_SRC_TYPE_EDGE_FALLING; + break; + case IRQ_TYPE_EDGE_RISING: + src_type = SUNXI_SRC_TYPE_EDGE_RISING; + break; + case IRQ_TYPE_LEVEL_HIGH: + src_type = SUNXI_SRC_TYPE_LEVEL_HIGH; + break; + case IRQ_TYPE_NONE: + case IRQ_TYPE_LEVEL_LOW: + src_type = SUNXI_SRC_TYPE_LEVEL_LOW; + break; + default: + irq_gc_unlock(gc); + pr_err("%s: Cannot assign multiple trigger modes to IRQ %d.\n", + __func__, data->irq); + return -EBADR; + } + + irqd_set_trigger_type(data, flow_type); + irq_setup_alt_chip(data, flow_type); + + for (i = 0; i <= gc->num_ct; i++, ct++) + if (ct->type & flow_type) + ctrl_off = ct->regs.type; + + src_type_reg = sunxi_sc_nmi_read(gc, ctrl_off); + src_type_reg &= ~SUNXI_NMI_SRC_TYPE_MASK; + src_type_reg |= src_type; + sunxi_sc_nmi_write(gc, ctrl_off, src_type_reg); + + irq_gc_unlock(gc); + + return IRQ_SET_MASK_OK; +} + +static int __init sunxi_sc_nmi_irq_init(struct device_node *node, + struct sunxi_sc_nmi_reg_offs *reg_offs) +{ + struct irq_domain *domain; + struct irq_chip_generic *gc; + unsigned int irq; + unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; + int ret; + + + domain = irq_domain_add_linear(node, 1, &irq_generic_chip_ops, NULL); + if (!domain) { + pr_err("%s: Could not register interrupt domain.\n", node->name); + return -ENOMEM; + } + + ret = irq_alloc_domain_generic_chips(domain, 1, 2, node->name, + handle_fasteoi_irq, clr, 0, + IRQ_GC_INIT_MASK_CACHE); + if (ret) { + pr_err("%s: Could not allocate generic interrupt chip.\n", + node->name); + goto fail_irqd_remove; + } + + irq = irq_of_parse_and_map(node, 0); + if (irq <= 0) { + pr_err("%s: unable to parse irq\n", node->name); + ret = -EINVAL; + goto fail_irqd_remove; + } + + gc = irq_get_domain_generic_chip(domain, 0); + gc->reg_base = of_iomap(node, 0); + if (!gc->reg_base) { + pr_err("%s: unable to map resource\n", node->name); + ret = -ENOMEM; + goto fail_irqd_remove; + } + + gc->chip_types[0].type = IRQ_TYPE_LEVEL_MASK; + gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[0].chip.irq_eoi = irq_gc_ack_set_bit; + gc->chip_types[0].chip.irq_set_type = sunxi_sc_nmi_set_type; + gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED; + gc->chip_types[0].regs.ack = reg_offs->pend; + gc->chip_types[0].regs.mask = reg_offs->enable; + gc->chip_types[0].regs.type = reg_offs->ctrl; + + gc->chip_types[1].type = IRQ_TYPE_EDGE_BOTH; + gc->chip_types[1].chip.name = gc->chip_types[0].chip.name; + gc->chip_types[1].chip.irq_ack = irq_gc_ack_set_bit; + gc->chip_types[1].chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types[1].chip.irq_unmask = irq_gc_mask_set_bit; + gc->chip_types[1].chip.irq_set_type = sunxi_sc_nmi_set_type; + gc->chip_types[1].regs.ack = reg_offs->pend; + gc->chip_types[1].regs.mask = reg_offs->enable; + gc->chip_types[1].regs.type = reg_offs->ctrl; + gc->chip_types[1].handler = handle_edge_irq; + + irq_set_handler_data(irq, domain); + irq_set_chained_handler(irq, sunxi_sc_nmi_handle_irq); + + sunxi_sc_nmi_write(gc, reg_offs->enable, 0); + sunxi_sc_nmi_write(gc, reg_offs->pend, 0x1); + + return 0; + +fail_irqd_remove: + irq_domain_remove(domain); + + return ret; +} + +static int __init sun6i_sc_nmi_irq_init(struct device_node *node, + struct device_node *parent) +{ + return sunxi_sc_nmi_irq_init(node, &sun6i_reg_offs); +} +IRQCHIP_DECLARE(sun6i_sc_nmi, "allwinner,sun6i-a31-sc-nmi", sun6i_sc_nmi_irq_init); + +static int __init sun7i_sc_nmi_irq_init(struct device_node *node, + struct device_node *parent) +{ + return sunxi_sc_nmi_irq_init(node, &sun7i_reg_offs); +} +IRQCHIP_DECLARE(sun7i_sc_nmi, "allwinner,sun7i-a20-sc-nmi", sun7i_sc_nmi_irq_init); -- cgit v0.10.2 From 8ff973a26763ef2f2d45c1649c618dfff528a502 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Wed, 19 Mar 2014 20:21:18 +0100 Subject: ARM: sun7i/sun6i: dts: Add NMI irqchip support This patch adds DTS entries for NMI controller as child of GIC. Signed-off-by: Carlo Caione Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Cc: mark.rutland@arm.com Cc: hdegoede@redhat.com Acked-by: maxime.ripard@free-electrons.com Link: http://lkml.kernel.org/r/1395256879-8475-3-git-send-email-carlo@caione.org Signed-off-by: Thomas Gleixner diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index 5256ad9..eea6033 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -190,6 +190,14 @@ #size-cells = <1>; ranges; + nmi_intc: interrupt-controller@01f00c0c { + compatible = "allwinner,sun6i-a31-sc-nmi"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x01f00c0c 0x38>; + interrupts = <0 0 4>; + }; + pio: pinctrl@01c20800 { compatible = "allwinner,sun6i-a31-pinctrl"; reg = <0x01c20800 0x400>; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 6f25cf5..7637f12 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -339,6 +339,14 @@ #size-cells = <1>; ranges; + nmi_intc: interrupt-controller@01c00030 { + compatible = "allwinner,sun7i-a20-sc-nmi"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x01c00030 0x0c>; + interrupts = <0 0 4>; + }; + emac: ethernet@01c0b000 { compatible = "allwinner,sun4i-a10-emac"; reg = <0x01c0b000 0x1000>; -- cgit v0.10.2 From ae7d9d3245b7ced5c6f615cfef1250226d518436 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Wed, 19 Mar 2014 20:21:19 +0100 Subject: ARM: sun7i/sun6i: irqchip: Update the documentation Add documentation for NMI irqchip. Signed-off-by: Carlo Caione Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Cc: mark.rutland@arm.com Cc: hdegoede@redhat.com Acked-by: maxime.ripard@free-electrons.com Link: http://lkml.kernel.org/r/1395256879-8475-4-git-send-email-carlo@caione.org Signed-off-by: Thomas Gleixner diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt new file mode 100644 index 0000000..d1c5cda --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt @@ -0,0 +1,27 @@ +Allwinner Sunxi NMI Controller +============================== + +Required properties: + +- compatible : should be "allwinner,sun7i-a20-sc-nmi" or + "allwinner,sun6i-a31-sc-nmi" +- reg : Specifies base physical address and size of the registers. +- interrupt-controller : Identifies the node as an interrupt controller +- #interrupt-cells : Specifies the number of cells needed to encode an + interrupt source. The value shall be 2. The first cell is the IRQ number, the + second cell the trigger type as defined in interrupt.txt in this directory. +- interrupt-parent: Specifies the parent interrupt controller. +- interrupts: Specifies the interrupt line (NMI) which is handled by + the interrupt controller in the parent controller's notation. This value + shall be the NMI. + +Example: + +sc-nmi-intc@01c00030 { + compatible = "allwinner,sun7i-a20-sc-nmi"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x01c00030 0x0c>; + interrupt-parent = <&gic>; + interrupts = <0 0 4>; +}; -- cgit v0.10.2 From d6f2589ad561aa5fa39f347eca6942668b7560a1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 25 Mar 2014 21:37:09 +0100 Subject: fs: Avoid userspace mounting anon_inodefs filesystem anon_inodefs filesystem is a kernel internal filesystem userspace shouldn't mess with. Remove registration of it so userspace cannot even try to mount it (which would fail anyway because the filesystem is MS_NOUSER). This fixes an oops triggered by trinity when it tried mounting anon_inodefs which overwrote anon_inode_inode pointer while other CPU has been in anon_inode_getfile() between ihold() and d_instantiate(). Thus effectively creating dentry pointing to an inode without holding a reference to it. Reported-by: Sasha Levin Signed-off-by: Jan Kara Signed-off-by: Linus Torvalds diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 2408473..4b4543b 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -177,9 +177,6 @@ static int __init anon_inode_init(void) { int error; - error = register_filesystem(&anon_inode_fs_type); - if (error) - goto err_exit; anon_inode_mnt = kern_mount(&anon_inode_fs_type); if (IS_ERR(anon_inode_mnt)) { error = PTR_ERR(anon_inode_mnt); -- cgit v0.10.2 From fce7fc79c8f7188dfc5eafa1b937bcc3c5a4c2f5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 25 Mar 2014 17:43:34 -0700 Subject: fs: remove now stale label in anon_inode_init() The previous commit removed the register_filesystem() call and the associated error handling, but left the label for the error path that no longer exists. Remove that too. Signed-off-by: Linus Torvalds diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 4b4543b..42fcc46 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -186,7 +186,6 @@ static int __init anon_inode_init(void) err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); -err_exit: panic(KERN_ERR "anon_inode_init() failed (%d)\n", error); } -- cgit v0.10.2 From 2c4a33aba5f9ea3a28f2e40351f078d95f00786b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 25 Mar 2014 23:39:41 -0400 Subject: tracing: Fix traceon trigger condition to actually turn tracing on While working on my tutorial for 2014 Linux Collaboration Summit I found that the traceon trigger did not work when conditions were used. The other triggers worked fine though. Looking into it, it is because of the way the triggers use the ring buffer to store the fields it will use for the condition. But if tracing is off, nothing is stored in the buffer, and the tracepoint exits before calling the trigger to test the condition. This is fine for all the triggers that only work when tracing is on, but for traceon trigger that is to work when tracing is off, nothing happens. The fix is simple, just use a temp ring buffer to record the event if tracing is off and the event has a trace event conditional trigger enabled. The rest of the tracepoint code will work just fine, but the tracepoint wont be recorded in the other buffers. Cc: Tom Zanussi Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 815c878..24c1f23 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1600,15 +1600,31 @@ void trace_buffer_unlock_commit(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit); +static struct ring_buffer *temp_buffer; + struct ring_buffer_event * trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, struct ftrace_event_file *ftrace_file, int type, unsigned long len, unsigned long flags, int pc) { + struct ring_buffer_event *entry; + *current_rb = ftrace_file->tr->trace_buffer.buffer; - return trace_buffer_lock_reserve(*current_rb, + entry = trace_buffer_lock_reserve(*current_rb, type, len, flags, pc); + /* + * If tracing is off, but we have triggers enabled + * we still need to look at the event data. Use the temp_buffer + * to store the trace event for the tigger to use. It's recusive + * safe and will not be recorded anywhere. + */ + if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) { + *current_rb = temp_buffer; + entry = trace_buffer_lock_reserve(*current_rb, + type, len, flags, pc); + } + return entry; } EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); @@ -6494,11 +6510,16 @@ __init static int tracer_alloc_buffers(void) raw_spin_lock_init(&global_trace.start_lock); + /* Used for event triggers */ + temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); + if (!temp_buffer) + goto out_free_cpumask; + /* TODO: make the number of buffers hot pluggable with CPUS */ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); WARN_ON(1); - goto out_free_cpumask; + goto out_free_temp_buffer; } if (global_trace.buffer_disabled) @@ -6540,6 +6561,8 @@ __init static int tracer_alloc_buffers(void) return 0; +out_free_temp_buffer: + ring_buffer_free(temp_buffer); out_free_cpumask: free_percpu(global_trace.trace_buffer.data); #ifdef CONFIG_TRACER_MAX_TRACE -- cgit v0.10.2 From 233faec97a1dfef1f4bc271f9e5d33f2ba4845ca Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 21 Mar 2014 09:25:24 +0100 Subject: s390/con3270: optionally disable auto update This patch adds a parameter 'auto_update' to the con3270 driver, causing the 'auto_update' feature to be disabled if unset. The 'auto_update' feature will cause the con3270 driver to switch to the console view whenever new system messages are displayed, which makes working on the 3270 terminal awkward. Signed-off-by: Martin Schwidefsky diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index bb6b0df..75ffe99 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -7,6 +7,7 @@ * Copyright IBM Corp. 2003, 2009 */ +#include #include #include #include @@ -30,6 +31,9 @@ static struct raw3270_fn con3270_fn; +static bool auto_update = 1; +module_param(auto_update, bool, 0); + /* * Main 3270 console view data structure. */ @@ -204,6 +208,8 @@ con3270_update(struct con3270 *cp) struct string *s, *n; int rc; + if (!auto_update && !raw3270_view_active(&cp->view)) + return; if (cp->view.dev) raw3270_activate_view(&cp->view); @@ -529,6 +535,7 @@ con3270_flush(void) if (!cp->view.dev) return; raw3270_pm_unfreeze(&cp->view); + raw3270_activate_view(&cp->view); spin_lock_irqsave(&cp->view.lock, flags); con3270_wait_write(cp); cp->nr_up = 0; diff --git a/drivers/s390/char/raw3270.c b/drivers/s390/char/raw3270.c index 041c65b..9f849df 100644 --- a/drivers/s390/char/raw3270.c +++ b/drivers/s390/char/raw3270.c @@ -276,6 +276,15 @@ __raw3270_start(struct raw3270 *rp, struct raw3270_view *view, } int +raw3270_view_active(struct raw3270_view *view) +{ + struct raw3270 *rp = view->dev; + + return rp && rp->view == view && + !test_bit(RAW3270_FLAGS_FROZEN, &rp->flags); +} + +int raw3270_start(struct raw3270_view *view, struct raw3270_request *rq) { unsigned long flags; diff --git a/drivers/s390/char/raw3270.h b/drivers/s390/char/raw3270.h index 359276a..e1e41c2 100644 --- a/drivers/s390/char/raw3270.h +++ b/drivers/s390/char/raw3270.h @@ -173,6 +173,7 @@ int raw3270_start_locked(struct raw3270_view *, struct raw3270_request *); int raw3270_start_irq(struct raw3270_view *, struct raw3270_request *); int raw3270_reset(struct raw3270_view *); struct raw3270_view *raw3270_view(struct raw3270_view *); +int raw3270_view_active(struct raw3270_view *); /* Reference count inliner for view structures. */ static inline void -- cgit v0.10.2 From 7596d93d8dfa7a508ca52c7d9b7db763eb5b0715 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 26 Mar 2014 15:19:23 +0100 Subject: ata: fix R-Car SATA driver dependencies Make sata_rcar host driver depend on ARCH_SHMOBILE config option as Renesas R-Car SATA support is specific to Renesas SoCs and the driver to work requires suitable device tree node (or platform device) to be defined. Additionally allow the driver build if COMPILE_TEST config option is set. Cc: Simon Horman Cc: Magnus Damm Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 6e3be7d..f4d9a6a 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -305,6 +305,7 @@ config SATA_PROMISE config SATA_RCAR tristate "Renesas R-Car SATA support" + depends on ARCH_SHMOBILE || COMPILE_TEST help This option enables support for Renesas R-Car Serial ATA. -- cgit v0.10.2 From 0b99f8648eb1814d787db3981ad93e6bc343b3f6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 26 Mar 2014 16:39:50 +0100 Subject: ata: fix Calxeda Highbank SATA driver dependencies Make sata_highbank host driver depend on ARCH_HIGHBANK config option as Calxeda Highbank SATA support is specific to Calxeda Highbank SoCs and the driver to work requires suitable device tree node to be defined. Additionally allow the driver build if COMPILE_TEST config option is set. Cc: Mark Langsdorf Cc: Rob Herring Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index f4d9a6a..23f15f4 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -272,6 +272,7 @@ config SATA_DWC_VDEBUG config SATA_HIGHBANK tristate "Calxeda Highbank SATA support" + depends on ARCH_HIGHBANK || COMPILE_TEST help This option enables support for the Calxeda Highbank SoC's onboard SATA. -- cgit v0.10.2 From a498e31643c1e9981fde87556bc4f6133e2fd989 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 26 Mar 2014 16:41:00 +0100 Subject: ata: sata_highbank: remove superfluous cast hpriv->plat_data is 'void *' so there is no need to cast it to 'struct ecx_plat_data *'. Cc: Mark Langsdorf Cc: Rob Herring Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index d4df0bf..65965cf 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -141,7 +141,7 @@ static ssize_t ecx_transmit_led_message(struct ata_port *ap, u32 state, ssize_t size) { struct ahci_host_priv *hpriv = ap->host->private_data; - struct ecx_plat_data *pdata = (struct ecx_plat_data *) hpriv->plat_data; + struct ecx_plat_data *pdata = hpriv->plat_data; struct ahci_port_priv *pp = ap->private_data; unsigned long flags; int pmp, i; -- cgit v0.10.2 From d5185d655c1fc4dfd467303f45ba4496ad84ddf9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 26 Mar 2014 09:34:49 -0700 Subject: ata: remove superfluous casts Unreferenced casts of void * types are unnecessary so remove them. Signed-off-by: Joe Perches Signed-off-by: Tejun Heo diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 7985ae7..6bd4f66 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1636,7 +1636,7 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat) } if (irq_stat & PORT_IRQ_UNK_FIS) { - u32 *unk = (u32 *)(pp->rx_fis + RX_FIS_UNK); + u32 *unk = pp->rx_fis + RX_FIS_UNK; active_ehi->err_mask |= AC_ERR_HSM; active_ehi->action |= ATA_EH_RESET; diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c index 73492dd..6fac524 100644 --- a/drivers/ata/pata_arasan_cf.c +++ b/drivers/ata/pata_arasan_cf.c @@ -356,7 +356,7 @@ static void cf_exit(struct arasan_cf_dev *acdev) static void dma_callback(void *dev) { - struct arasan_cf_dev *acdev = (struct arasan_cf_dev *) dev; + struct arasan_cf_dev *acdev = dev; complete(&acdev->dma_completion); } diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 73510d0d..0bb2cab 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -461,8 +461,7 @@ static irqreturn_t dma_dwc_interrupt(int irq, void *hsdev_instance) int chan; u32 tfr_reg, err_reg; unsigned long flags; - struct sata_dwc_device *hsdev = - (struct sata_dwc_device *)hsdev_instance; + struct sata_dwc_device *hsdev = hsdev_instance; struct ata_host *host = (struct ata_host *)hsdev->host; struct ata_port *ap; struct sata_dwc_device_port *hsdevp; diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index 6cd0312..39b5de6 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c @@ -1020,8 +1020,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, idx++; dist = ((long) (window_size - (offset + size))) >= 0 ? size : (long) (window_size - offset); - memcpy_fromio((char *) psource, (char *) (dimm_mmio + offset / 4), - dist); + memcpy_fromio(psource, dimm_mmio + offset / 4, dist); psource += dist; size -= dist; @@ -1030,8 +1029,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, readl(mmio + PDC_GENERAL_CTLR); writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - memcpy_fromio((char *) psource, (char *) (dimm_mmio), - window_size / 4); + memcpy_fromio(psource, dimm_mmio, window_size / 4); psource += window_size; size -= window_size; idx++; @@ -1042,8 +1040,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource, readl(mmio + PDC_GENERAL_CTLR); writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - memcpy_fromio((char *) psource, (char *) (dimm_mmio), - size / 4); + memcpy_fromio(psource, dimm_mmio, size / 4); } } #endif -- cgit v0.10.2 From e638433b5021f59aacbfe15597beca1e706773fb Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 26 Mar 2014 15:33:37 +0100 Subject: ata: fix ARASAN CompactFlash PATA driver dependencies Make pata_arasan_cf host driver depend on ARCH_SPEAR13XX config option as ARASAN CompactFlash PATA support is specific to ST SPEAr13xx SoCs and the driver to work requires suitable device tree node (or platform device) to be defined. Additionally allow the driver build if COMPILE_TEST config option is set. Cc: Viresh Kumar Cc: Shiraz Hashim Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 23f15f4..af0f71a 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -386,6 +386,7 @@ config PATA_AMD config PATA_ARASAN_CF tristate "ARASAN CompactFlash PATA Controller Support" + depends on ARCH_SPEAR13XX || COMPILE_TEST depends on DMADEVICES select DMA_ENGINE help -- cgit v0.10.2 From 204b0a1a4b92612c957a042df1a3be0e9cc79391 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sat, 22 Mar 2014 10:09:01 +0000 Subject: x86, efi: Abstract x86 efi_early calls The ARM EFI boot stub doesn't need to care about the efi_early infrastructure that x86 requires in order to do mixed mode thunking. So wrap everything up in an efi_call_early() macro. This allows x86 to do the necessary indirection jumps to call whatever firmware interface is necessary (native or mixed mode), but also allows the ARM folks to mask the fact that they don't support relocation in the boot stub and need to pass 'sys_table_arg' to every function. [ hpa: there are no object code changes from this patch ] Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/20140326091011.GB2958@console-pimps.org Cc: Roy Franz Signed-off-by: H. Peter Anvin diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 5e1ba4f..1e61461 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -21,6 +21,9 @@ static efi_system_table_t *sys_table; static struct efi_config *efi_early; +#define efi_call_early(f, ...) \ + efi_early->call(efi_early->f, __VA_ARGS__); + #define BOOT_SERVICES(bits) \ static void setup_boot_services##bits(struct efi_config *c) \ { \ @@ -78,8 +81,8 @@ __file_size32(void *__fh, efi_char16_t *filename_16, } grow: - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - info_sz, (void **)&info); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + info_sz, (void **)&info); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to alloc mem for file info\n"); return status; @@ -88,12 +91,12 @@ grow: status = efi_early->call((unsigned long)h->get_info, h, &info_guid, &info_sz, info); if (status == EFI_BUFFER_TOO_SMALL) { - efi_early->call(efi_early->free_pool, info); + efi_call_early(free_pool, info); goto grow; } *file_sz = info->file_size; - efi_early->call(efi_early->free_pool, info); + efi_call_early(free_pool, info); if (status != EFI_SUCCESS) efi_printk(sys_table, "Failed to get initrd info\n"); @@ -131,8 +134,8 @@ __file_size64(void *__fh, efi_char16_t *filename_16, } grow: - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - info_sz, (void **)&info); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + info_sz, (void **)&info); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to alloc mem for file info\n"); return status; @@ -141,12 +144,12 @@ grow: status = efi_early->call((unsigned long)h->get_info, h, &info_guid, &info_sz, info); if (status == EFI_BUFFER_TOO_SMALL) { - efi_early->call(efi_early->free_pool, info); + efi_call_early(free_pool, info); goto grow; } *file_sz = info->file_size; - efi_early->call(efi_early->free_pool, info); + efi_call_early(free_pool, info); if (status != EFI_SUCCESS) efi_printk(sys_table, "Failed to get initrd info\n"); @@ -204,8 +207,8 @@ static inline efi_status_t __open_volume32(void *__image, void **__fh) void *handle = (void *)(unsigned long)image->device_handle; unsigned long func; - status = efi_early->call(efi_early->handle_protocol, handle, - &fs_proto, (void **)&io); + status = efi_call_early(handle_protocol, handle, + &fs_proto, (void **)&io); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to handle fs_proto\n"); return status; @@ -230,8 +233,8 @@ static inline efi_status_t __open_volume64(void *__image, void **__fh) void *handle = (void *)(unsigned long)image->device_handle; unsigned long func; - status = efi_early->call(efi_early->handle_protocol, handle, - &fs_proto, (void **)&io); + status = efi_call_early(handle_protocol, handle, + &fs_proto, (void **)&io); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to handle fs_proto\n"); return status; @@ -325,9 +328,7 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) size = pci->romsize + sizeof(*rom); - status = efi_early->call(efi_early->allocate_pool, - EFI_LOADER_DATA, size, &rom); - + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); if (status != EFI_SUCCESS) return status; @@ -361,7 +362,7 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) return status; free_struct: - efi_early->call(efi_early->free_pool, rom); + efi_call_early(free_pool, rom); return status; } @@ -387,8 +388,8 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle, struct pci_setup_rom *rom = NULL; u32 h = handles[i]; - status = efi_early->call(efi_early->handle_protocol, h, - &pci_proto, (void **)&pci); + status = efi_call_early(handle_protocol, h, + &pci_proto, (void **)&pci); if (status != EFI_SUCCESS) continue; @@ -431,9 +432,7 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) size = pci->romsize + sizeof(*rom); - status = efi_early->call(efi_early->allocate_pool, - EFI_LOADER_DATA, size, &rom); - + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); if (status != EFI_SUCCESS) return status; @@ -465,7 +464,7 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) return status; free_struct: - efi_early->call(efi_early->free_pool, rom); + efi_call_early(free_pool, rom); return status; } @@ -492,8 +491,8 @@ setup_efi_pci64(struct boot_params *params, void **pci_handle, struct pci_setup_rom *rom = NULL; u64 h = handles[i]; - status = efi_early->call(efi_early->handle_protocol, h, - &pci_proto, (void **)&pci); + status = efi_call_early(handle_protocol, h, + &pci_proto, (void **)&pci); if (status != EFI_SUCCESS) continue; @@ -524,21 +523,21 @@ static efi_status_t setup_efi_pci(struct boot_params *params) efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; unsigned long size = 0; - status = efi_early->call(efi_early->locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &pci_proto, NULL, &size, pci_handle); + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &pci_proto, NULL, &size, pci_handle); if (status == EFI_BUFFER_TOO_SMALL) { - status = efi_early->call(efi_early->allocate_pool, - EFI_LOADER_DATA, - size, (void **)&pci_handle); + status = efi_call_early(allocate_pool, + EFI_LOADER_DATA, + size, (void **)&pci_handle); if (status != EFI_SUCCESS) return status; - status = efi_early->call(efi_early->locate_handle, - EFI_LOCATE_BY_PROTOCOL, &pci_proto, - NULL, &size, pci_handle); + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, &pci_proto, + NULL, &size, pci_handle); } if (status != EFI_SUCCESS) @@ -550,7 +549,7 @@ static efi_status_t setup_efi_pci(struct boot_params *params) status = setup_efi_pci32(params, pci_handle, size); free_handle: - efi_early->call(efi_early->free_pool, pci_handle); + efi_call_early(free_pool, pci_handle); return status; } @@ -651,13 +650,13 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto, void *dummy = NULL; u32 h = handles[i]; - status = efi_early->call(efi_early->handle_protocol, h, - proto, (void **)&gop32); + status = efi_call_early(handle_protocol, h, + proto, (void **)&gop32); if (status != EFI_SUCCESS) continue; - status = efi_early->call(efi_early->handle_protocol, h, - &conout_proto, &dummy); + status = efi_call_early(handle_protocol, h, + &conout_proto, &dummy); if (status == EFI_SUCCESS) conout_found = true; @@ -754,13 +753,13 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto, void *dummy = NULL; u64 h = handles[i]; - status = efi_early->call(efi_early->handle_protocol, h, - proto, (void **)&gop64); + status = efi_call_early(handle_protocol, h, + proto, (void **)&gop64); if (status != EFI_SUCCESS) continue; - status = efi_early->call(efi_early->handle_protocol, h, - &conout_proto, &dummy); + status = efi_call_early(handle_protocol, h, + &conout_proto, &dummy); if (status == EFI_SUCCESS) conout_found = true; @@ -819,14 +818,14 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, efi_status_t status; void **gop_handle = NULL; - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - size, (void **)&gop_handle); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + size, (void **)&gop_handle); if (status != EFI_SUCCESS) return status; - status = efi_early->call(efi_early->locate_handle, - EFI_LOCATE_BY_PROTOCOL, - proto, NULL, &size, gop_handle); + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + proto, NULL, &size, gop_handle); if (status != EFI_SUCCESS) goto free_handle; @@ -836,7 +835,7 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, status = setup_gop32(si, proto, size, gop_handle); free_handle: - efi_early->call(efi_early->free_pool, gop_handle); + efi_call_early(free_pool, gop_handle); return status; } @@ -858,13 +857,12 @@ setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) void *pciio; u32 handle = handles[i]; - status = efi_early->call(efi_early->handle_protocol, handle, - &uga_proto, (void **)&uga); + status = efi_call_early(handle_protocol, handle, + &uga_proto, (void **)&uga); if (status != EFI_SUCCESS) continue; - efi_early->call(efi_early->handle_protocol, handle, - &pciio_proto, &pciio); + efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); status = efi_early->call((unsigned long)uga->get_mode, uga, &w, &h, &depth, &refresh); @@ -904,13 +902,12 @@ setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) void *pciio; u64 handle = handles[i]; - status = efi_early->call(efi_early->handle_protocol, handle, - &uga_proto, (void **)&uga); + status = efi_call_early(handle_protocol, handle, + &uga_proto, (void **)&uga); if (status != EFI_SUCCESS) continue; - efi_early->call(efi_early->handle_protocol, handle, - &pciio_proto, &pciio); + efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); status = efi_early->call((unsigned long)uga->get_mode, uga, &w, &h, &depth, &refresh); @@ -942,14 +939,14 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, u32 width, height; void **uga_handle = NULL; - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - size, (void **)&uga_handle); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + size, (void **)&uga_handle); if (status != EFI_SUCCESS) return status; - status = efi_early->call(efi_early->locate_handle, - EFI_LOCATE_BY_PROTOCOL, - uga_proto, NULL, &size, uga_handle); + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + uga_proto, NULL, &size, uga_handle); if (status != EFI_SUCCESS) goto free_handle; @@ -981,7 +978,7 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, si->rsvd_pos = 24; free_handle: - efi_early->call(efi_early->free_pool, uga_handle); + efi_call_early(free_pool, uga_handle); return status; } @@ -999,17 +996,17 @@ void setup_graphics(struct boot_params *boot_params) memset(si, 0, sizeof(*si)); size = 0; - status = efi_early->call(efi_early->locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &graphics_proto, NULL, &size, gop_handle); + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &graphics_proto, NULL, &size, gop_handle); if (status == EFI_BUFFER_TOO_SMALL) status = setup_gop(si, &graphics_proto, size); if (status != EFI_SUCCESS) { size = 0; - status = efi_early->call(efi_early->locate_handle, - EFI_LOCATE_BY_PROTOCOL, - &uga_proto, NULL, &size, uga_handle); + status = efi_call_early(locate_handle, + EFI_LOCATE_BY_PROTOCOL, + &uga_proto, NULL, &size, uga_handle); if (status == EFI_BUFFER_TOO_SMALL) setup_uga(si, &uga_proto, size); } @@ -1052,8 +1049,8 @@ struct boot_params *make_boot_params(struct efi_config *c) else setup_boot_services32(efi_early); - status = efi_early->call(efi_early->handle_protocol, handle, - &proto, (void *)&image); + status = efi_call_early(handle_protocol, handle, + &proto, (void *)&image); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); return NULL; @@ -1242,13 +1239,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, sizeof(struct e820entry) * nr_desc; if (*e820ext) { - efi_early->call(efi_early->free_pool, *e820ext); + efi_call_early(free_pool, *e820ext); *e820ext = NULL; *e820ext_size = 0; } - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - size, (void **)e820ext); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + size, (void **)e820ext); if (status == EFI_SUCCESS) *e820ext_size = size; @@ -1292,7 +1289,7 @@ get_map: if (status != EFI_SUCCESS) goto free_mem_map; - efi_early->call(efi_early->free_pool, mem_map); + efi_call_early(free_pool, mem_map); goto get_map; /* Allocated memory, get map again */ } @@ -1311,7 +1308,7 @@ get_map: #endif /* Might as well exit boot services now */ - status = efi_early->call(efi_early->exit_boot_services, handle, key); + status = efi_call_early(exit_boot_services, handle, key); if (status != EFI_SUCCESS) { /* * ExitBootServices() will fail if any of the event @@ -1324,7 +1321,7 @@ get_map: goto free_mem_map; called_exit = true; - efi_early->call(efi_early->free_pool, mem_map); + efi_call_early(free_pool, mem_map); goto get_map; } @@ -1338,7 +1335,7 @@ get_map: return EFI_SUCCESS; free_mem_map: - efi_early->call(efi_early->free_pool, mem_map); + efi_call_early(free_pool, mem_map); return status; } @@ -1379,8 +1376,8 @@ struct boot_params *efi_main(struct efi_config *c, setup_efi_pci(boot_params); - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - sizeof(*gdt), (void **)&gdt); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + sizeof(*gdt), (void **)&gdt); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to alloc mem for gdt structure\n"); goto fail; diff --git a/drivers/firmware/efi/efi-stub-helper.c b/drivers/firmware/efi/efi-stub-helper.c index a028287..ff50aee 100644 --- a/drivers/firmware/efi/efi-stub-helper.c +++ b/drivers/firmware/efi/efi-stub-helper.c @@ -53,22 +53,22 @@ again: * allocation which may be in a new descriptor region. */ *map_size += sizeof(*m); - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - *map_size, (void **)&m); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + *map_size, (void **)&m); if (status != EFI_SUCCESS) goto fail; *desc_size = 0; key = 0; - status = efi_early->call(efi_early->get_memory_map, map_size, m, - &key, desc_size, &desc_version); + status = efi_call_early(get_memory_map, map_size, m, + &key, desc_size, &desc_version); if (status == EFI_BUFFER_TOO_SMALL) { - efi_early->call(efi_early->free_pool, m); + efi_call_early(free_pool, m); goto again; } if (status != EFI_SUCCESS) - efi_early->call(efi_early->free_pool, m); + efi_call_early(free_pool, m); if (key_ptr && status == EFI_SUCCESS) *key_ptr = key; @@ -149,9 +149,9 @@ again: if (!max_addr) status = EFI_NOT_FOUND; else { - status = efi_early->call(efi_early->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &max_addr); + status = efi_call_early(allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &max_addr); if (status != EFI_SUCCESS) { max = max_addr; max_addr = 0; @@ -161,7 +161,7 @@ again: *addr = max_addr; } - efi_early->call(efi_early->free_pool, map); + efi_call_early(free_pool, map); fail: return status; } @@ -221,9 +221,9 @@ static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, if ((start + size) > end) continue; - status = efi_early->call(efi_early->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &start); + status = efi_call_early(allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &start); if (status == EFI_SUCCESS) { *addr = start; break; @@ -233,7 +233,7 @@ static efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, if (i == map_size / desc_size) status = EFI_NOT_FOUND; - efi_early->call(efi_early->free_pool, map); + efi_call_early(free_pool, map); fail: return status; } @@ -247,7 +247,7 @@ static void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, return; nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - efi_early->call(efi_early->free_pages, addr, nr_pages); + efi_call_early(free_pages, addr, nr_pages); } @@ -307,8 +307,8 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, if (!nr_files) return EFI_SUCCESS; - status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, - nr_files * sizeof(*files), (void **)&files); + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + nr_files * sizeof(*files), (void **)&files); if (status != EFI_SUCCESS) { efi_printk(sys_table_arg, "Failed to alloc mem for file handle list\n"); goto fail; @@ -413,7 +413,7 @@ static efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, } - efi_early->call(efi_early->free_pool, files); + efi_call_early(free_pool, files); *load_addr = file_addr; *load_size = file_size_total; @@ -427,7 +427,7 @@ close_handles: for (k = j; k < i; k++) efi_file_close(fh, files[k].handle); free_files: - efi_early->call(efi_early->free_pool, files); + efi_call_early(free_pool, files); fail: *load_addr = 0; *load_size = 0; @@ -473,9 +473,9 @@ static efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, * as possible while respecting the required alignment. */ nr_pages = round_up(alloc_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; - status = efi_early->call(efi_early->allocate_pages, - EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, - nr_pages, &efi_addr); + status = efi_call_early(allocate_pages, + EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, + nr_pages, &efi_addr); new_addr = efi_addr; /* * If preferred address allocation failed allocate as low as -- cgit v0.10.2 From fbd02dd405d0724a0f25897ed4a6813297c9b96f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 23 Mar 2014 22:06:36 -0700 Subject: ip_tunnel: Fix dst ref-count. Commit 10ddceb22ba (ip_tunnel:multicast process cause panic due to skb->_skb_refdst NULL pointer) removed dst-drop call from ip-tunnel-recv. Following commit reintroduce dst-drop and fix the original bug by checking loopback packet before releasing dst. Original bug: https://bugzilla.kernel.org/show_bug.cgi?id=70681 CC: Xin Long Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 1863422f..250be74 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -182,6 +182,14 @@ static int gre_cisco_rcv(struct sk_buff *skb) int i; bool csum_err = false; +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { + /* Looped back packet, drop it! */ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + } +#endif + if (parse_gre_header(skb, &tpi, &csum_err) < 0) goto drop; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 78a89e6..a82a22d 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -416,9 +416,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; tunnel->dev->stats.multicast++; skb->pkt_type = PACKET_BROADCAST; } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6f847dd..8d69626 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -108,6 +108,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) nf_reset(skb); secpath_reset(skb); skb_clear_hash_if_not_l4(skb); + skb_dst_drop(skb); skb->vlan_tci = 0; skb_set_queue_mapping(skb, 0); skb->pkt_type = PACKET_HOST; -- cgit v0.10.2 From 51dfe7b944998eaeb2b34d314f3a6b16a5fd621b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 24 Mar 2014 17:52:12 -0400 Subject: tg3: Do not include vlan acceleration features in vlan_features Including hardware acceleration features in vlan_features breaks stacked vlans (Q-in-Q) by marking the bottom vlan interface as capable of acceleration. This causes one of the tags to be lost and the packets are sent with a sing vlan header. CC: Nithin Nayak Sujir CC: Michael Chan Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 3b6d0ba..70a225c8 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17649,8 +17649,6 @@ static int tg3_init_one(struct pci_dev *pdev, tg3_init_bufmgr_config(tp); - features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; - /* 5700 B0 chips do not support checksumming correctly due * to hardware bugs. */ @@ -17682,7 +17680,8 @@ static int tg3_init_one(struct pci_dev *pdev, features |= NETIF_F_TSO_ECN; } - dev->features |= features; + dev->features |= features | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX; dev->vlan_features |= features; /* -- cgit v0.10.2 From 6797b39e6f6f34c74177736e146406e894b9482b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 26 Mar 2014 13:30:52 -0700 Subject: Input: cypress_ps2 - don't report as a button pads The cypress PS/2 trackpad models supported by the cypress_ps2 driver emulate BTN_RIGHT events in firmware based on the finger position, as part of this no motion events are sent when the finger is in the button area. The INPUT_PROP_BUTTONPAD property is there to indicate to userspace that BTN_RIGHT events should be emulated in userspace, which is not necessary in this case. When INPUT_PROP_BUTTONPAD is advertised userspace will wait for a motion event before propagating the button event higher up the stack, as it needs current abs x + y data for its BTN_RIGHT emulation. Since in the cypress_ps2 pads don't report motion events in the button area, this means that clicks in the button area end up being ignored, so INPUT_PROP_BUTTONPAD actually causes problems for these touchpads, and removing it fixes: https://bugs.freedesktop.org/show_bug.cgi?id=76341 Reported-by: Adam Williamson Tested-by: Adam Williamson Reviewed-by: Peter Hutterer Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/cypress_ps2.c b/drivers/input/mouse/cypress_ps2.c index 87095e2..8af34ff 100644 --- a/drivers/input/mouse/cypress_ps2.c +++ b/drivers/input/mouse/cypress_ps2.c @@ -409,7 +409,6 @@ static int cypress_set_input_params(struct input_dev *input, __clear_bit(REL_X, input->relbit); __clear_bit(REL_Y, input->relbit); - __set_bit(INPUT_PROP_BUTTONPAD, input->propbit); __set_bit(EV_KEY, input->evbit); __set_bit(BTN_LEFT, input->keybit); __set_bit(BTN_RIGHT, input->keybit); -- cgit v0.10.2 From a79121d3b57e7ad61f0b5d23eae05214054f3ccd Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 26 Mar 2014 00:25:41 +0100 Subject: net: mvneta: rename MVNETA_GMAC2_PSC_ENABLE to MVNETA_GMAC2_PCS_ENABLE Bit 3 of the MVNETA_GMAC_CTRL_2 is actually used to enable the PCS, not the PSC: there was a typo in the name of the define, which this commit fixes. Cc: stable@vger.kernel.org Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index f418f4f..d6b04d0 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -161,7 +161,7 @@ #define MVNETA_GMAC_MAX_RX_SIZE_MASK 0x7ffc #define MVNETA_GMAC0_PORT_ENABLE BIT(0) #define MVNETA_GMAC_CTRL_2 0x2c08 -#define MVNETA_GMAC2_PSC_ENABLE BIT(3) +#define MVNETA_GMAC2_PCS_ENABLE BIT(3) #define MVNETA_GMAC2_PORT_RGMII BIT(4) #define MVNETA_GMAC2_PORT_RESET BIT(6) #define MVNETA_GMAC_STATUS 0x2c10 @@ -733,7 +733,7 @@ static void mvneta_port_sgmii_config(struct mvneta_port *pp) u32 val; val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - val |= MVNETA_GMAC2_PSC_ENABLE; + val |= MVNETA_GMAC2_PCS_ENABLE; mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); mvreg_write(pp, MVNETA_SGMII_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); -- cgit v0.10.2 From e3a8786c10e75903f1269474e21fe8cb49c3a670 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 26 Mar 2014 00:25:42 +0100 Subject: net: mvneta: fix usage as a module on RGMII configurations Commit 5445eaf309ff ('mvneta: Try to fix mvneta when compiled as module') fixed the mvneta driver to make it work properly when loaded as a module in SGMII configuration, which was tested successful by the author on the Armada XP OpenBlocks AX3, which uses SGMII. However, it turns out that the Armada XP GP, which uses RGMII, is affected by a similar problem: its SERDES configuration is lost when mvneta is loaded as a module, because this configuration is set by the bootloader, and then lost because the clock is gated by the clock framework until the mvneta driver is loaded again and the clock is re-enabled. However, it turns out that for the RGMII case, setting the SERDES configuration is not sufficient: the PCS enable bit in the MVNETA_GMAC_CTRL_2 register must also be set, like in the SGMII configuration. Therefore, this commit reworks the SGMII/RGMII initialization: the only difference between the two now is a different SERDES configuration, all the rest is identical. In detail, to achieve this, the commit: * Renames MVNETA_SGMII_SERDES_CFG to MVNETA_SERDES_CFG because it is not specific to SGMII, but also used on RGMII configurations. * Adds a MVNETA_RGMII_SERDES_PROTO definition, that must be used as the MVNETA_SERDES_CFG value in RGMII configurations. * Removes the mvneta_gmac_rgmii_set() and mvneta_port_sgmii_config() functions, and instead directly do the SGMII/RGMII configuration in mvneta_port_up(), from where those functions where called. It is worth mentioning that mvneta_gmac_rgmii_set() had an 'enable' parameter that was always passed as '1', so it was pretty useless. * Reworks the mvneta_port_up() function to set the MVNETA_SERDES_CFG register to the appropriate value depending on the RGMII vs. SGMII configuration. It also unconditionally set the PCS_ENABLE bit (was already done for SGMII, but is now also needed for RGMII), and sets the PORT_RGMII bit (which was already done for both SGMII and RGMII). This commit was successfully tested with mvneta compiled as a module, on both the OpenBlocks AX3 (SGMII configuration) and the Armada XP GP (RGMII configuration). Reported-by: Steve McIntyre Cc: stable@vger.kernel.org # 3.11.x: 5445eaf309ff mvneta: Try to fix mvneta when compiled as module Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index d6b04d0..c9c2faa 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -88,8 +88,9 @@ #define MVNETA_TX_IN_PRGRS BIT(1) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c -#define MVNETA_SGMII_SERDES_CFG 0x24A0 +#define MVNETA_SERDES_CFG 0x24A0 #define MVNETA_SGMII_SERDES_PROTO 0x0cc7 +#define MVNETA_RGMII_SERDES_PROTO 0x0667 #define MVNETA_TYPE_PRIO 0x24bc #define MVNETA_FORCE_UNI BIT(21) #define MVNETA_TXQ_CMD_1 0x24e4 @@ -710,35 +711,6 @@ static void mvneta_rxq_bm_disable(struct mvneta_port *pp, mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val); } - - -/* Sets the RGMII Enable bit (RGMIIEn) in port MAC control register */ -static void mvneta_gmac_rgmii_set(struct mvneta_port *pp, int enable) -{ - u32 val; - - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - - if (enable) - val |= MVNETA_GMAC2_PORT_RGMII; - else - val &= ~MVNETA_GMAC2_PORT_RGMII; - - mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); -} - -/* Config SGMII port */ -static void mvneta_port_sgmii_config(struct mvneta_port *pp) -{ - u32 val; - - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); - val |= MVNETA_GMAC2_PCS_ENABLE; - mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); - - mvreg_write(pp, MVNETA_SGMII_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); -} - /* Start the Ethernet port RX and TX activity */ static void mvneta_port_up(struct mvneta_port *pp) { @@ -2756,12 +2728,15 @@ static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) mvreg_write(pp, MVNETA_UNIT_INTR_CAUSE, 0); if (phy_mode == PHY_INTERFACE_MODE_SGMII) - mvneta_port_sgmii_config(pp); + mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO); + else + mvreg_write(pp, MVNETA_SERDES_CFG, MVNETA_RGMII_SERDES_PROTO); - mvneta_gmac_rgmii_set(pp, 1); + val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); + + val |= MVNETA_GMAC2_PCS_ENABLE | MVNETA_GMAC2_PORT_RGMII; /* Cancel Port Reset */ - val = mvreg_read(pp, MVNETA_GMAC_CTRL_2); val &= ~MVNETA_GMAC2_PORT_RESET; mvreg_write(pp, MVNETA_GMAC_CTRL_2, val); -- cgit v0.10.2 From b5f3b75d9d3b5526d973fc0bfee5680bdc6acf2a Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 26 Mar 2014 00:26:55 +0100 Subject: net: mvneta: use devm_ioremap_resource() instead of of_iomap() The mvneta driver currently uses of_iomap(), which has two drawbacks: it doesn't request the resource, and it isn't devm-style so some error handling is needed. This commit switches to use devm_ioremap_resource() instead, which automatically requests the resource (so the I/O registers region shows up properly in /proc/iomem), and also is devm-style, which allows to get rid of some error handling to unmap the I/O registers region. Signed-off-by: Thomas Petazzoni Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c9c2faa..8d76fca 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -2749,6 +2750,7 @@ static void mvneta_port_power_up(struct mvneta_port *pp, int phy_mode) static int mvneta_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram_target_info; + struct resource *res; struct device_node *dn = pdev->dev.of_node; struct device_node *phy_node; u32 phy_addr; @@ -2813,9 +2815,15 @@ static int mvneta_probe(struct platform_device *pdev) clk_prepare_enable(pp->clk); - pp->base = of_iomap(dn, 0); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + err = -ENODEV; + goto err_clk; + } + + pp->base = devm_ioremap_resource(&pdev->dev, res); if (pp->base == NULL) { - err = -ENOMEM; + err = PTR_ERR(pp->base); goto err_clk; } @@ -2823,7 +2831,7 @@ static int mvneta_probe(struct platform_device *pdev) pp->stats = alloc_percpu(struct mvneta_pcpu_stats); if (!pp->stats) { err = -ENOMEM; - goto err_unmap; + goto err_clk; } for_each_possible_cpu(cpu) { @@ -2888,8 +2896,6 @@ err_deinit: mvneta_deinit(pp); err_free_stats: free_percpu(pp->stats); -err_unmap: - iounmap(pp->base); err_clk: clk_disable_unprepare(pp->clk); err_free_irq: @@ -2909,7 +2915,6 @@ static int mvneta_remove(struct platform_device *pdev) mvneta_deinit(pp); clk_disable_unprepare(pp->clk); free_percpu(pp->stats); - iounmap(pp->base); irq_dispose_mapping(dev->irq); free_netdev(dev); -- cgit v0.10.2 From de1443916791d75fdd26becb116898277bb0273f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Mar 2014 18:42:27 -0700 Subject: net: unix: non blocking recvmsg() should not return -EINTR Some applications didn't expect recvmsg() on a non blocking socket could return -EINTR. This possibility was added as a side effect of commit b3ca9b02b00704 ("net: fix multithreaded signal handling in unix recv routines"). To hit this bug, you need to be a bit unlucky, as the u->readlock mutex is usually held for very small periods. Fixes: b3ca9b02b00704 ("net: fix multithreaded signal handling in unix recv routines") Signed-off-by: Eric Dumazet Cc: Rainer Weikusat Signed-off-by: David S. Miller diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ce6ec6c..94404f1 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1787,8 +1787,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } @@ -1913,6 +1916,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); int copied = 0; + int noblock = flags & MSG_DONTWAIT; int check_creds = 0; int target; int err = 0; @@ -1928,7 +1932,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); - timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, noblock); /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg @@ -1940,8 +1944,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, } err = mutex_lock_interruptible(&u->readlock); - if (err) { - err = sock_intr_errno(timeo); + if (unlikely(err)) { + /* recvmsg() in non blocking mode is supposed to return -EAGAIN + * sk_rcvtimeo is not honored by mutex_lock_interruptible() + */ + err = noblock ? -EAGAIN : -ERESTARTSYS; goto out; } -- cgit v0.10.2 From 347cf10aed1657a2b385a95f92763a67062c5ad3 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 25 Mar 2014 14:38:44 +1000 Subject: drm/udl: take reference to device struct for dma-bufs this stops the device from being deleted before all the dma-bufs on it are freed, this fixes an oops when you unplug a udl device while it has imported a buffer from another device. Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index 8d67b94..0394811 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -177,8 +177,10 @@ void udl_gem_free_object(struct drm_gem_object *gem_obj) if (obj->vmapping) udl_gem_vunmap(obj); - if (gem_obj->import_attach) + if (gem_obj->import_attach) { drm_prime_gem_destroy(gem_obj, obj->sg); + put_device(gem_obj->dev->dev); + } if (obj->pages) udl_gem_put_pages(obj); @@ -256,9 +258,12 @@ struct drm_gem_object *udl_gem_prime_import(struct drm_device *dev, int ret; /* need to attach */ + get_device(dev->dev); attach = dma_buf_attach(dma_buf, dev->dev); - if (IS_ERR(attach)) + if (IS_ERR(attach)) { + put_device(dev->dev); return ERR_CAST(attach); + } get_dma_buf(dma_buf); @@ -282,6 +287,6 @@ fail_unmap: fail_detach: dma_buf_detach(dma_buf, attach); dma_buf_put(dma_buf); - + put_device(dev->dev); return ERR_PTR(ret); } -- cgit v0.10.2 From adbbdbac04f093c0abf946b1e93e4e5291808491 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 26 Mar 2014 14:09:37 +1000 Subject: drm/nouveau: fail runtime pm properly. If we were on a non-optimus device, we'd return -EINVAL, this would lead to the over engineered runtime pm system to go into an error state, subsequent get_sync's would fail, so we'd never be able to open the device again. (like really get_sync shouldn't fail if the device isn't powered down). Signed-off-by: Dave Airlie Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 89c484d..4ee702a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -866,13 +866,16 @@ static int nouveau_pmops_runtime_suspend(struct device *dev) struct drm_device *drm_dev = pci_get_drvdata(pdev); int ret; - if (nouveau_runtime_pm == 0) - return -EINVAL; + if (nouveau_runtime_pm == 0) { + pm_runtime_forbid(dev); + return -EBUSY; + } /* are we optimus enabled? */ if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) { DRM_DEBUG_DRIVER("failing to power off - not optimus\n"); - return -EINVAL; + pm_runtime_forbid(dev); + return -EBUSY; } nv_debug_level(SILENT); @@ -923,12 +926,15 @@ static int nouveau_pmops_runtime_idle(struct device *dev) struct nouveau_drm *drm = nouveau_drm(drm_dev); struct drm_crtc *crtc; - if (nouveau_runtime_pm == 0) + if (nouveau_runtime_pm == 0) { + pm_runtime_forbid(dev); return -EBUSY; + } /* are we optimus enabled? */ if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) { DRM_DEBUG_DRIVER("failing to power off - not optimus\n"); + pm_runtime_forbid(dev); return -EBUSY; } -- cgit v0.10.2 From 03803ef66d22702af124d21552f90ac32da9fef5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 27 Mar 2014 14:24:39 +0100 Subject: ata: fix Marvell SATA driver dependencies Make sata_mv host driver depend on PCI || ARCH_DOVE || ARCH_KIRKWOOD || ARCH_MV78XX0 || ARCH_MVEBU || ARCH_ORION5X config options as Marvell SATA support covers both Marvell PCI devices and Marvell Dove, Kirkwood, MV78xx0, Armada 370/XP and Orion5x SoCs (for non-PCI devices the driver to work requires suitable device tree node or platform device to be defined). Additionally allow the driver build if COMPILE_TEST config option is set. Cc: Jason Cooper Cc: Andrew Lunn Cc: Gregory Clement Cc: Sebastian Hesselbarth Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Tejun Heo diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index af0f71a..71bd29c 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -281,6 +281,8 @@ config SATA_HIGHBANK config SATA_MV tristate "Marvell SATA support" + depends on PCI || ARCH_DOVE || ARCH_KIRKWOOD || ARCH_MV78XX0 || \ + ARCH_MVEBU || ARCH_ORION5X || COMPILE_TEST help This option enables support for the Marvell Serial ATA family. Currently supports 88SX[56]0[48][01] PCI(-X) chips, -- cgit v0.10.2 From 75c5a52da3fc2a06abb6c6192bdf5d680e56d37d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 26 Mar 2014 06:20:14 +0100 Subject: vfs: Allocate anon_inode_inode in anon_inode_init() Currently we allocated anon_inode_inode in anon_inodefs_mount. This is somewhat fragile as if that function ever gets called again, it will overwrite anon_inode_inode pointer. So move the initialization of anon_inode_inode to anon_inode_init(). Signed-off-by: Jan Kara [ Further simplified on suggestion from Dave Jones ] Signed-off-by: Linus Torvalds diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 42fcc46..80ef38c 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -41,19 +41,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct dentry *root; - root = mount_pseudo(fs_type, "anon_inode:", NULL, + return mount_pseudo(fs_type, "anon_inode:", NULL, &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); - if (!IS_ERR(root)) { - struct super_block *s = root->d_sb; - anon_inode_inode = alloc_anon_inode(s); - if (IS_ERR(anon_inode_inode)) { - dput(root); - deactivate_locked_super(s); - root = ERR_CAST(anon_inode_inode); - } - } - return root; } static struct file_system_type anon_inode_fs_type = { @@ -175,18 +164,15 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd); static int __init anon_inode_init(void) { - int error; - anon_inode_mnt = kern_mount(&anon_inode_fs_type); - if (IS_ERR(anon_inode_mnt)) { - error = PTR_ERR(anon_inode_mnt); - goto err_unregister_filesystem; - } - return 0; + if (IS_ERR(anon_inode_mnt)) + panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt)); -err_unregister_filesystem: - unregister_filesystem(&anon_inode_fs_type); - panic(KERN_ERR "anon_inode_init() failed (%d)\n", error); + anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + if (IS_ERR(anon_inode_inode)) + panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); + + return 0; } fs_initcall(anon_inode_init); -- cgit v0.10.2 From 681daee2443291419c57cccb0671f5f94a839005 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 26 Mar 2014 13:03:00 +0800 Subject: virtio-net: correct error handling of virtqueue_kick() Current error handling of virtqueue_kick() was wrong in two places: - The skb were freed immediately when virtqueue_kick() fail during xmit. This may lead double free since the skb was not detached from the virtqueue. - try_fill_recv() returns false when virtqueue_kick() fail. This will lead unnecessary rescheduling of refill work. Actually, it's safe to just ignore the kick failure in those two places. So this patch fixes this by partially revert commit 67975901183799af8e93ec60e322f9e2a1940b9b. Fixes 67975901183799af8e93ec60e322f9e2a1940b9b (virtio_net: verify if virtqueue_kick() succeeded). Cc: Heinz Graalfs Cc: Rusty Russell Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 5632a99..841b608 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -671,8 +671,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) if (err) break; } while (rq->vq->num_free); - if (unlikely(!virtqueue_kick(rq->vq))) - return false; + virtqueue_kick(rq->vq); return !oom; } @@ -877,7 +876,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) err = xmit_skb(sq, skb); /* This should not happen! */ - if (unlikely(err) || unlikely(!virtqueue_kick(sq->vq))) { + if (unlikely(err)) { dev->stats.tx_fifo_errors++; if (net_ratelimit()) dev_warn(&dev->dev, @@ -886,6 +885,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb(skb); return NETDEV_TX_OK; } + virtqueue_kick(sq->vq); /* Don't wait up for transmitted skbs to be freed. */ skb_orphan(skb); -- cgit v0.10.2 From ca3ba2a2f4a49a308e7d78c784d51b2332064f15 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 28 Feb 2014 11:30:29 +0800 Subject: x86, hyperv: Bypass the timer_irq_works() check This patch bypass the timer_irq_works() check for hyperv guest since: - It was guaranteed to work. - timer_irq_works() may fail sometime due to the lpj calibration were inaccurate in a hyperv guest or a buggy host. In the future, we should get the tsc frequency from hypervisor and use preset lpj instead. [ hpa: I would prefer to not defer things to "the future" in the future... ] Cc: K. Y. Srinivasan Cc: Haiyang Zhang Cc: Acked-by: K. Y. Srinivasan Signed-off-by: Jason Wang Link: http://lkml.kernel.org/r/1393558229-14755-1-git-send-email-jasowang@redhat.com Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9f7ca26..832d05a 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -26,6 +26,7 @@ #include #include #include +#include struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -105,6 +106,11 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); + +#ifdef CONFIG_X86_IO_APIC + no_timer_check = 1; +#endif + } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { -- cgit v0.10.2 From 14a0d635d18d0fb552dcc979d6d25106e6541f2e Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Wed, 26 Mar 2014 14:32:51 +0100 Subject: usbnet: include wait queue head in device structure This fixes a race which happens by freeing an object on the stack. Quoting Julius: > The issue is > that it calls usbnet_terminate_urbs() before that, which temporarily > installs a waitqueue in dev->wait in order to be able to wait on the > tasklet to run and finish up some queues. The waiting itself looks > okay, but the access to 'dev->wait' is totally unprotected and can > race arbitrarily. I think in this case usbnet_bh() managed to succeed > it's dev->wait check just before usbnet_terminate_urbs() sets it back > to NULL. The latter then finishes and the waitqueue_t structure on its > stack gets overwritten by other functions halfway through the > wake_up() call in usbnet_bh(). The fix is to just not allocate the data structure on the stack. As dev->wait is abused as a flag it also takes a runtime PM change to fix this bug. Signed-off-by: Oliver Neukum Reported-by: Grant Grundler Tested-by: Grant Grundler Signed-off-by: David S. Miller diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index dd10d58..f9e96c4 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -752,14 +752,12 @@ EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs); // precondition: never called in_interrupt static void usbnet_terminate_urbs(struct usbnet *dev) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(unlink_wakeup); DECLARE_WAITQUEUE(wait, current); int temp; /* ensure there are no more active urbs */ - add_wait_queue(&unlink_wakeup, &wait); + add_wait_queue(&dev->wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); - dev->wait = &unlink_wakeup; temp = unlink_urbs(dev, &dev->txq) + unlink_urbs(dev, &dev->rxq); @@ -773,15 +771,14 @@ static void usbnet_terminate_urbs(struct usbnet *dev) "waited for %d urb completions\n", temp); } set_current_state(TASK_RUNNING); - dev->wait = NULL; - remove_wait_queue(&unlink_wakeup, &wait); + remove_wait_queue(&dev->wait, &wait); } int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct driver_info *info = dev->driver_info; - int retval; + int retval, pm; clear_bit(EVENT_DEV_OPEN, &dev->flags); netif_stop_queue (net); @@ -791,6 +788,8 @@ int usbnet_stop (struct net_device *net) net->stats.rx_packets, net->stats.tx_packets, net->stats.rx_errors, net->stats.tx_errors); + /* to not race resume */ + pm = usb_autopm_get_interface(dev->intf); /* allow minidriver to stop correctly (wireless devices to turn off * radio etc) */ if (info->stop) { @@ -817,6 +816,9 @@ int usbnet_stop (struct net_device *net) dev->flags = 0; del_timer_sync (&dev->delay); tasklet_kill (&dev->bh); + if (!pm) + usb_autopm_put_interface(dev->intf); + if (info->manage_power && !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags)) info->manage_power(dev, 0); @@ -1437,11 +1439,12 @@ static void usbnet_bh (unsigned long param) /* restart RX again after disabling due to high error rate */ clear_bit(EVENT_RX_KILL, &dev->flags); - // waiting for all pending urbs to complete? - if (dev->wait) { - if ((dev->txq.qlen + dev->rxq.qlen + dev->done.qlen) == 0) { - wake_up (dev->wait); - } + /* waiting for all pending urbs to complete? + * only then can we forgo submitting anew + */ + if (waitqueue_active(&dev->wait)) { + if (dev->txq.qlen + dev->rxq.qlen + dev->done.qlen == 0) + wake_up_all(&dev->wait); // or are we maybe short a few urbs? } else if (netif_running (dev->net) && @@ -1580,6 +1583,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->driver_name = name; dev->msg_enable = netif_msg_init (msg_level, NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK); + init_waitqueue_head(&dev->wait); skb_queue_head_init (&dev->rxq); skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); @@ -1791,9 +1795,10 @@ int usbnet_resume (struct usb_interface *intf) spin_unlock_irq(&dev->txq.lock); if (test_bit(EVENT_DEV_OPEN, &dev->flags)) { - /* handle remote wakeup ASAP */ - if (!dev->wait && - netif_device_present(dev->net) && + /* handle remote wakeup ASAP + * we cannot race against stop + */ + if (netif_device_present(dev->net) && !timer_pending(&dev->delay) && !test_bit(EVENT_RX_HALT, &dev->flags)) rx_alloc_submit(dev, GFP_NOIO); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index e303eef..0662e98 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -30,7 +30,7 @@ struct usbnet { struct driver_info *driver_info; const char *driver_name; void *driver_priv; - wait_queue_head_t *wait; + wait_queue_head_t wait; struct mutex phy_mutex; unsigned char suspend_count; unsigned char pkt_cnt, pkt_err; -- cgit v0.10.2 From fc0d48b8fb449ca007b2057328abf736cb516168 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 26 Mar 2014 11:47:56 -0400 Subject: vlan: Set hard_header_len according to available acceleration Currently, if the card supports CTAG acceleration we do not account for the vlan header even if we are configuring an 8021AD vlan. This may not be best since we'll do software tagging for 8021AD which will cause data copy on skb head expansion Configure the length based on available hw offload capabilities and vlan protocol. CC: Patrick McHardy Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ec99099..175273f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -307,9 +307,11 @@ static void vlan_sync_address(struct net_device *dev, static void vlan_transfer_features(struct net_device *dev, struct net_device *vlandev) { + struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); + vlandev->gso_max_size = dev->gso_max_size; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4b65aa4..a9591ff 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -592,7 +592,8 @@ static int vlan_dev_init(struct net_device *dev) #endif dev->needed_headroom = real_dev->needed_headroom; - if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { + if (vlan_hw_offload_capable(real_dev->features, + vlan_dev_priv(dev)->vlan_proto)) { dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { -- cgit v0.10.2 From 36d5fe6a000790f56039afe26834265db0a3ad4c Mon Sep 17 00:00:00 2001 From: Zoltan Kiss Date: Wed, 26 Mar 2014 22:37:45 +0000 Subject: core, nfqueue, openvswitch: Orphan frags in skb_zerocopy and handle errors skb_zerocopy can copy elements of the frags array between skbs, but it doesn't orphan them. Also, it doesn't handle errors, so this patch takes care of that as well, and modify the callers accordingly. skb_tx_error() is also added to the callers so they will signal the failed delivery towards the creator of the skb. Signed-off-by: Zoltan Kiss Signed-off-by: David S. Miller diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5e1e6f2..15ede6a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2451,8 +2451,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, unsigned int flags); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); -void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, - int len, int hlen); +int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, + int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 869c7af..97e5a2c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2127,25 +2127,31 @@ EXPORT_SYMBOL_GPL(skb_zerocopy_headlen); * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. + * + * Return value: + * 0: everything is OK + * -ENOMEM: couldn't orphan frags of @from due to lack of memory + * -EFAULT: skb_copy_bits() found some problem with skb geometry */ -void -skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +int +skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ + int ret; struct page *page; unsigned int offset; BUG_ON(!from->head_frag && !hlen); /* dont bother with small payloads */ - if (len <= skb_tailroom(to)) { - skb_copy_bits(from, 0, skb_put(to, len), len); - return; - } + if (len <= skb_tailroom(to)) + return skb_copy_bits(from, 0, skb_put(to, len), len); if (hlen) { - skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + if (unlikely(ret)) + return ret; len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); @@ -2163,6 +2169,11 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) to->len += len + plen; to->data_len += len + plen; + if (unlikely(skb_orphan_frags(from, GFP_ATOMIC))) { + skb_tx_error(from); + return -ENOMEM; + } + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { if (!len) break; @@ -2173,6 +2184,8 @@ skb_zerocopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) j++; } skb_shinfo(to)->nr_frags = j; + + return 0; } EXPORT_SYMBOL_GPL(skb_zerocopy); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index f072fe8..108120f 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -354,13 +354,16 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb = nfnetlink_alloc_skb(net, size, queue->peer_portid, GFP_ATOMIC); - if (!skb) + if (!skb) { + skb_tx_error(entskb); return NULL; + } nlh = nlmsg_put(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg), 0); if (!nlh) { + skb_tx_error(entskb); kfree_skb(skb); return NULL; } @@ -488,13 +491,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, nla->nla_type = NFQA_PAYLOAD; nla->nla_len = nla_attr_size(data_len); - skb_zerocopy(skb, entskb, data_len, hlen); + if (skb_zerocopy(skb, entskb, data_len, hlen)) + goto nla_put_failure; } nlh->nlmsg_len = skb->len; return skb; nla_put_failure: + skb_tx_error(entskb); kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); return NULL; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8601b32..270b77d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -464,7 +464,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } nla->nla_len = nla_attr_size(skb->len); - skb_zerocopy(user_skb, skb, skb->len, hlen); + err = skb_zerocopy(user_skb, skb, skb->len, hlen); + if (err) + goto out; /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) { @@ -478,6 +480,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); out: + if (err) + skb_tx_error(skb); kfree_skb(nskb); return err; } -- cgit v0.10.2 From 97a5221f56bad2e1c7e8ab55da4ac4748ef59c64 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 27 Mar 2014 09:28:31 +0800 Subject: net/mlx4_core: pass pci_device_id.driver_data to __mlx4_init_one during reset The second parameter of __mlx4_init_one() is used to identify whether the pci_dev is a PF or VF. Currently, when it is invoked in mlx4_pci_slot_reset() this information is missed. This patch match the pci_dev with mlx4_pci_table and passes the pci_device_id.driver_data to __mlx4_init_one() in mlx4_pci_slot_reset(). Signed-off-by: Wei Yang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 936c153..d413e60 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2681,7 +2681,11 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - int ret = __mlx4_init_one(pdev, 0); + const struct pci_device_id *id; + int ret; + + id = pci_match_id(mlx4_pci_table, pdev); + ret = __mlx4_init_one(pdev, id->driver_data); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } -- cgit v0.10.2 From c53c6beddc0ebb6feb5d64488c2c0cd23dd7f43d Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Mar 2014 02:31:08 +0000 Subject: drm/radeon: fix runtime suspend breaking secondary GPUs Same fix as for nouveau, when we fail with EINVAL, subsequent gets fail hard, causing the device not to open. Signed-off-by: Dave Airlie Reviewed-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 84a1bbb7..f633c27 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -403,11 +403,15 @@ static int radeon_pmops_runtime_suspend(struct device *dev) struct drm_device *drm_dev = pci_get_drvdata(pdev); int ret; - if (radeon_runtime_pm == 0) - return -EINVAL; + if (radeon_runtime_pm == 0) { + pm_runtime_forbid(dev); + return -EBUSY; + } - if (radeon_runtime_pm == -1 && !radeon_is_px()) - return -EINVAL; + if (radeon_runtime_pm == -1 && !radeon_is_px()) { + pm_runtime_forbid(dev); + return -EBUSY; + } drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; drm_kms_helper_poll_disable(drm_dev); @@ -456,12 +460,15 @@ static int radeon_pmops_runtime_idle(struct device *dev) struct drm_device *drm_dev = pci_get_drvdata(pdev); struct drm_crtc *crtc; - if (radeon_runtime_pm == 0) + if (radeon_runtime_pm == 0) { + pm_runtime_forbid(dev); return -EBUSY; + } /* are we PX enabled? */ if (radeon_runtime_pm == -1 && !radeon_is_px()) { DRM_DEBUG_DRIVER("failing to power off - not px\n"); + pm_runtime_forbid(dev); return -EBUSY; } -- cgit v0.10.2 From 8ee661b505613ef2747b350ca2871a31b3781bee Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 26 Mar 2014 20:10:09 +0100 Subject: drm/i915: Undo gtt scratch pte unmapping again It apparently blows up on some machines. This functionally reverts commit 828c79087cec61eaf4c76bb32c222fbe35ac3930 Author: Ben Widawsky Date: Wed Oct 16 09:21:30 2013 -0700 drm/i915: Disable GGTT PTEs on GEN6+ suspend Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=64841 Reported-and-Tested-by: Brad Jackson Cc: stable@vger.kernel.org Cc: Takashi Iwai Cc: Paulo Zanoni Cc: Todd Previte Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 40a2b36..d278be1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -842,7 +842,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.clear_range(&dev_priv->gtt.base, dev_priv->gtt.base.start / PAGE_SIZE, dev_priv->gtt.base.total / PAGE_SIZE, - false); + true); } void i915_gem_restore_gtt_mappings(struct drm_device *dev) -- cgit v0.10.2 From cab5e127eef040399902caa8e1510795583fa03a Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 27 Mar 2014 16:30:49 -0700 Subject: time: Revert to calling clock_was_set_delayed() while in irq context In commit 47a1b796306356f35 ("tick/timekeeping: Call update_wall_time outside the jiffies lock"), we moved to calling clock_was_set() due to the fact that we were no longer holding the timekeeping or jiffies lock. However, there is still the problem that clock_was_set() triggers an IPI, which cannot be done from the timer's hard irq context, and will generate WARN_ON warnings. Apparently in my earlier testing, I'm guessing I didn't bump the dmesg log level, so I somehow missed the WARN_ONs. Thus we need to revert back to calling clock_was_set_delayed(). Signed-off-by: John Stultz Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1395963049-11923-1-git-send-email-john.stultz@linaro.org Signed-off-by: Ingo Molnar diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 0aa4ce81..5b40279 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1435,7 +1435,8 @@ void update_wall_time(void) out: raw_spin_unlock_irqrestore(&timekeeper_lock, flags); if (clock_set) - clock_was_set(); + /* Have to call _delayed version, since in irq context*/ + clock_was_set_delayed(); } /** -- cgit v0.10.2 From 421e08c41fda1f0c2ff6af81a67b491389b653a5 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Fri, 28 Mar 2014 00:43:00 -0700 Subject: Input: synaptics - add manual min/max quirk The new Lenovo Haswell series (-40's) contains a new Synaptics touchpad. However, these new Synaptics devices report bad axis ranges. Under Windows, it is not a problem because the Windows driver uses RMI4 over SMBus to talk to the device. Under Linux, we are using the PS/2 fallback interface and it occurs the reported ranges are wrong. Of course, it would be too easy to have only one range for the whole series, each touchpad seems to be calibrated in a different way. We can not use SMBus to get the actual range because I suspect the firmware will switch into the SMBus mode and stop talking through PS/2 (this is the case for hybrid HID over I2C / PS/2 Synaptics touchpads). So as a temporary solution (until RMI4 land into upstream), start a new list of quirks with the min/max manually set. Signed-off-by: Benjamin Tissoires CC: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 26386f9..ef148f9 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -265,11 +265,22 @@ static int synaptics_identify(struct psmouse *psmouse) * Read touchpad resolution and maximum reported coordinates * Resolution is left zero if touchpad does not support the query */ + +static const int *quirk_min_max; + static int synaptics_resolution(struct psmouse *psmouse) { struct synaptics_data *priv = psmouse->private; unsigned char resp[3]; + if (quirk_min_max) { + priv->x_min = quirk_min_max[0]; + priv->x_max = quirk_min_max[1]; + priv->y_min = quirk_min_max[2]; + priv->y_max = quirk_min_max[3]; + return 0; + } + if (SYN_ID_MAJOR(priv->identity) < 4) return 0; @@ -1485,10 +1496,46 @@ static const struct dmi_system_id olpc_dmi_table[] __initconst = { { } }; +static const struct dmi_system_id min_max_dmi_table[] __initconst = { +#if defined(CONFIG_DMI) + { + /* Lenovo ThinkPad Helix */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad Helix"), + }, + .driver_data = (int []){1024, 5052, 2258, 4832}, + }, + { + /* Lenovo ThinkPad T440s */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T440"), + }, + .driver_data = (int []){1024, 5112, 2024, 4832}, + }, + { + /* Lenovo ThinkPad T540p */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T540"), + }, + .driver_data = (int []){1024, 5056, 2058, 4832}, + }, +#endif + { } +}; + void __init synaptics_module_init(void) { + const struct dmi_system_id *min_max_dmi; + impaired_toshiba_kbc = dmi_check_system(toshiba_dmi_table); broken_olpc_ec = dmi_check_system(olpc_dmi_table); + + min_max_dmi = dmi_first_match(min_max_dmi_table); + if (min_max_dmi) + quirk_min_max = min_max_dmi->driver_data; } static int __synaptics_init(struct psmouse *psmouse, bool absolute_mode) -- cgit v0.10.2 From 8a0435d958fb36d93b8df610124a0e91e5675c82 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 28 Mar 2014 01:01:38 -0700 Subject: Input: synaptics - add manual min/max quirk for ThinkPad X240 This extends Benjamin Tissoires manual min/max quirk table with support for the ThinkPad X240. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index ef148f9..d8d49d1 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1507,6 +1507,14 @@ static const struct dmi_system_id min_max_dmi_table[] __initconst = { .driver_data = (int []){1024, 5052, 2258, 4832}, }, { + /* Lenovo ThinkPad X240 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X240"), + }, + .driver_data = (int []){1232, 5710, 1156, 4696}, + }, + { /* Lenovo ThinkPad T440s */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), -- cgit v0.10.2 From 335a67d2ad481b03607bf30a38c28178fa1ad61a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 27 Mar 2014 02:01:34 -0400 Subject: random32: assign to network folks in MAINTAINERS lib/random32.c was split out of the network code and is de-facto still maintained by the almighty net/ gods. Make it a bit more official so that people who aren't aware of that know where to send their patches. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 82640e6..0b3c40f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6006,6 +6006,7 @@ F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h F: tools/net/ F: tools/testing/selftests/net/ +F: lib/random32.c NETWORKING [IPv4/IPv6] M: "David S. Miller" -- cgit v0.10.2 From 05efa8c943b1d5d90fa8c8147571837573338bb6 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 28 Mar 2014 17:38:42 +0100 Subject: random32: avoid attempt to late reseed if in the middle of seeding Commit 4af712e8df ("random32: add prandom_reseed_late() and call when nonblocking pool becomes initialized") has added a late reseed stage that happens as soon as the nonblocking pool is marked as initialized. This fails in the case that the nonblocking pool gets initialized during __prandom_reseed()'s call to get_random_bytes(). In that case we'd double back into __prandom_reseed() in an attempt to do a late reseed - deadlocking on 'lock' early on in the boot process. Instead, just avoid even waiting to do a reseed if a reseed is already occuring. Fixes: 4af712e8df99 ("random32: add prandom_reseed_late() and call when nonblocking pool becomes initialized") Signed-off-by: Sasha Levin Acked-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/lib/random32.c b/lib/random32.c index 1e5b2df..6148967 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -244,8 +244,19 @@ static void __prandom_reseed(bool late) static bool latch = false; static DEFINE_SPINLOCK(lock); + /* Asking for random bytes might result in bytes getting + * moved into the nonblocking pool and thus marking it + * as initialized. In this case we would double back into + * this function and attempt to do a late reseed. + * Ignore the pointless attempt to reseed again if we're + * already waiting for bytes when the nonblocking pool + * got initialized. + */ + /* only allow initial seeding (late == false) once */ - spin_lock_irqsave(&lock, flags); + if (!spin_trylock_irqsave(&lock, flags)) + return; + if (latch && !late) goto out; latch = true; -- cgit v0.10.2 From d8316f3991d207fe32881a9ac20241be8fa2bad0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 27 Mar 2014 12:00:26 +0200 Subject: vhost: fix total length when packets are too short When mergeable buffers are disabled, and the incoming packet is too large for the rx buffer, get_rx_bufs returns success. This was intentional in order for make recvmsg truncate the packet and then handle_rx would detect err != sock_len and drop it. Unfortunately we pass the original sock_len to recvmsg - which means we use parts of iov not fully validated. Fix this up by detecting this overrun and doing packet drop immediately. CVE-2014-0077 Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index a0fa5de..026be58 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -532,6 +532,12 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, *iovcount = seg; if (unlikely(log)) *log_num = nlogs; + + /* Detect overrun */ + if (unlikely(datalen > 0)) { + r = UIO_MAXIOV + 1; + goto err; + } return headcount; err: vhost_discard_vq_desc(vq, headcount); @@ -587,6 +593,14 @@ static void handle_rx(struct vhost_net *net) /* On error, stop handling until the next kick. */ if (unlikely(headcount < 0)) break; + /* On overrun, truncate and discard */ + if (unlikely(headcount > UIO_MAXIOV)) { + msg.msg_iovlen = 1; + err = sock->ops->recvmsg(NULL, sock, &msg, + 1, MSG_DONTWAIT | MSG_TRUNC); + pr_debug("Discarded rx packet: len %zd\n", sock_len); + continue; + } /* OK, now we need to know about added descriptors. */ if (!headcount) { if (unlikely(vhost_enable_notify(&net->dev, vq))) { -- cgit v0.10.2 From a39ee449f96a2cd44ce056d8a0a112211a9b1a1f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 27 Mar 2014 12:53:37 +0200 Subject: vhost: validate vhost_get_vq_desc return value vhost fails to validate negative error code from vhost_get_vq_desc causing a crash: we are using -EFAULT which is 0xfffffff2 as vector size, which exceeds the allocated size. The code in question was introduced in commit 8dd014adfea6f173c1ef6378f7e5e7924866c923 vhost-net: mergeable buffers support CVE-2014-0055 Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 026be58..e1e22e0 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -505,9 +505,13 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, r = -ENOBUFS; goto err; } - d = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg, + r = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg, ARRAY_SIZE(vq->iov) - seg, &out, &in, log, log_num); + if (unlikely(r < 0)) + goto err; + + d = r; if (d == vq->num) { r = 0; goto err; -- cgit v0.10.2 From 12464bb8de021a01fa7ec9299c273c247df7f198 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 27 Mar 2014 21:46:55 +0900 Subject: bridge: Fix inabillity to retrieve vlan tags when tx offload is disabled Bridge vlan code (br_vlan_get_tag()) assumes that all frames have vlan_tci if they are tagged, but if vlan tx offload is manually disabled on bridge device and frames are sent from vlan device on the bridge device, the tags are embedded in skb->data and they break this assumption. Extract embedded vlan tags and move them to vlan_tci at ingress. Signed-off-by: Toshiaki Makita Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 63f0455..8fe8b71 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -49,14 +49,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); - if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) - goto out; - BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); + if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + goto out; + if (is_broadcast_ether_addr(dest)) br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 8249ca76..44f31af 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -174,6 +174,18 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, if (!v) return false; + /* If vlan tx offload is disabled on bridge device and frame was + * sent from vlan device on the bridge device, it does not have + * HW accelerated vlan tag. + */ + if (unlikely(!vlan_tx_tag_present(skb) && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD)))) { + skb = vlan_untag(skb); + if (unlikely(!skb)) + return false; + } + err = br_vlan_get_tag(skb, vid); if (!*vid) { u16 pvid = br_get_pvid(v); -- cgit v0.10.2 From 99b192da9c99284ad3374132e56f66995cadc6b4 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 27 Mar 2014 21:46:56 +0900 Subject: bridge: Fix handling stacked vlan tags If a bridge with vlan_filtering enabled receives frames with stacked vlan tags, i.e., they have two vlan tags, br_vlan_untag() strips not only the outer tag but also the inner tag. br_vlan_untag() is called only from br_handle_vlan(), and in this case, it is enough to set skb->vlan_tci to 0 here, because vlan_tci has already been set before calling br_handle_vlan(). Signed-off-by: Toshiaki Makita Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 44f31af..c77eed5 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -119,22 +119,6 @@ static void __vlan_flush(struct net_port_vlans *v) kfree_rcu(v, rcu); } -/* Strip the tag from the packet. Will return skb with tci set 0. */ -static struct sk_buff *br_vlan_untag(struct sk_buff *skb) -{ - if (skb->protocol != htons(ETH_P_8021Q)) { - skb->vlan_tci = 0; - return skb; - } - - skb->vlan_tci = 0; - skb = vlan_untag(skb); - if (skb) - skb->vlan_tci = 0; - - return skb; -} - struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_port_vlans *pv, struct sk_buff *skb) @@ -150,7 +134,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, */ br_vlan_get_tag(skb, &vid); if (test_bit(vid, pv->untagged_bitmap)) - skb = br_vlan_untag(skb); + skb->vlan_tci = 0; out: return skb; -- cgit v0.10.2 From 4f647e0a3c37b8d5086214128614a136064110c3 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Thu, 27 Mar 2014 11:05:34 -0300 Subject: openvswitch: fix a possible deadlock and lockdep warning There are two problematic situations. A deadlock can happen when is_percpu is false because it can get interrupted while holding the spinlock. Then it executes ovs_flow_stats_update() in softirq context which tries to get the same lock. The second sitation is that when is_percpu is true, the code correctly disables BH but only for the local CPU, so the following can happen when locking the remote CPU without disabling BH: CPU#0 CPU#1 ovs_flow_stats_get() stats_read() +->spin_lock remote CPU#1 ovs_flow_stats_get() | stats_read() | ... +--> spin_lock remote CPU#0 | | | ovs_flow_stats_update() | ... | spin_lock local CPU#0 <--+ ovs_flow_stats_update() +---------------------------------- spin_lock local CPU#1 This patch disables BH for both cases fixing the deadlocks. Acked-by: Jesse Gross ================================= [ INFO: inconsistent lock state ] 3.14.0-rc8-00007-g632b06a #1 Tainted: G I --------------------------------- inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. swapper/0/0 [HC0[0]:SC1[5]:HE1:SE0] takes: (&(&cpu_stats->lock)->rlock){+.?...}, at: [] ovs_flow_stats_update+0x51/0xd0 [openvswitch] {SOFTIRQ-ON-W} state was registered at: [] __lock_acquire+0x68f/0x1c40 [] lock_acquire+0xa2/0x1d0 [] _raw_spin_lock+0x3e/0x80 [] ovs_flow_stats_get+0xc4/0x1e0 [openvswitch] [] ovs_flow_cmd_fill_info+0x185/0x360 [openvswitch] [] ovs_flow_cmd_build_info.constprop.27+0x55/0x90 [openvswitch] [] ovs_flow_cmd_new_or_set+0x4dd/0x570 [openvswitch] [] genl_family_rcv_msg+0x1cd/0x3f0 [] genl_rcv_msg+0x8e/0xd0 [] netlink_rcv_skb+0xa9/0xc0 [] genl_rcv+0x28/0x40 [] netlink_unicast+0x100/0x1e0 [] netlink_sendmsg+0x347/0x770 [] sock_sendmsg+0x9c/0xe0 [] ___sys_sendmsg+0x3a9/0x3c0 [] __sys_sendmsg+0x51/0x90 [] SyS_sendmsg+0x12/0x20 [] system_call_fastpath+0x16/0x1b irq event stamp: 1740726 hardirqs last enabled at (1740726): [] ip6_finish_output2+0x4f0/0x840 hardirqs last disabled at (1740725): [] ip6_finish_output2+0x4ab/0x840 softirqs last enabled at (1740674): [] _local_bh_enable+0x22/0x50 softirqs last disabled at (1740675): [] irq_exit+0xc5/0xd0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&cpu_stats->lock)->rlock); lock(&(&cpu_stats->lock)->rlock); *** DEADLOCK *** 5 locks held by swapper/0/0: #0: (((&ifa->dad_timer))){+.-...}, at: [] call_timer_fn+0x5/0x320 #1: (rcu_read_lock){.+.+..}, at: [] mld_sendpack+0x5/0x4a0 #2: (rcu_read_lock_bh){.+....}, at: [] ip6_finish_output2+0x59/0x840 #3: (rcu_read_lock_bh){.+....}, at: [] __dev_queue_xmit+0x5/0x9b0 #4: (rcu_read_lock){.+.+..}, at: [] internal_dev_xmit+0x5/0x110 [openvswitch] stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Tainted: G I 3.14.0-rc8-00007-g632b06a #1 Hardware name: /DX58SO, BIOS SOX5810J.86A.5599.2012.0529.2218 05/29/2012 0000000000000000 0fcf20709903df0c ffff88042d603808 ffffffff817cfe3c ffffffff81c134c0 ffff88042d603858 ffffffff817cb6da 0000000000000005 ffffffff00000001 ffff880400000000 0000000000000006 ffffffff81c134c0 Call Trace: [] dump_stack+0x4d/0x66 [] print_usage_bug+0x1f4/0x205 [] ? check_usage_backwards+0x180/0x180 [] mark_lock+0x223/0x2b0 [] __lock_acquire+0x623/0x1c40 [] ? __lock_is_held+0x57/0x80 [] ? masked_flow_lookup+0x236/0x250 [openvswitch] [] lock_acquire+0xa2/0x1d0 [] ? ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] _raw_spin_lock+0x3e/0x80 [] ? ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] ovs_flow_stats_update+0x51/0xd0 [openvswitch] [] ovs_dp_process_received_packet+0x84/0x120 [openvswitch] [] ? __lock_acquire+0x347/0x1c40 [] ovs_vport_receive+0x2a/0x30 [openvswitch] [] internal_dev_xmit+0x68/0x110 [openvswitch] [] ? internal_dev_xmit+0x5/0x110 [openvswitch] [] dev_hard_start_xmit+0x2e6/0x8b0 [] __dev_queue_xmit+0x417/0x9b0 [] ? __dev_queue_xmit+0x5/0x9b0 [] ? ip6_finish_output2+0x4f0/0x840 [] dev_queue_xmit+0x10/0x20 [] ip6_finish_output2+0x551/0x840 [] ? ip6_finish_output+0x9a/0x220 [] ip6_finish_output+0x9a/0x220 [] ip6_output+0x4f/0x1f0 [] mld_sendpack+0x1d9/0x4a0 [] mld_send_initial_cr.part.32+0x88/0xa0 [] ? addrconf_dad_completed+0x220/0x220 [] ipv6_mc_dad_complete+0x31/0x50 [] addrconf_dad_completed+0x147/0x220 [] ? addrconf_dad_completed+0x220/0x220 [] addrconf_dad_timer+0x19f/0x1c0 [] call_timer_fn+0x99/0x320 [] ? call_timer_fn+0x5/0x320 [] ? addrconf_dad_completed+0x220/0x220 [] run_timer_softirq+0x254/0x3b0 [] __do_softirq+0x12d/0x480 Signed-off-by: Flavio Leitner Signed-off-by: David S. Miller diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index dda451f..2998989 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -103,30 +103,24 @@ static void stats_read(struct flow_stats *stats, void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int cpu, cur_cpu; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); + local_bh_disable(); if (!flow->stats.is_percpu) { stats_read(flow->stats.stat, ovs_stats, used, tcp_flags); } else { - cur_cpu = get_cpu(); for_each_possible_cpu(cpu) { struct flow_stats *stats; - if (cpu == cur_cpu) - local_bh_disable(); - stats = per_cpu_ptr(flow->stats.cpu_stats, cpu); stats_read(stats, ovs_stats, used, tcp_flags); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static void stats_reset(struct flow_stats *stats) @@ -141,25 +135,17 @@ static void stats_reset(struct flow_stats *stats) void ovs_flow_stats_clear(struct sw_flow *flow) { - int cpu, cur_cpu; + int cpu; + local_bh_disable(); if (!flow->stats.is_percpu) { stats_reset(flow->stats.stat); } else { - cur_cpu = get_cpu(); - for_each_possible_cpu(cpu) { - - if (cpu == cur_cpu) - local_bh_disable(); - stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu)); - - if (cpu == cur_cpu) - local_bh_enable(); } - put_cpu(); } + local_bh_enable(); } static int check_header(struct sk_buff *skb, int len) -- cgit v0.10.2 From e2a1d3e47bb904082b758dec9d07edf241c45d05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Mar 2014 07:19:19 -0700 Subject: tcp: fix get_timewait4_sock() delay computation on 64bit It seems I missed one change in get_timewait4_sock() to compute the remaining time before deletion of IPV4 timewait socket. This could result in wrong output in /proc/net/tcp for tm->when field. Fixes: 96f817fedec4 ("tcp: shrink tcp6_timewait_sock by one cache line") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3cf9765..1e4eac7 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2628,7 +2628,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, { __be32 dest, src; __u16 destp, srcp; - long delta = tw->tw_ttd - jiffies; + s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = tw->tw_daddr; src = tw->tw_rcv_saddr; -- cgit v0.10.2 From c15b1ccadb323ea50023e8f1cca2954129a62b51 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Mar 2014 18:28:07 +0100 Subject: ipv6: move DAD and addrconf_verify processing to workqueue addrconf_join_solict and addrconf_join_anycast may cause actions which need rtnl locked, especially on first address creation. A new DAD state is introduced which defers processing of the initial DAD processing into a workqueue. To get rtnl lock we need to push the code paths which depend on those calls up to workqueues, specifically addrconf_verify and the DAD processing. (v2) addrconf_dad_failure needs to be queued up to the workqueue, too. This patch introduces a new DAD state and stop the DAD processing in the workqueue (this is because of the possible ipv6_del_addr processing which removes the solicited multicast address from the device). addrconf_verify_lock is removed, too. After the transition it is not needed any more. As we are not processing in bottom half anymore we need to be a bit more careful about disabling bottom half out when we lock spin_locks which are also used in bh. Relevant backtrace: [ 541.030090] RTNL: assertion failed at net/core/dev.c (4496) [ 541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 3.10.33-1-amd64-vyatta #1 [ 541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 541.031146] ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8 [ 541.031148] 0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18 [ 541.031150] 0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000 [ 541.031152] Call Trace: [ 541.031153] [] ? dump_stack+0xd/0x17 [ 541.031180] [] ? __dev_set_promiscuity+0x101/0x180 [ 541.031183] [] ? __hw_addr_create_ex+0x60/0xc0 [ 541.031185] [] ? __dev_set_rx_mode+0xaa/0xc0 [ 541.031189] [] ? __dev_mc_add+0x61/0x90 [ 541.031198] [] ? igmp6_group_added+0xfc/0x1a0 [ipv6] [ 541.031208] [] ? kmem_cache_alloc+0xcb/0xd0 [ 541.031212] [] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6] [ 541.031216] [] ? addrconf_join_solict+0x2e/0x40 [ipv6] [ 541.031219] [] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6] [ 541.031223] [] ? addrconf_join_anycast+0x92/0xa0 [ipv6] [ 541.031226] [] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6] [ 541.031229] [] ? ipv6_ifa_notify+0x33/0x50 [ipv6] [ 541.031233] [] ? addrconf_dad_completed+0x28/0x100 [ipv6] [ 541.031241] [] ? task_cputime+0x2d/0x50 [ 541.031244] [] ? addrconf_dad_timer+0x136/0x150 [ipv6] [ 541.031247] [] ? addrconf_dad_completed+0x100/0x100 [ipv6] [ 541.031255] [] ? call_timer_fn.isra.22+0x2a/0x90 [ 541.031258] [] ? addrconf_dad_completed+0x100/0x100 [ipv6] Hunks and backtrace stolen from a patch by Stephen Hemminger. Reported-by: Stephen Hemminger Signed-off-by: Stephen Hemminger Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 9650a3f..b4956a5 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -31,8 +31,10 @@ #define IF_PREFIX_AUTOCONF 0x02 enum { + INET6_IFADDR_STATE_PREDAD, INET6_IFADDR_STATE_DAD, INET6_IFADDR_STATE_POSTDAD, + INET6_IFADDR_STATE_ERRDAD, INET6_IFADDR_STATE_UP, INET6_IFADDR_STATE_DEAD, }; @@ -58,7 +60,7 @@ struct inet6_ifaddr { unsigned long cstamp; /* created timestamp */ unsigned long tstamp; /* updated timestamp */ - struct timer_list dad_timer; + struct delayed_work dad_work; struct inet6_dev *idev; struct rt6_info *rt; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 344e972..6c7fa08 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(unsigned long); +static void addrconf_verify(void); +static void addrconf_verify_rtnl(void); +static void addrconf_verify_work(struct work_struct *); -static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); -static DEFINE_SPINLOCK(addrconf_verify_lock); +static struct workqueue_struct *addrconf_wq; +static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, u32 flags, u32 noflags); static void addrconf_dad_start(struct inet6_ifaddr *ifp); -static void addrconf_dad_timer(unsigned long data); +static void addrconf_dad_work(struct work_struct *w); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); @@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev) __in6_dev_put(idev); } -static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) { - if (del_timer(&ifp->dad_timer)) + if (cancel_delayed_work(&ifp->dad_work)) __in6_ifa_put(ifp); } @@ -261,12 +263,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev, mod_timer(&idev->rs_timer, jiffies + when); } -static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, - unsigned long when) +static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, + unsigned long delay) { - if (!timer_pending(&ifp->dad_timer)) + if (!delayed_work_pending(&ifp->dad_work)) in6_ifa_hold(ifp); - mod_timer(&ifp->dad_timer, jiffies + when); + mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); } static int snmp6_alloc_dev(struct inet6_dev *idev) @@ -751,8 +753,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->dad_timer)) - pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); + if (cancel_delayed_work(&ifp->dad_work)) + pr_notice("delayed DAD work was pending while freeing ifa=%p\n", + ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { pr_warn("Freeing alive inet6 address %p\n", ifp); @@ -849,8 +852,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - setup_timer(&ifa->dad_timer, addrconf_dad_timer, - (unsigned long)ifa); + INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -990,6 +992,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; unsigned long expires; + ASSERT_RTNL(); + spin_lock_bh(&ifp->state_lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; @@ -1021,7 +1025,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) write_unlock_bh(&ifp->idev->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); @@ -1604,7 +1608,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -1625,20 +1629,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); - } else + } else { ipv6_del_addr(ifp); + } } static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock(&ifp->state_lock); + spin_lock_bh(&ifp->state_lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock(&ifp->state_lock); + spin_unlock_bh(&ifp->state_lock); return err; } @@ -1671,7 +1676,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - addrconf_dad_stop(ifp, 1); + spin_lock_bh(&ifp->state_lock); + /* transition from _POSTDAD to _ERRDAD */ + ifp->state = INET6_IFADDR_STATE_ERRDAD; + spin_unlock_bh(&ifp->state_lock); + + addrconf_mod_dad_work(ifp, 0); } /* Join to solicited addr multicast group. */ @@ -1680,6 +1690,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1691,6 +1703,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1701,6 +1715,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1712,6 +1729,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + + ASSERT_RTNL(); + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -2271,11 +2291,13 @@ ok: return; } - ifp->flags |= IFA_F_MANAGETEMPADDR; update_lft = 0; create = 1; + spin_lock_bh(&ifp->lock); + ifp->flags |= IFA_F_MANAGETEMPADDR; ifp->cstamp = jiffies; ifp->tokenized = tokenized; + spin_unlock_bh(&ifp->lock); addrconf_dad_start(ifp); } @@ -2326,7 +2348,7 @@ ok: create, now); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify(); } } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); @@ -2475,7 +2497,7 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -3011,7 +3033,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); goto restart; } } @@ -3049,7 +3071,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); list_del(&ifa->if_list); @@ -3148,10 +3170,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); ifp->dad_probes = idev->cnf.dad_transmits; - addrconf_mod_dad_timer(ifp, rand_num); + addrconf_mod_dad_work(ifp, rand_num); } -static void addrconf_dad_start(struct inet6_ifaddr *ifp) +static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; @@ -3203,25 +3225,68 @@ out: read_unlock_bh(&idev->lock); } -static void addrconf_dad_timer(unsigned long data) +static void addrconf_dad_start(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + bool begin_dad = false; + + spin_lock_bh(&ifp->state_lock); + if (ifp->state != INET6_IFADDR_STATE_DEAD) { + ifp->state = INET6_IFADDR_STATE_PREDAD; + begin_dad = true; + } + spin_unlock_bh(&ifp->state_lock); + + if (begin_dad) + addrconf_mod_dad_work(ifp, 0); +} + +static void addrconf_dad_work(struct work_struct *w) +{ + struct inet6_ifaddr *ifp = container_of(to_delayed_work(w), + struct inet6_ifaddr, + dad_work); struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + enum { + DAD_PROCESS, + DAD_BEGIN, + DAD_ABORT, + } action = DAD_PROCESS; + + rtnl_lock(); + + spin_lock_bh(&ifp->state_lock); + if (ifp->state == INET6_IFADDR_STATE_PREDAD) { + action = DAD_BEGIN; + ifp->state = INET6_IFADDR_STATE_DAD; + } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { + action = DAD_ABORT; + ifp->state = INET6_IFADDR_STATE_POSTDAD; + } + spin_unlock_bh(&ifp->state_lock); + + if (action == DAD_BEGIN) { + addrconf_dad_begin(ifp); + goto out; + } else if (action == DAD_ABORT) { + addrconf_dad_stop(ifp, 1); + goto out; + } + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } @@ -3232,7 +3297,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); @@ -3240,16 +3305,17 @@ static void addrconf_dad_timer(unsigned long data) } ifp->dad_probes--; - addrconf_mod_dad_timer(ifp, - NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); + addrconf_mod_dad_work(ifp, + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); + rtnl_unlock(); } /* ifp->idev must be at least read locked */ @@ -3276,7 +3342,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) struct in6_addr lladdr; bool send_rs, send_mld; - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); /* * Configure the address for reception. Now it is valid. @@ -3517,23 +3583,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) * Periodic address status verification */ -static void addrconf_verify(unsigned long foo) +static void addrconf_verify_rtnl(void) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; int i; + ASSERT_RTNL(); + rcu_read_lock_bh(); - spin_lock(&addrconf_verify_lock); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, - &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; /* When setting preferred_lft to a value not zero or @@ -3628,13 +3694,22 @@ restart: ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - - addr_chk_timer.expires = next_sched; - add_timer(&addr_chk_timer); - spin_unlock(&addrconf_verify_lock); + mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } +static void addrconf_verify_work(struct work_struct *w) +{ + rtnl_lock(); + addrconf_verify_rtnl(); + rtnl_unlock(); +} + +static void addrconf_verify(void) +{ + mod_delayed_work(addrconf_wq, &addr_chk_work, 0); +} + static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, struct in6_addr **peer_pfx) { @@ -3691,6 +3766,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, bool was_managetempaddr; bool had_prefixroute; + ASSERT_RTNL(); + if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; @@ -3756,7 +3833,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, !was_managetempaddr, jiffies); } - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4386,6 +4463,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) bool update_rs = false; struct in6_addr ll_addr; + ASSERT_RTNL(); + if (token == NULL) return -EINVAL; if (ipv6_addr_any(token)) @@ -4434,7 +4513,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4636,6 +4715,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); + if (event) + ASSERT_RTNL(); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -5244,6 +5326,12 @@ int __init addrconf_init(void) if (err < 0) goto out_addrlabel; + addrconf_wq = create_workqueue("ipv6_addrconf"); + if (!addrconf_wq) { + err = -ENOMEM; + goto out_nowq; + } + /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup * before it can bring up and give link-local addresses @@ -5274,7 +5362,7 @@ int __init addrconf_init(void) register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(0); + addrconf_verify(); rtnl_af_register(&inet6_ops); @@ -5302,6 +5390,8 @@ errout: rtnl_af_unregister(&inet6_ops); unregister_netdevice_notifier(&ipv6_dev_notf); errlo: + destroy_workqueue(addrconf_wq); +out_nowq: unregister_pernet_subsys(&addrconf_ops); out_addrlabel: ipv6_addr_label_cleanup(); @@ -5337,7 +5427,8 @@ void addrconf_cleanup(void) for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_addr_lst[i])); spin_unlock_bh(&addrconf_hash_lock); - - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); rtnl_unlock(); + + destroy_workqueue(addrconf_wq); } -- cgit v0.10.2 From d9060742fbf630fe31951dfc10b798deb2813f01 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 28 Mar 2014 13:33:38 -0700 Subject: ocfs2: check if cluster name exists before deref Commit c74a3bdd9b52 ("ocfs2: add clustername to cluster connection") is trying to strlcpy a string which was explicitly passed as NULL in the very same patch, triggering a NULL ptr deref. BUG: unable to handle kernel NULL pointer dereference at (null) IP: strlcpy (lib/string.c:388 lib/string.c:151) CPU: 19 PID: 19426 Comm: trinity-c19 Tainted: G W 3.14.0-rc7-next-20140325-sasha-00014-g9476368-dirty #274 RIP: strlcpy (lib/string.c:388 lib/string.c:151) Call Trace: ocfs2_cluster_connect (fs/ocfs2/stackglue.c:350) ocfs2_cluster_connect_agnostic (fs/ocfs2/stackglue.c:396) user_dlm_register (fs/ocfs2/dlmfs/userdlm.c:679) dlmfs_mkdir (fs/ocfs2/dlmfs/dlmfs.c:503) vfs_mkdir (fs/namei.c:3467) SyS_mkdirat (fs/namei.c:3488 fs/namei.c:3472) tracesys (arch/x86/kernel/entry_64.S:749) akpm: this patch probably disables the feature. A temporary thing to avoid triviel oopses. Signed-off-by: Sasha Levin Cc: Goldwyn Rodrigues Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 1324e66..ca5ce14 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -346,7 +346,9 @@ int ocfs2_cluster_connect(const char *stack_name, strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); new_conn->cc_namelen = grouplen; - strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1); + if (cluster_name_len) + strlcpy(new_conn->cc_cluster_name, cluster_name, + CLUSTER_NAME_MAX + 1); new_conn->cc_cluster_name_len = cluster_name_len; new_conn->cc_recovery_handler = recovery_handler; new_conn->cc_recovery_data = recovery_data; -- cgit v0.10.2 From 825600c0f20e595daaa7a6dd8970f84fa2a2ee57 Mon Sep 17 00:00:00 2001 From: Artem Fetishev Date: Fri, 28 Mar 2014 13:33:39 -0700 Subject: x86: fix boot on uniprocessor systems On x86 uniprocessor systems topology_physical_package_id() returns -1 which causes rapl_cpu_prepare() to leave rapl_pmu variable uninitialized which leads to GPF in rapl_pmu_init(). See arch/x86/kernel/cpu/perf_event_intel_rapl.c. It turns out that physical_package_id and core_id can actually be retreived for uniprocessor systems too. Enabling them also fixes rapl_pmu code. Signed-off-by: Artem Fetishev Cc: Stephane Eranian Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d35f24e..1306d11 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -119,9 +119,10 @@ static inline void setup_node_to_cpumask_map(void) { } extern const struct cpumask *cpu_coregroup_mask(int cpu); -#ifdef ENABLE_TOPO_DEFINES #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) + +#ifdef ENABLE_TOPO_DEFINES #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) #endif -- cgit v0.10.2 From 79b30750d99a7f0fb4eaaa1c69139a7821bf6771 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Thu, 27 Mar 2014 10:33:44 -0700 Subject: MAINTAINERS: bonding: change email address Update my email address. Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 0b3c40f..acac797 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1832,7 +1832,7 @@ F: net/bluetooth/ F: include/net/bluetooth/ BONDING DRIVER -M: Jay Vosburgh +M: Jay Vosburgh M: Veaceslav Falico M: Andy Gospodarek L: netdev@vger.kernel.org -- cgit v0.10.2 From 898602a049504dea256e696ee3152dc0b788a393 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Thu, 27 Mar 2014 18:43:50 +0100 Subject: MAINTAINERS: bonding: change email address Signed-off-by: Veaceslav Falico Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index acac797..a80c84e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1833,7 +1833,7 @@ F: include/net/bluetooth/ BONDING DRIVER M: Jay Vosburgh -M: Veaceslav Falico +M: Veaceslav Falico M: Andy Gospodarek L: netdev@vger.kernel.org W: http://sourceforge.net/projects/bonding/ -- cgit v0.10.2 From 53d6471cef17262d3ad1c7ce8982a234244f68ec Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 17:26:18 -0400 Subject: net: Account for all vlan headers in skb_mac_gso_segment skb_network_protocol() already accounts for multiple vlan headers that may be present in the skb. However, skb_mac_gso_segment() doesn't know anything about it and assumes that skb->mac_len is set correctly to skip all mac headers. That may not always be the case. If we are simply forwarding the packet (via bridge or macvtap), all vlan headers may not be accounted for. A simple solution is to allow skb_network_protocol to return the vlan depth it has calculated. This way skb_mac_gso_segment will correctly skip all mac headers. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e8eeebd..daafd95 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3014,7 +3014,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { return __skb_gso_segment(skb, features, true); } -__be16 skb_network_protocol(struct sk_buff *skb); +__be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) diff --git a/net/core/dev.c b/net/core/dev.c index b1b0c8d..45fa2f1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2286,7 +2286,7 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -__be16 skb_network_protocol(struct sk_buff *skb) +__be16 skb_network_protocol(struct sk_buff *skb, int *depth) { __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; @@ -2313,6 +2313,8 @@ __be16 skb_network_protocol(struct sk_buff *skb) vlan_depth += VLAN_HLEN; } + *depth = vlan_depth; + return type; } @@ -2326,12 +2328,13 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); struct packet_offload *ptype; - __be16 type = skb_network_protocol(skb); + int vlan_depth = skb->mac_len; + __be16 type = skb_network_protocol(skb, &vlan_depth); if (unlikely(!type)) return ERR_PTR(-EINVAL); - __skb_pull(skb, skb->mac_len); + __skb_pull(skb, vlan_depth); rcu_read_lock(); list_for_each_entry_rcu(ptype, &offload_base, list) { @@ -2498,8 +2501,10 @@ static netdev_features_t harmonize_features(struct sk_buff *skb, const struct net_device *dev, netdev_features_t features) { + int tmp; + if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, skb_network_protocol(skb))) { + !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(dev, skb)) { features &= ~NETIF_F_SG; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 97e5a2c..90b96a1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2879,8 +2879,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int err = -ENOMEM; int i = 0; int pos; + int dummy; - proto = skb_network_protocol(head_skb); + proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) return ERR_PTR(-EINVAL); -- cgit v0.10.2 From fc92f745f8d0d3736ce5afb00a905d7cc61f9c46 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 21:51:18 -0400 Subject: bridge: Fix crash with vlan filtering and tcpdump When the vlan filtering is enabled on the bridge, but the filter is not configured on the bridge device itself, running tcpdump on the bridge device will result in a an Oops with NULL pointer dereference. The reason is that br_pass_frame_up() will bypass the vlan check because promisc flag is set. It will then try to get the table pointer and process the packet based on the table. Since the table pointer is NULL, we oops. Catch this special condition in br_handle_vlan(). Reported-by: Toshiaki Makita CC: Toshiaki Makita Signed-off-by: Vlad Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 28d5446..d0cca3c 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -29,6 +29,7 @@ static int br_pass_frame_up(struct sk_buff *skb) struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); + struct net_port_vlans *pv; u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; @@ -39,18 +40,18 @@ static int br_pass_frame_up(struct sk_buff *skb) * packet is allowed except in promisc modue when someone * may be running packet capture. */ + pv = br_get_vlan_info(br); if (!(brdev->flags & IFF_PROMISC) && - !br_allowed_egress(br, br_get_vlan_info(br), skb)) { + !br_allowed_egress(br, pv, skb)) { kfree_skb(skb); return NET_RX_DROP; } - skb = br_handle_vlan(br, br_get_vlan_info(br), skb); - if (!skb) - return NET_RX_DROP; - indev = skb->dev; skb->dev = brdev; + skb = br_handle_vlan(br, pv, skb); + if (!skb) + return NET_RX_DROP; return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index c77eed5..f23c74b 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -128,6 +128,20 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (!br->vlan_enabled) goto out; + /* Vlan filter table must be configured at this point. The + * only exception is the bridge is set in promisc mode and the + * packet is destined for the bridge device. In this case + * pass the packet as is. + */ + if (!pv) { + if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { + goto out; + } else { + kfree_skb(skb); + return NULL; + } + } + /* At this point, we know that the frame was filtered and contains * a valid vlan id. If the vlan id is set in the untagged bitmap, * send untagged; otherwise, send tagged. -- cgit v0.10.2 From f6d1ac4b5f15f57929fe0fa283b3a45dfec717a0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:46 -0400 Subject: qlge: Do not propaged vlan tag offloads to vlans qlge driver turns off NETIF_F_HW_CTAG_FILTER, but forgets to turn off HW_CTAG_TX and HW_CTAG_RX on vlan devices. With the current settings, q-in-q will only generate a single vlan header. Remember to mask off CTAG_TX and CTAG_RX features in vlan_features. CC: Shahed Shaikh CC: Jitendra Kalsaria CC: Ron Mercer Signed-off-by: Vlad Yasevich Acked-by: Jitendra Kalsaria Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index ce2cfdd..656c65d 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4765,7 +4765,9 @@ static int qlge_probe(struct pci_dev *pdev, ndev->features = ndev->hw_features; ndev->vlan_features = ndev->hw_features; /* vlan gets same features (except vlan filter) */ - ndev->vlan_features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + ndev->vlan_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER | + NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX); if (test_bit(QL_DMA64, &qdev->flags)) ndev->features |= NETIF_F_HIGHDMA; -- cgit v0.10.2 From 8dd6e147b0c29723ec10d0e836c7f3466d61a19b Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:47 -0400 Subject: ifb: Remove vlan acceleration from vlan_features Do not include vlan acceleration features in vlan_features as that precludes correct Q-in-Q operation. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index c14d39b..d7b2e94 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -180,7 +180,8 @@ static void ifb_setup(struct net_device *dev) dev->tx_queue_len = TX_Q_LIMIT; dev->features |= IFB_FEATURES; - dev->vlan_features |= IFB_FEATURES; + dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX); dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; -- cgit v0.10.2 From 3f8c707b9a83cd956af65796081b6c8cb8716089 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:48 -0400 Subject: veth: Turn off vlan rx acceleration in vlan_features For completeness, turn off vlan rx acceleration in vlan_features so that it doesn't show up on q-in-q setups. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 5b37437..c0e7c64 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -286,7 +286,10 @@ static void veth_setup(struct net_device *dev) dev->features |= NETIF_F_LLTX; dev->features |= VETH_FEATURES; dev->vlan_features = dev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); + ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_RX); dev->destructor = veth_dev_free; dev->hw_features = VETH_FEATURES; -- cgit v0.10.2 From 2adb956b084d6d49f519541a4b5f9947e96f8ef7 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 27 Mar 2014 22:14:49 -0400 Subject: vlan: Warn the user if lowerdev has bad vlan features. Some drivers incorrectly assign vlan acceleration features to vlan_features thus causing issues for Q-in-Q vlan configurations. Warn the user of such cases. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 1005ebf..5a09a48 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -163,4 +163,11 @@ enum { /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) +#define NETIF_F_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | \ + NETIF_F_HW_VLAN_STAG_FILTER | \ + NETIF_F_HW_VLAN_STAG_RX | \ + NETIF_F_HW_VLAN_STAG_TX) + #endif /* _LINUX_NETDEV_FEATURES_H */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index a9591ff..27bfe2f 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -578,6 +578,9 @@ static int vlan_dev_init(struct net_device *dev) dev->features |= real_dev->vlan_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; + if (dev->features & NETIF_F_VLAN_FEATURES) + netdev_warn(real_dev, "VLAN features are set incorrectly. Q-in-Q configurations may not work correctly.\n"); + /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; -- cgit v0.10.2 From 4c332c3238ab41fa9bc18d6ccb8e8aec1c87a426 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 15 Mar 2014 13:11:18 -0700 Subject: [IA64] Keep format strings from leaking into printk The buffer being sent to printk has already had format strings resolved. The string should not be reinterpreted again to avoid any unintended format strings from leaking into printk. Signed-off-by: Kees Cook Signed-off-by: Tony Luck diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index b8edfa7..601502a 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -217,7 +217,7 @@ void ia64_mca_printk(const char *fmt, ...) /* Copy the output into mlogbuf */ if (oops_in_progress) { /* mlogbuf was abandoned, use printk directly instead. */ - printk(temp_buf); + printk("%s", temp_buf); } else { spin_lock(&mlogbuf_wlock); for (p = temp_buf; *p; p++) { @@ -268,7 +268,7 @@ void ia64_mlogbuf_dump(void) } *p = '\0'; if (temp_buf[0]) - printk(temp_buf); + printk("%s", temp_buf); mlogbuf_start = index; mlogbuf_timestamp = 0; -- cgit v0.10.2 From 1e4ec6217dcf4b26cf959b70298a3b990479c955 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 29 Mar 2014 10:50:47 +0100 Subject: s390/compat: add copyright statement Signed-off-by: Heiko Carstens diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index d123f5d..824c39d 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -1,3 +1,9 @@ +/* + * Compat sytem call wrappers. + * + * Copyright IBM Corp. 2014 + */ + #include #include #include "entry.h" -- cgit v0.10.2 From 21ddfd38ee9aac804d22beaceed4c7b903cca234 Mon Sep 17 00:00:00 2001 From: Jianyu Zhan Date: Fri, 28 Mar 2014 20:55:21 +0800 Subject: percpu: renew the max_contig if we merge the head and previous block During pcpu_alloc_area(), we might merge the current head with the previous block. Since we have calculated the max_contig using the size of previous block before we skip it, and now we update the size of previous block, so we should renew the max_contig. Signed-off-by: Jianyu Zhan Signed-off-by: Tejun Heo diff --git a/mm/percpu.c b/mm/percpu.c index 202e104..63e24fb 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -473,9 +473,11 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) * uncommon for percpu allocations. */ if (head && (head < sizeof(int) || !(p[-1] & 1))) { + *p = off += head; if (p[-1] & 1) chunk->free_size -= head; - *p = off += head; + else + max_contig = max(*p - p[-1], max_contig); this_size -= head; head = 0; } -- cgit v0.10.2 From 59ff3eb6d6f75c6c1c3ea8b46ac2cc64eb216547 Mon Sep 17 00:00:00 2001 From: ZhangZhen Date: Thu, 27 Mar 2014 09:41:47 +0800 Subject: workqueue: remove deprecated WQ_NON_REENTRANT Tejun Heo has made WQ_NON_REENTRANT useless in the dbf2576e37 ("workqueue: make all workqueues non-reentrant"). So remove its usages and definition. This patch doesn't introduce any behavior changes. tj: minor description updates. Signed-off-by: ZhangZhen Sigend-off-by: Tejun Heo Acked-by: James Chapman Acked-by: Ulf Hansson diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 55cd110..c204b7d 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2607,7 +2607,7 @@ int dw_mci_probe(struct dw_mci *host) tasklet_init(&host->tasklet, dw_mci_tasklet_func, (unsigned long)host); host->card_workqueue = alloc_workqueue("dw-mci-card", - WQ_MEM_RECLAIM | WQ_NON_REENTRANT, 1); + WQ_MEM_RECLAIM, 1); if (!host->card_workqueue) { ret = -ENOMEM; goto err_dmaunmap; diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0523eab..5329946 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -284,12 +284,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * Documentation/workqueue.txt. */ enum { - /* - * All wqs are now non-reentrant making the following flag - * meaningless. Will be removed. - */ - WQ_NON_REENTRANT = 1 << 0, /* DEPRECATED */ - WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 735d0f6..fcbd63e 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -2016,7 +2016,7 @@ static int __init l2tp_init(void) if (rc) goto out; - l2tp_wq = alloc_workqueue("l2tp", WQ_NON_REENTRANT | WQ_UNBOUND, 0); + l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0); if (!l2tp_wq) { pr_err("alloc_workqueue failed\n"); rc = -ENOMEM; -- cgit v0.10.2 From 638c323c4d1f8eaf25224946e21ce8818f1bcee1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 25 Mar 2014 15:36:02 +0200 Subject: rbd: drop an unsafe assertion Olivier Bonvalet reported having repeated crashes due to a failed assertion he was hitting in rbd_img_obj_callback(): Assertion failure in rbd_img_obj_callback() at line 2165: rbd_assert(which >= img_request->next_completion); With a lot of help from Olivier with reproducing the problem we were able to determine the object and image requests had already been completed (and often freed) at the point the assertion failed. There was a great deal of discussion on the ceph-devel mailing list about this. The problem only arose when there were two (or more) object requests in an image request, and the problem was always seen when the second request was being completed. The problem is due to a race in the window between setting the "done" flag on an object request and checking the image request's next completion value. When the first object request completes, it checks to see if its successor request is marked "done", and if so, that request is also completed. In the process, the image request's next_completion value is updated to reflect that both the first and second requests are completed. By the time the second request is able to check the next_completion value, it has been set to a value *greater* than its own "which" value, which caused an assertion to fail. Fix this problem by skipping over any completion processing unless the completing object request is the next one expected. Test only for inequality (not >=), and eliminate the bad assertion. Tested-by: Olivier Bonvalet Signed-off-by: Alex Elder Reviewed-by: Sage Weil Reviewed-by: Ilya Dryomov diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b365e0d..34898d5 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2109,7 +2109,6 @@ static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) rbd_assert(img_request->obj_request_count > 0); rbd_assert(which != BAD_WHICH); rbd_assert(which < img_request->obj_request_count); - rbd_assert(which >= img_request->next_completion); spin_lock_irq(&img_request->completion_lock); if (which != img_request->next_completion) -- cgit v0.10.2 From fc7392aa1b20debc7f398acc39ffc817630f11e6 Mon Sep 17 00:00:00 2001 From: Elias Vanderstuyft Date: Sat, 29 Mar 2014 12:08:45 -0700 Subject: Input: don't modify the id of ioctl-provided ff effect on upload failure If a new (id == -1) ff effect was uploaded from userspace, ff-core.c::input_ff_upload() will have assigned a positive number to the new effect id. Currently, evdev.c::evdev_do_ioctl() will save this new id to userspace, regardless of whether the upload succeeded or not. On upload failure, this can be confusing because the dev->ff->effects[] array will not contain an element at the index of that new effect id. This patch fixes this by leaving the id unchanged after upload fails. Note: Unfortunately applications should still expect changed effect id for quite some time. This has been discussed on: http://www.mail-archive.com/linux-input@vger.kernel.org/msg08513.html ("ff-core effect id handling in case of a failed effect upload") Suggested-by: Dmitry Torokhov Signed-off-by: Elias Vanderstuyft Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index a06e125..ce953d8 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -954,11 +954,13 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, return -EFAULT; error = input_ff_upload(dev, &effect, file); + if (error) + return error; if (put_user(effect.id, &(((struct ff_effect __user *)p)->id))) return -EFAULT; - return error; + return 0; } /* Multi-number variable-length handlers */ -- cgit v0.10.2 From e4dbedc7eac7da9db363a36f2bd4366962eeefcc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 6 Mar 2014 12:57:24 -0800 Subject: Input: mousedev - fix race when creating mixed device We should not be using static variable mousedev_mix in methods that can be called before that singleton gets assigned. While at it let's add open and close methods to mousedev structure so that we do not need to test if we are dealing with multiplexor or normal device and simply call appropriate method directly. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=71551 Reported-by: GiulioDP Tested-by: GiulioDP Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mousedev.c b/drivers/input/mousedev.c index 4c842c3..b604564 100644 --- a/drivers/input/mousedev.c +++ b/drivers/input/mousedev.c @@ -67,7 +67,6 @@ struct mousedev { struct device dev; struct cdev cdev; bool exist; - bool is_mixdev; struct list_head mixdev_node; bool opened_by_mixdev; @@ -77,6 +76,9 @@ struct mousedev { int old_x[4], old_y[4]; int frac_dx, frac_dy; unsigned long touch; + + int (*open_device)(struct mousedev *mousedev); + void (*close_device)(struct mousedev *mousedev); }; enum mousedev_emul { @@ -116,9 +118,6 @@ static unsigned char mousedev_imex_seq[] = { 0xf3, 200, 0xf3, 200, 0xf3, 80 }; static struct mousedev *mousedev_mix; static LIST_HEAD(mousedev_mix_list); -static void mixdev_open_devices(void); -static void mixdev_close_devices(void); - #define fx(i) (mousedev->old_x[(mousedev->pkt_count - (i)) & 03]) #define fy(i) (mousedev->old_y[(mousedev->pkt_count - (i)) & 03]) @@ -428,9 +427,7 @@ static int mousedev_open_device(struct mousedev *mousedev) if (retval) return retval; - if (mousedev->is_mixdev) - mixdev_open_devices(); - else if (!mousedev->exist) + if (!mousedev->exist) retval = -ENODEV; else if (!mousedev->open++) { retval = input_open_device(&mousedev->handle); @@ -446,9 +443,7 @@ static void mousedev_close_device(struct mousedev *mousedev) { mutex_lock(&mousedev->mutex); - if (mousedev->is_mixdev) - mixdev_close_devices(); - else if (mousedev->exist && !--mousedev->open) + if (mousedev->exist && !--mousedev->open) input_close_device(&mousedev->handle); mutex_unlock(&mousedev->mutex); @@ -459,21 +454,29 @@ static void mousedev_close_device(struct mousedev *mousedev) * stream. Note that this function is called with mousedev_mix->mutex * held. */ -static void mixdev_open_devices(void) +static int mixdev_open_devices(struct mousedev *mixdev) { - struct mousedev *mousedev; + int error; + + error = mutex_lock_interruptible(&mixdev->mutex); + if (error) + return error; - if (mousedev_mix->open++) - return; + if (!mixdev->open++) { + struct mousedev *mousedev; - list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { - if (!mousedev->opened_by_mixdev) { - if (mousedev_open_device(mousedev)) - continue; + list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { + if (!mousedev->opened_by_mixdev) { + if (mousedev_open_device(mousedev)) + continue; - mousedev->opened_by_mixdev = true; + mousedev->opened_by_mixdev = true; + } } } + + mutex_unlock(&mixdev->mutex); + return 0; } /* @@ -481,19 +484,22 @@ static void mixdev_open_devices(void) * device. Note that this function is called with mousedev_mix->mutex * held. */ -static void mixdev_close_devices(void) +static void mixdev_close_devices(struct mousedev *mixdev) { - struct mousedev *mousedev; + mutex_lock(&mixdev->mutex); - if (--mousedev_mix->open) - return; + if (!--mixdev->open) { + struct mousedev *mousedev; - list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { - if (mousedev->opened_by_mixdev) { - mousedev->opened_by_mixdev = false; - mousedev_close_device(mousedev); + list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { + if (mousedev->opened_by_mixdev) { + mousedev->opened_by_mixdev = false; + mousedev_close_device(mousedev); + } } } + + mutex_unlock(&mixdev->mutex); } @@ -522,7 +528,7 @@ static int mousedev_release(struct inode *inode, struct file *file) mousedev_detach_client(mousedev, client); kfree(client); - mousedev_close_device(mousedev); + mousedev->close_device(mousedev); return 0; } @@ -550,7 +556,7 @@ static int mousedev_open(struct inode *inode, struct file *file) client->mousedev = mousedev; mousedev_attach_client(mousedev, client); - error = mousedev_open_device(mousedev); + error = mousedev->open_device(mousedev); if (error) goto err_free_client; @@ -861,16 +867,21 @@ static struct mousedev *mousedev_create(struct input_dev *dev, if (mixdev) { dev_set_name(&mousedev->dev, "mice"); + + mousedev->open_device = mixdev_open_devices; + mousedev->close_device = mixdev_close_devices; } else { int dev_no = minor; /* Normalize device number if it falls into legacy range */ if (dev_no < MOUSEDEV_MINOR_BASE + MOUSEDEV_MINORS) dev_no -= MOUSEDEV_MINOR_BASE; dev_set_name(&mousedev->dev, "mouse%d", dev_no); + + mousedev->open_device = mousedev_open_device; + mousedev->close_device = mousedev_close_device; } mousedev->exist = true; - mousedev->is_mixdev = mixdev; mousedev->handle.dev = input_get_device(dev); mousedev->handle.name = dev_name(&mousedev->dev); mousedev->handle.handler = handler; @@ -919,7 +930,7 @@ static void mousedev_destroy(struct mousedev *mousedev) device_del(&mousedev->dev); mousedev_cleanup(mousedev); input_free_minor(MINOR(mousedev->dev.devt)); - if (!mousedev->is_mixdev) + if (mousedev != mousedev_mix) input_unregister_handle(&mousedev->handle); put_device(&mousedev->dev); } -- cgit v0.10.2 From 0818bf27c05b2de56c5b2bd08cfae2a939bd5f52 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Feb 2014 13:46:44 -0500 Subject: resizable namespace.c hashes * switch allocation to alloc_large_system_hash() * make sizes overridable by boot parameters (mhash_entries=, mphash_entries=) * switch mountpoint_hashtable from list_head to hlist_head Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index a17458c..acdb428 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -19,7 +19,7 @@ struct mnt_pcp { }; struct mountpoint { - struct list_head m_hash; + struct hlist_node m_hash; struct dentry *m_dentry; int m_count; }; diff --git a/fs/namespace.c b/fs/namespace.c index 22e5367..3b648da 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,11 +23,34 @@ #include #include #include +#include #include "pnode.h" #include "internal.h" -#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) -#define HASH_SIZE (1UL << HASH_SHIFT) +static unsigned int m_hash_mask __read_mostly; +static unsigned int m_hash_shift __read_mostly; +static unsigned int mp_hash_mask __read_mostly; +static unsigned int mp_hash_shift __read_mostly; + +static __initdata unsigned long mhash_entries; +static int __init set_mhash_entries(char *str) +{ + if (!str) + return 0; + mhash_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("mhash_entries=", set_mhash_entries); + +static __initdata unsigned long mphash_entries; +static int __init set_mphash_entries(char *str) +{ + if (!str) + return 0; + mphash_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("mphash_entries=", set_mphash_entries); static int event; static DEFINE_IDA(mnt_id_ida); @@ -37,7 +60,7 @@ static int mnt_id_start = 0; static int mnt_group_start = 1; static struct list_head *mount_hashtable __read_mostly; -static struct list_head *mountpoint_hashtable __read_mostly; +static struct hlist_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static DECLARE_RWSEM(namespace_sem); @@ -55,12 +78,19 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); -static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) +static inline struct list_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); - tmp = tmp + (tmp >> HASH_SHIFT); - return tmp & (HASH_SIZE - 1); + tmp = tmp + (tmp >> m_hash_shift); + return &mount_hashtable[tmp & m_hash_mask]; +} + +static inline struct hlist_head *mp_hash(struct dentry *dentry) +{ + unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES); + tmp = tmp + (tmp >> mp_hash_shift); + return &mountpoint_hashtable[tmp & mp_hash_mask]; } /* @@ -575,7 +605,7 @@ bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) */ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = mount_hashtable + hash(mnt, dentry); + struct list_head *head = m_hash(mnt, dentry); struct mount *p; list_for_each_entry_rcu(p, head, mnt_hash) @@ -590,7 +620,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) */ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = mount_hashtable + hash(mnt, dentry); + struct list_head *head = m_hash(mnt, dentry); struct mount *p; list_for_each_entry_reverse(p, head, mnt_hash) @@ -633,11 +663,11 @@ struct vfsmount *lookup_mnt(struct path *path) static struct mountpoint *new_mountpoint(struct dentry *dentry) { - struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); + struct hlist_head *chain = mp_hash(dentry); struct mountpoint *mp; int ret; - list_for_each_entry(mp, chain, m_hash) { + hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { /* might be worth a WARN_ON() */ if (d_unlinked(dentry)) @@ -659,7 +689,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) mp->m_dentry = dentry; mp->m_count = 1; - list_add(&mp->m_hash, chain); + hlist_add_head(&mp->m_hash, chain); return mp; } @@ -670,7 +700,7 @@ static void put_mountpoint(struct mountpoint *mp) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); - list_del(&mp->m_hash); + hlist_del(&mp->m_hash); kfree(mp); } } @@ -739,8 +769,7 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(&parent->mnt, mp->m_dentry)); + list_add_tail(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } @@ -762,8 +791,8 @@ static void commit_tree(struct mount *mnt) list_splice(&head, n->list.prev); - list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(&parent->mnt, mnt->mnt_mountpoint)); + list_add_tail(&mnt->mnt_hash, + m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); touch_mnt_namespace(n); } @@ -2777,18 +2806,24 @@ void __init mnt_init(void) mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); - mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); + mount_hashtable = alloc_large_system_hash("Mount-cache", + sizeof(struct list_head), + mhash_entries, 19, + 0, + &m_hash_shift, &m_hash_mask, 0, 0); + mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", + sizeof(struct hlist_head), + mphash_entries, 19, + 0, + &mp_hash_shift, &mp_hash_mask, 0, 0); if (!mount_hashtable || !mountpoint_hashtable) panic("Failed to allocate mount hash table\n"); - printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); - - for (u = 0; u < HASH_SIZE; u++) + for (u = 0; u <= m_hash_mask; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - for (u = 0; u < HASH_SIZE; u++) - INIT_LIST_HEAD(&mountpoint_hashtable[u]); + for (u = 0; u <= mp_hash_mask; u++) + INIT_HLIST_HEAD(&mountpoint_hashtable[u]); kernfs_init(); -- cgit v0.10.2 From 1d6a32acd70ab18499829c0a9a5dbe2bace72a13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Mar 2014 20:34:43 -0400 Subject: keep shadowed vfsmounts together preparation to switching mnt_hash to hlist Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index 3b648da..9db3ce3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -621,12 +621,20 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { struct list_head *head = m_hash(mnt, dentry); - struct mount *p; + struct mount *p, *res = NULL; - list_for_each_entry_reverse(p, head, mnt_hash) + list_for_each_entry(p, head, mnt_hash) if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) - return p; - return NULL; + goto found; + return res; +found: + res = p; + list_for_each_entry_continue(p, head, mnt_hash) { + if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) + break; + res = p; + } + return res; } /* @@ -769,14 +777,14 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - list_add_tail(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); + list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } /* * vfsmount lock must be held for write */ -static void commit_tree(struct mount *mnt) +static void commit_tree(struct mount *mnt, struct mount *shadows) { struct mount *parent = mnt->mnt_parent; struct mount *m; @@ -791,7 +799,10 @@ static void commit_tree(struct mount *mnt) list_splice(&head, n->list.prev); - list_add_tail(&mnt->mnt_hash, + if (shadows) + list_add(&mnt->mnt_hash, &shadows->mnt_hash); + else + list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); touch_mnt_namespace(n); @@ -1659,12 +1670,15 @@ static int attach_recursive_mnt(struct mount *source_mnt, touch_mnt_namespace(source_mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); - commit_tree(source_mnt); + commit_tree(source_mnt, NULL); } list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { + struct mount *q; list_del_init(&child->mnt_hash); - commit_tree(child); + q = __lookup_mnt_last(&child->mnt_parent->mnt, + child->mnt_mountpoint); + commit_tree(child, q); } unlock_mount_hash(); -- cgit v0.10.2 From 0b1b901b5a98bb36943d10820efc796f7cd45ff3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 21 Mar 2014 10:14:08 -0400 Subject: don't bother with propagate_mnt() unless the target is shared If the dest_mnt is not shared, propagate_mnt() does nothing - there's no mounts to propagate to and thus no copies to create. Might as well don't bother calling it in that case. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index 9db3ce3..d3fb9f0 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1653,16 +1653,14 @@ static int attach_recursive_mnt(struct mount *source_mnt, err = invent_group_ids(source_mnt, true); if (err) goto out; - } - err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); - if (err) - goto out_cleanup_ids; - - lock_mount_hash(); - - if (IS_MNT_SHARED(dest_mnt)) { + err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); + if (err) + goto out_cleanup_ids; + lock_mount_hash(); for (p = source_mnt; p; p = next_mnt(p, source_mnt)) set_mnt_shared(p); + } else { + lock_mount_hash(); } if (parent_path) { detach_mnt(source_mnt, parent_path); @@ -1685,8 +1683,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, return 0; out_cleanup_ids: - if (IS_MNT_SHARED(dest_mnt)) - cleanup_group_ids(source_mnt, NULL); + cleanup_group_ids(source_mnt, NULL); out: return err; } -- cgit v0.10.2 From 38129a13e6e71f666e0468e99fdd932a687b4d7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 20 Mar 2014 21:10:51 -0400 Subject: switch mnt_hash to hlist fixes RCU bug - walking through hlist is safe in face of element moves, since it's self-terminating. Cyclic lists are not - if we end up jumping to another hash chain, we'll loop infinitely without ever hitting the original list head. [fix for dumb braino folded] Spotted by: Max Kellermann Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index acdb428..b29e42f 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -25,7 +25,7 @@ struct mountpoint { }; struct mount { - struct list_head mnt_hash; + struct hlist_node mnt_hash; struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; diff --git a/fs/namespace.c b/fs/namespace.c index d3fb9f0..2ffc5a2 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -59,7 +59,7 @@ static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; -static struct list_head *mount_hashtable __read_mostly; +static struct hlist_head *mount_hashtable __read_mostly; static struct hlist_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static DECLARE_RWSEM(namespace_sem); @@ -78,7 +78,7 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); -static inline struct list_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) +static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); @@ -217,7 +217,7 @@ static struct mount *alloc_vfsmnt(const char *name) mnt->mnt_writers = 0; #endif - INIT_LIST_HEAD(&mnt->mnt_hash); + INIT_HLIST_NODE(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); @@ -605,10 +605,10 @@ bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) */ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = m_hash(mnt, dentry); + struct hlist_head *head = m_hash(mnt, dentry); struct mount *p; - list_for_each_entry_rcu(p, head, mnt_hash) + hlist_for_each_entry_rcu(p, head, mnt_hash) if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) return p; return NULL; @@ -620,20 +620,16 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) */ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = m_hash(mnt, dentry); - struct mount *p, *res = NULL; - - list_for_each_entry(p, head, mnt_hash) - if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) - goto found; - return res; -found: - res = p; - list_for_each_entry_continue(p, head, mnt_hash) { + struct mount *p, *res; + res = p = __lookup_mnt(mnt, dentry); + if (!p) + goto out; + hlist_for_each_entry_continue(p, mnt_hash) { if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) break; res = p; } +out: return res; } @@ -750,7 +746,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); - list_del_init(&mnt->mnt_hash); + hlist_del_init_rcu(&mnt->mnt_hash); put_mountpoint(mnt->mnt_mp); mnt->mnt_mp = NULL; } @@ -777,7 +773,7 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - list_add(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); + hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } @@ -800,9 +796,9 @@ static void commit_tree(struct mount *mnt, struct mount *shadows) list_splice(&head, n->list.prev); if (shadows) - list_add(&mnt->mnt_hash, &shadows->mnt_hash); + hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); else - list_add(&mnt->mnt_hash, + hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); touch_mnt_namespace(n); @@ -1193,26 +1189,28 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -static LIST_HEAD(unmounted); /* protected by namespace_sem */ +static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static void namespace_unlock(void) { struct mount *mnt; - LIST_HEAD(head); + struct hlist_head head = unmounted; - if (likely(list_empty(&unmounted))) { + if (likely(hlist_empty(&head))) { up_write(&namespace_sem); return; } - list_splice_init(&unmounted, &head); + head.first->pprev = &head.first; + INIT_HLIST_HEAD(&unmounted); + up_write(&namespace_sem); synchronize_rcu(); - while (!list_empty(&head)) { - mnt = list_first_entry(&head, struct mount, mnt_hash); - list_del_init(&mnt->mnt_hash); + while (!hlist_empty(&head)) { + mnt = hlist_entry(head.first, struct mount, mnt_hash); + hlist_del_init(&mnt->mnt_hash); if (mnt->mnt_ex_mountpoint.mnt) path_put(&mnt->mnt_ex_mountpoint); mntput(&mnt->mnt); @@ -1233,16 +1231,19 @@ static inline void namespace_lock(void) */ void umount_tree(struct mount *mnt, int how) { - LIST_HEAD(tmp_list); + HLIST_HEAD(tmp_list); struct mount *p; + struct mount *last = NULL; - for (p = mnt; p; p = next_mnt(p, mnt)) - list_move(&p->mnt_hash, &tmp_list); + for (p = mnt; p; p = next_mnt(p, mnt)) { + hlist_del_init_rcu(&p->mnt_hash); + hlist_add_head(&p->mnt_hash, &tmp_list); + } if (how) propagate_umount(&tmp_list); - list_for_each_entry(p, &tmp_list, mnt_hash) { + hlist_for_each_entry(p, &tmp_list, mnt_hash) { list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); @@ -1260,8 +1261,13 @@ void umount_tree(struct mount *mnt, int how) p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); + last = p; + } + if (last) { + last->mnt_hash.next = unmounted.first; + unmounted.first = tmp_list.first; + unmounted.first->pprev = &unmounted.first; } - list_splice(&tmp_list, &unmounted); } static void shrink_submounts(struct mount *mnt); @@ -1645,8 +1651,9 @@ static int attach_recursive_mnt(struct mount *source_mnt, struct mountpoint *dest_mp, struct path *parent_path) { - LIST_HEAD(tree_list); + HLIST_HEAD(tree_list); struct mount *child, *p; + struct hlist_node *n; int err; if (IS_MNT_SHARED(dest_mnt)) { @@ -1671,9 +1678,9 @@ static int attach_recursive_mnt(struct mount *source_mnt, commit_tree(source_mnt, NULL); } - list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { + hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { struct mount *q; - list_del_init(&child->mnt_hash); + hlist_del_init(&child->mnt_hash); q = __lookup_mnt_last(&child->mnt_parent->mnt, child->mnt_mountpoint); commit_tree(child, q); @@ -2818,7 +2825,7 @@ void __init mnt_init(void) 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); mount_hashtable = alloc_large_system_hash("Mount-cache", - sizeof(struct list_head), + sizeof(struct hlist_head), mhash_entries, 19, 0, &m_hash_shift, &m_hash_mask, 0, 0); @@ -2832,7 +2839,7 @@ void __init mnt_init(void) panic("Failed to allocate mount hash table\n"); for (u = 0; u <= m_hash_mask; u++) - INIT_LIST_HEAD(&mount_hashtable[u]); + INIT_HLIST_HEAD(&mount_hashtable[u]); for (u = 0; u <= mp_hash_mask; u++) INIT_HLIST_HEAD(&mountpoint_hashtable[u]); diff --git a/fs/pnode.c b/fs/pnode.c index c7221bb..88396df 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -220,14 +220,14 @@ static struct mount *get_source(struct mount *dest, * @tree_list : list of heads of trees to be attached. */ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, - struct mount *source_mnt, struct list_head *tree_list) + struct mount *source_mnt, struct hlist_head *tree_list) { struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mount *m, *child; int ret = 0; struct mount *prev_dest_mnt = dest_mnt; struct mount *prev_src_mnt = source_mnt; - LIST_HEAD(tmp_list); + HLIST_HEAD(tmp_list); for (m = propagation_next(dest_mnt, dest_mnt); m; m = propagation_next(m, dest_mnt)) { @@ -246,27 +246,29 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, child = copy_tree(source, source->mnt.mnt_root, type); if (IS_ERR(child)) { ret = PTR_ERR(child); - list_splice(tree_list, tmp_list.prev); + tmp_list = *tree_list; + tmp_list.first->pprev = &tmp_list.first; + INIT_HLIST_HEAD(tree_list); goto out; } if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { mnt_set_mountpoint(m, dest_mp, child); - list_add_tail(&child->mnt_hash, tree_list); + hlist_add_head(&child->mnt_hash, tree_list); } else { /* * This can happen if the parent mount was bind mounted * on some subdirectory of a shared/slave mount. */ - list_add_tail(&child->mnt_hash, &tmp_list); + hlist_add_head(&child->mnt_hash, &tmp_list); } prev_dest_mnt = m; prev_src_mnt = child; } out: lock_mount_hash(); - while (!list_empty(&tmp_list)) { - child = list_first_entry(&tmp_list, struct mount, mnt_hash); + while (!hlist_empty(&tmp_list)) { + child = hlist_entry(tmp_list.first, struct mount, mnt_hash); umount_tree(child, 0); } unlock_mount_hash(); @@ -338,8 +340,10 @@ static void __propagate_umount(struct mount *mnt) * umount the child only if the child has no * other children */ - if (child && list_empty(&child->mnt_mounts)) - list_move_tail(&child->mnt_hash, &mnt->mnt_hash); + if (child && list_empty(&child->mnt_mounts)) { + hlist_del_init_rcu(&child->mnt_hash); + hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); + } } } @@ -350,11 +354,11 @@ static void __propagate_umount(struct mount *mnt) * * vfsmount lock must be held for write */ -int propagate_umount(struct list_head *list) +int propagate_umount(struct hlist_head *list) { struct mount *mnt; - list_for_each_entry(mnt, list, mnt_hash) + hlist_for_each_entry(mnt, list, mnt_hash) __propagate_umount(mnt); return 0; } diff --git a/fs/pnode.h b/fs/pnode.h index 59e7eda..fc28a27 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -36,8 +36,8 @@ static inline void set_mnt_shared(struct mount *mnt) void change_mnt_propagation(struct mount *, int); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, - struct list_head *); -int propagate_umount(struct list_head *); + struct hlist_head *); +int propagate_umount(struct hlist_head *); int propagate_mount_busy(struct mount *, int); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); -- cgit v0.10.2 From 00a1a053ebe5febcfc2ec498bd894f035ad2aa06 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 30 Mar 2014 10:20:01 -0400 Subject: ext4: atomically set inode->i_flags in ext4_set_inode_flags() Use cmpxchg() to atomically set i_flags instead of clearing out the S_IMMUTABLE, S_APPEND, etc. flags and then setting them from the EXT4_IMMUTABLE_FL, EXT4_APPEND_FL flags, since this opens up a race where an immutable file has the immutable flag cleared for a brief window of time. Reported-by: John Sullivan Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6e39895..24bfd7f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ext4_jbd2.h" #include "xattr.h" @@ -3921,18 +3922,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) void ext4_set_inode_flags(struct inode *inode) { unsigned int flags = EXT4_I(inode)->i_flags; + unsigned int new_fl = 0; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (flags & EXT4_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & EXT4_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & EXT4_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & EXT4_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + set_mask_bits(&inode->i_flags, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); } /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index abc9ca7..be5fd38 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -196,6 +196,21 @@ static inline unsigned long __ffs64(u64 word) #ifdef __KERNEL__ +#ifndef set_mask_bits +#define set_mask_bits(ptr, _mask, _bits) \ +({ \ + const typeof(*ptr) mask = (_mask), bits = (_bits); \ + typeof(*ptr) old, new; \ + \ + do { \ + old = ACCESS_ONCE(*ptr); \ + new = (old & ~mask) | bits; \ + } while (cmpxchg(ptr, old, new) != old); \ + \ + new; \ +}) +#endif + #ifndef find_last_bit /** * find_last_bit - find the last set bit in a memory region -- cgit v0.10.2 From aa4af831bb4f3168f2f574b2620124699c09c4a3 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Sun, 30 Mar 2014 19:07:54 -0400 Subject: AUDIT: Allow login in non-init namespaces It its possible to configure your PAM stack to refuse login if audit messages (about the login) were unable to be sent. This is common in many distros and thus normal configuration of many containers. The PAM modules determine if audit is enabled/disabled in the kernel based on the return value from sending an audit message on the netlink socket. If userspace gets back ECONNREFUSED it believes audit is disabled in the kernel. If it gets any other error else it refuses to let the login proceed. Just about ever since the introduction of namespaces the kernel audit subsystem has returned EPERM if the task sending a message was not in the init user or pid namespace. So many forms of containers have never worked if audit was enabled in the kernel. BUT if the container was not in net_init then the kernel network code would send ECONNREFUSED (instead of the audit code sending EPERM). Thus by pure accident/dumb luck/bug if an admin configured the PAM stack to reject all logins that didn't talk to audit, but then ran the login untility in the non-init_net namespace, it would work!! Clearly this was a bug, but it is a bug some people expected. With the introduction of network namespace support in 3.14-rc1 the two bugs stopped cancelling each other out. Now, containers in the non-init_net namespace refused to let users log in (just like PAM was configfured!) Obviously some people were not happy that what used to let users log in, now didn't! This fix is kinda hacky. We return ECONNREFUSED for all non-init relevant namespaces. That means that not only will the old broken non-init_net setups continue to work, now the broken non-init_pid or non-init_user setups will 'work'. They don't really work, since audit isn't logging things. But it's what most users want. In 3.15 we should have patches to support not only the non-init_net (3.14) namespace but also the non-init_pid and non-init_user namespace. So all will be right in the world. This just opens the doors wide open on 3.14 and hopefully makes users happy, if not the audit system... Reported-by: Andre Tomt Reported-by: Adam Richter Signed-off-by: Eric Paris Signed-off-by: Linus Torvalds diff --git a/kernel/audit.c b/kernel/audit.c index 3392d3e..95a20f3 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -608,9 +608,19 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type) int err = 0; /* Only support the initial namespaces for now. */ + /* + * We return ECONNREFUSED because it tricks userspace into thinking + * that audit was not configured into the kernel. Lots of users + * configure their PAM stack (because that's what the distro does) + * to reject login if unable to send messages to audit. If we return + * ECONNREFUSED the PAM stack thinks the kernel does not have audit + * configured in and will let login proceed. If we return EPERM + * userspace will reject all logins. This should be removed when we + * support non init namespaces!! + */ if ((current_user_ns() != &init_user_ns) || (task_active_pid_ns(current) != &init_pid_ns)) - return -EPERM; + return -ECONNREFUSED; switch (msg_type) { case AUDIT_LIST: -- cgit v0.10.2 From 01358e562a8b97f50ec04025c009c71508e6d373 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 28 Mar 2014 09:45:33 -0700 Subject: MAINTAINERS: resume as Documentation maintainer I am the new kernel tree Documentation maintainer (except for parts that are handled by other people, of course). Signed-off-by: Randy Dunlap Acked-by: Rob Landley Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index a80c84e..900d98e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2801,9 +2801,9 @@ S: Supported F: drivers/acpi/dock.c DOCUMENTATION -M: Rob Landley +M: Randy Dunlap L: linux-doc@vger.kernel.org -T: TBD +T: quilt http://www.infradead.org/~rdunlap/Doc/patches/ S: Maintained F: Documentation/ -- cgit v0.10.2 From 455c6fdbd219161bd09b1165f11699d6d73de11c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 30 Mar 2014 20:40:15 -0700 Subject: Linux 3.14 diff --git a/Makefile b/Makefile index c10b734..e5ac8a6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From 8acc8722d3b5755abbfbe73f732dec576f28757a Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 22 Feb 2014 09:28:27 +0100 Subject: avr32: remove cpu_data macro to fix compiles Having cpu_data as a parameterless macro can easily cause build failures because it can be a variable name like in linux/pm_domain.h [1]. So, remove the macro and convert its only user. Because this architecture cannot do SMP, remove the whole SMP block, too. Only compile tested due to no hardware. Signed-off-by: Wolfram Sang Acked-by: Hans-Christian Egtvedt [1] https://lists.01.org/pipermail/kbuild-all/2014-February/003252.html diff --git a/arch/avr32/include/asm/bugs.h b/arch/avr32/include/asm/bugs.h index 7635e77..278661b 100644 --- a/arch/avr32/include/asm/bugs.h +++ b/arch/avr32/include/asm/bugs.h @@ -9,7 +9,7 @@ static void __init check_bugs(void) { - cpu_data->loops_per_jiffy = loops_per_jiffy; + boot_cpu_data.loops_per_jiffy = loops_per_jiffy; } #endif /* __ASM_AVR32_BUGS_H */ diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h index 48d71c5..972adcc 100644 --- a/arch/avr32/include/asm/processor.h +++ b/arch/avr32/include/asm/processor.h @@ -83,13 +83,8 @@ static inline unsigned int avr32_get_chip_revision(struct avr32_cpuinfo *cpu) extern struct avr32_cpuinfo boot_cpu_data; -#ifdef CONFIG_SMP -extern struct avr32_cpuinfo cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] -#else -#define cpu_data (&boot_cpu_data) +/* No SMP support so far */ #define current_cpu_data boot_cpu_data -#endif /* This decides where the kernel will search for a free chunk of vm * space during mmap's -- cgit v0.10.2 From 2601566d01026d6ad46b1e967466aee7fbce948e Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Sun, 9 Mar 2014 05:35:46 +0800 Subject: arch/avr32/mm/cache.c: export symbol flush_icache_range() for module using Need export symbol flush_icache_range() to modules, just like another platforms have done, or can not pass compiling. The related error (with allmodconfig under avr32): ERROR: "flush_icache_range" [drivers/misc/lkdtm.ko] undefined! make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 Signed-off-by: Chen Gang Acked-by: Hans-Christian Egtvedt diff --git a/arch/avr32/mm/cache.c b/arch/avr32/mm/cache.c index 6a46ecd..85d635c 100644 --- a/arch/avr32/mm/cache.c +++ b/arch/avr32/mm/cache.c @@ -111,6 +111,7 @@ void flush_icache_range(unsigned long start, unsigned long end) __flush_icache_range(start & ~(linesz - 1), (end + linesz - 1) & ~(linesz - 1)); } +EXPORT_SYMBOL(flush_icache_range); /* * This one is called from __do_fault() and do_swap_page(). -- cgit v0.10.2 From 536a44d4277709303755e6365a059f54f4aa5403 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 27 Mar 2014 18:02:38 +0100 Subject: ARM: sun7i/sun6i: dts: Fix IRQ number for sun6i NMI controller The IRQ line used in sun6i-a31.dtsi for the NMI controller is wrong. This causes a IRQ storm since the NMI controller is repeatedly fired. This patch fixes this problem assigning the correct IRQ number to the NMI controller. Signed-off-by: Hans de Goede Signed-off-by: Carlo Caione Cc: maxime.ripard@free-electrons.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Link: http://lkml.kernel.org/r/1395939759-11135-2-git-send-email-carlo@caione.org Signed-off-by: Thomas Gleixner diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index eea6033..fbdf88f 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -195,7 +195,7 @@ interrupt-controller; #interrupt-cells = <2>; reg = <0x01f00c0c 0x38>; - interrupts = <0 0 4>; + interrupts = <0 32 4>; }; pio: pinctrl@01c20800 { -- cgit v0.10.2 From 1b422ecd27866985b9f35d0d2b5ae6e9122dd4c0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 27 Mar 2014 18:02:39 +0100 Subject: irqchip: sun7i/sun6i: Disable NMI before registering the handler It is advisable to disable the NMI before registering the IRQ handler as registering the IRQ handler unmasks the IRQ on the GIC, so if U-Boot has left the NMI enabled and the NMI pin is active we will immediately get an interrupt before any driver has claimed the downstream interrupt of the NMI. Signed-off-by: Hans de Goede Signed-off-by: Carlo Caione Cc: maxime.ripard@free-electrons.com Cc: linux-arm-kernel@lists.infradead.org Cc: linux-sunxi@googlegroups.com Link: http://lkml.kernel.org/r/1395939759-11135-3-git-send-email-carlo@caione.org Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c index 1c8566c..12f547a 100644 --- a/drivers/irqchip/irq-sunxi-nmi.c +++ b/drivers/irqchip/irq-sunxi-nmi.c @@ -179,12 +179,12 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node, gc->chip_types[1].regs.type = reg_offs->ctrl; gc->chip_types[1].handler = handle_edge_irq; - irq_set_handler_data(irq, domain); - irq_set_chained_handler(irq, sunxi_sc_nmi_handle_irq); - sunxi_sc_nmi_write(gc, reg_offs->enable, 0); sunxi_sc_nmi_write(gc, reg_offs->pend, 0x1); + irq_set_handler_data(irq, domain); + irq_set_chained_handler(irq, sunxi_sc_nmi_handle_irq); + return 0; fail_irqd_remove: -- cgit v0.10.2 From 50be9eba831da114e1979726f4119d342698e651 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 4 Feb 2014 21:19:17 +0100 Subject: m68k: Update defconfigs for v3.14-rc1 Signed-off-by: Geert Uytterhoeven diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 559ff3a..96da496 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -24,6 +24,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y CONFIG_M68030=y CONFIG_M68040=y @@ -85,6 +87,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -94,6 +97,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -126,6 +131,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -163,8 +169,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -190,7 +194,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -512,7 +515,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index cb1f55d..1b8739f 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -25,6 +25,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y CONFIG_M68030=y CONFIG_M68040=y @@ -83,6 +85,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -92,6 +95,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -124,6 +129,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -161,8 +167,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -188,7 +192,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -470,7 +473,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index e880cfb..6ea4e91 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -24,6 +24,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y CONFIG_M68030=y CONFIG_M68040=y @@ -82,6 +84,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -91,6 +94,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -123,6 +128,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -160,8 +166,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -187,7 +191,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -487,7 +490,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 4aa4f45..e5a1273 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -24,6 +24,8 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y CONFIG_M68060=y CONFIG_VME=y @@ -81,6 +83,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -90,6 +93,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -122,6 +127,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -159,8 +165,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -186,7 +190,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -463,7 +466,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 7cd9d9f..8936d7f 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -25,6 +25,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y CONFIG_M68030=y CONFIG_M68040=y @@ -83,6 +85,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -92,6 +95,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -124,6 +129,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -161,8 +167,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -188,7 +192,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -472,7 +475,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 31f5bd0..be5342c 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -24,6 +24,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y CONFIG_M68030=y CONFIG_M68040=y @@ -82,6 +84,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -91,6 +94,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -123,6 +128,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -160,8 +166,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -187,7 +191,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -495,7 +498,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 4e5adff..f27194a 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -20,6 +20,8 @@ CONFIG_SOLARIS_X86_PARTITION=y CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68020=y CONFIG_M68040=y CONFIG_M68060=y @@ -91,6 +93,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -100,6 +103,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -132,6 +137,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -169,8 +175,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -196,7 +200,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -571,7 +574,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 02cdbac..c388760 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -24,6 +24,8 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68030=y CONFIG_VME=y CONFIG_MVME147=y @@ -80,6 +82,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -89,6 +92,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -121,6 +126,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -158,8 +164,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -185,7 +189,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -463,7 +466,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 05a990a..f7ff784 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -24,6 +24,8 @@ CONFIG_UNIXWARE_DISKLABEL=y CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y CONFIG_M68060=y CONFIG_VME=y @@ -81,6 +83,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -90,6 +93,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -122,6 +127,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -159,8 +165,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -186,7 +190,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -464,7 +467,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 568e2a9..f0c72ab 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -25,6 +25,8 @@ CONFIG_SUN_PARTITION=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_M68040=y CONFIG_M68060=y CONFIG_Q40=y @@ -81,6 +83,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -90,6 +93,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -122,6 +127,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -159,8 +165,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -186,7 +190,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -485,7 +488,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 60b0aea..7bca0f4 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -24,6 +24,8 @@ CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_SUN3=y # CONFIG_COMPACTION is not set CONFIG_CLEANCACHE=y @@ -78,6 +80,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -87,6 +90,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -119,6 +124,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -156,8 +162,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -183,7 +187,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -464,7 +467,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 21bda33..317f3e1 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -24,6 +24,8 @@ CONFIG_UNIXWARE_DISKLABEL=y # CONFIG_EFI_PARTITION is not set CONFIG_SYSV68_PARTITION=y CONFIG_IOSCHED_DEADLINE=m +CONFIG_KEXEC=y +CONFIG_BOOTINFO_PROC=y CONFIG_SUN3X=y # CONFIG_COMPACTION is not set CONFIG_CLEANCACHE=y @@ -78,6 +80,7 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=m CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_CT=m @@ -87,6 +90,8 @@ CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -119,6 +124,7 @@ CONFIG_NETFILTER_XT_MATCH_DSCP=m CONFIG_NETFILTER_XT_MATCH_ESP=m CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m CONFIG_NETFILTER_XT_MATCH_HELPER=m +CONFIG_NETFILTER_XT_MATCH_IPCOMP=m CONFIG_NETFILTER_XT_MATCH_IPRANGE=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_LIMIT=m @@ -156,8 +162,6 @@ CONFIG_IP_SET_HASH_NETPORT=m CONFIG_IP_SET_HASH_NETIFACE=m CONFIG_IP_SET_LIST_SET=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m -CONFIG_NFT_REJECT_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_NF_TABLES_ARP=m @@ -183,7 +187,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -464,7 +467,6 @@ CONFIG_CRYPTO_LZ4HC=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m # CONFIG_CRYPTO_HW is not set -CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y -- cgit v0.10.2 From 6f8a1b335fde143b7407036e2368d3cd6eb55674 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 12 Mar 2014 14:44:33 -0400 Subject: x86: Adjust irq remapping quirk for older revisions of 5500/5520 chipsets Commit 03bbcb2e7e2 (iommu/vt-d: add quirk for broken interrupt remapping on 55XX chipsets) properly disables irq remapping on the 5500/5520 chipsets that don't correctly perform that feature. However, when I wrote it, I followed the errata sheet linked in that commit too closely, and explicitly tied the activation of the quirk to revision 0x13 of the chip, under the assumption that earlier revisions were not in the field. Recently a system was reported to be suffering from this remap bug and the quirk hadn't triggered, because the revision id register read at a lower value that 0x13, so the quirk test failed improperly. Given this, it seems only prudent to adjust this quirk so that any revision less than 0x13 has the quirk asserted. [ tglx: Removed the 0x12 comparison of pci id 3405 as this is covered by the <= 0x13 check already ] Signed-off-by: Neil Horman Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1394649873-14913-1-git-send-email-nhorman@tuxdriver.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index bc4a088..6d7d5a1 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -203,18 +203,15 @@ static void __init intel_remapping_check(int num, int slot, int func) revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); /* - * Revision 13 of all triggering devices id in this quirk have - * a problem draining interrupts when irq remapping is enabled, - * and should be flagged as broken. Additionally revisions 0x12 - * and 0x22 of device id 0x3405 has this problem. + * Revision <= 13 of all triggering devices id in this quirk + * have a problem draining interrupts when irq remapping is + * enabled, and should be flagged as broken. Additionally + * revision 0x22 of device id 0x3405 has this problem. */ - if (revision == 0x13) + if (revision <= 0x13) set_irq_remapping_broken(); - else if ((device == 0x3405) && - ((revision == 0x12) || - (revision == 0x22))) + else if (device == 0x3405 && revision == 0x22) set_irq_remapping_broken(); - } /* -- cgit v0.10.2 From 4c3b7df4ed541f934e7aad7a52aee12d44b9c2ff Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 31 Mar 2014 20:42:42 -0400 Subject: avr32: replace simple_strtoul() with kstrtoul() simple_strtoul() is marked for obsoletion; use the newer and more pleasant kstrtoul() in its place. Cc: Alexey Dobriyan Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Signed-off-by: Ramkumar Ramachandra Signed-off-by: Hans-Christian Egtvedt diff --git a/arch/avr32/kernel/cpu.c b/arch/avr32/kernel/cpu.c index 2233be7..0341ae2 100644 --- a/arch/avr32/kernel/cpu.c +++ b/arch/avr32/kernel/cpu.c @@ -39,10 +39,12 @@ static ssize_t store_pc0event(struct device *dev, size_t count) { unsigned long val; - char *endp; + int ret; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf || val > 0x3f) + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + if (val > 0x3f) return -EINVAL; val = (val << 12) | (sysreg_read(PCCR) & 0xfffc0fff); sysreg_write(PCCR, val); @@ -61,11 +63,11 @@ static ssize_t store_pc0count(struct device *dev, const char *buf, size_t count) { unsigned long val; - char *endp; + int ret; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; sysreg_write(PCNT0, val); return count; @@ -84,10 +86,12 @@ static ssize_t store_pc1event(struct device *dev, size_t count) { unsigned long val; - char *endp; + int ret; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf || val > 0x3f) + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + if (val > 0x3f) return -EINVAL; val = (val << 18) | (sysreg_read(PCCR) & 0xff03ffff); sysreg_write(PCCR, val); @@ -106,11 +110,11 @@ static ssize_t store_pc1count(struct device *dev, size_t count) { unsigned long val; - char *endp; + int ret; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; sysreg_write(PCNT1, val); return count; @@ -129,11 +133,11 @@ static ssize_t store_pccycles(struct device *dev, size_t count) { unsigned long val; - char *endp; + int ret; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; sysreg_write(PCCNT, val); return count; @@ -152,11 +156,11 @@ static ssize_t store_pcenable(struct device *dev, size_t count) { unsigned long pccr, val; - char *endp; + int ret; - val = simple_strtoul(buf, &endp, 0); - if (endp == buf) - return -EINVAL; + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; if (val) val = 1; -- cgit v0.10.2