From 694617474e33b8603fc76e090ed7d09376514b1a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 4 Mar 2014 17:13:47 -0500 Subject: slab_common: fix the check for duplicate slab names The patch 3e374919b314f20e2a04f641ebc1093d758f66a4 is supposed to fix the problem where kmem_cache_create incorrectly reports duplicate cache name and fails. The problem is described in the header of that patch. However, the patch doesn't really fix the problem because of these reasons: * the logic to test for debugging is reversed. It was intended to perform the check only if slub debugging is enabled (which implies that caches with the same parameters are not merged). Therefore, there should be #if !defined(CONFIG_SLUB) || defined(CONFIG_SLUB_DEBUG_ON) The current code has the condition reversed and performs the test if debugging is disabled. * slub debugging may be enabled or disabled based on kernel command line, CONFIG_SLUB_DEBUG_ON is just the default settings. Therefore the test based on definition of CONFIG_SLUB_DEBUG_ON is unreliable. This patch fixes the problem by removing the test "!defined(CONFIG_SLUB_DEBUG_ON)". Therefore, duplicate names are never checked if the SLUB allocator is used. Note to stable kernel maintainers: when backporint this patch, please backport also the patch 3e374919b314f20e2a04f641ebc1093d758f66a4. Acked-by: David Rientjes Acked-by: Christoph Lameter Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # 3.6+ Signed-off-by: Pekka Enberg diff --git a/mm/slab_common.c b/mm/slab_common.c index 102cc6f..b810fba 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -55,7 +55,7 @@ static int kmem_cache_sanity_check(const char *name, size_t size) continue; } -#if !defined(CONFIG_SLUB) || !defined(CONFIG_SLUB_DEBUG_ON) +#if !defined(CONFIG_SLUB) if (!strcmp(s->name, name)) { pr_err("%s (%s): Cache name already exists.\n", __func__, name); -- cgit v0.10.2 From 17290231df16eeee5dfc198dbf5ee4b419996dcd Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sat, 24 May 2014 21:48:28 +0400 Subject: xtensa: add fixup for double exception raised in window overflow There are two FIXMEs in the double exception handler 'for the extremely unlikely case'. This case gets hit by gcc during kernel build once in a few hours, resulting in an unrecoverable exception condition. Provide missing fixup routine to handle this case. Double exception literals now need 8 more bytes, add them to the linker script. Also replace bbsi instructions with bbsi.l as we're branching depending on 8th and 7th LSB-based bits of exception address. This may be tested by adding the explicit DTLB invalidation to window overflow handlers, like the following: --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -592,6 +592,14 @@ ENDPROC(_WindowUnderflow4) ENTRY_ALIGN64(_WindowOverflow8) s32e a0, a9, -16 + bbsi.l a9, 31, 1f + rsr a0, ccount + bbsi.l a0, 4, 1f + pdtlb a0, a9 + idtlb a0 + movi a0, 9 + idtlb a0 +1: l32e a0, a1, -12 s32e a2, a9, -8 s32e a1, a9, -12 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov diff --git a/arch/xtensa/kernel/vectors.S b/arch/xtensa/kernel/vectors.S index f9e1ec3..8453e6e 100644 --- a/arch/xtensa/kernel/vectors.S +++ b/arch/xtensa/kernel/vectors.S @@ -376,38 +376,42 @@ _DoubleExceptionVector_WindowOverflow: beqz a2, 1f # if at start of vector, don't restore addi a0, a0, -128 - bbsi a0, 8, 1f # don't restore except for overflow 8 and 12 - bbsi a0, 7, 2f + bbsi.l a0, 8, 1f # don't restore except for overflow 8 and 12 + + /* + * This fixup handler is for the extremely unlikely case where the + * overflow handler's reference thru a0 gets a hardware TLB refill + * that bumps out the (distinct, aliasing) TLB entry that mapped its + * prior references thru a9/a13, and where our reference now thru + * a9/a13 gets a 2nd-level miss exception (not hardware TLB refill). + */ + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + bbsi.l a0, 7, 2f /* * Restore a0 as saved by _WindowOverflow8(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a9, and where our reference now thru a9 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a9, -16 - wsr a2, depc # replace the saved a0 - j 1f + l32e a0, a9, -16 + wsr a0, depc # replace the saved a0 + j 3f 2: /* * Restore a0 as saved by _WindowOverflow12(). - * - * FIXME: we really need a fixup handler for this L32E, - * for the extremely unlikely case where the overflow handler's - * reference thru a0 gets a hardware TLB refill that bumps out - * the (distinct, aliasing) TLB entry that mapped its prior - * references thru a13, and where our reference now thru a13 - * gets a 2nd-level miss exception (not hardware TLB refill). */ - l32e a2, a13, -16 - wsr a2, depc # replace the saved a0 + l32e a0, a13, -16 + wsr a0, depc # replace the saved a0 +3: + xsr a3, excsave1 + movi a0, 0 + s32i a0, a3, EXC_TABLE_FIXUP + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE 1: /* * Restore WindowBase while leaving all address registers restored. @@ -449,6 +453,7 @@ _DoubleExceptionVector_WindowOverflow: s32i a0, a2, PT_DEPC +_DoubleExceptionVector_handle_exception: addx4 a0, a0, a3 l32i a0, a0, EXC_TABLE_FAST_USER xsr a3, excsave1 @@ -464,11 +469,120 @@ _DoubleExceptionVector_WindowOverflow: rotw -3 j 1b - .end literal_prefix ENDPROC(_DoubleExceptionVector) /* + * Fixup handler for TLB miss in double exception handler for window owerflow. + * We get here with windowbase set to the window that was being spilled and + * a0 trashed. a0 bit 7 determines if this is a call8 (bit clear) or call12 + * (bit set) window. + * + * We do the following here: + * - go to the original window retaining a0 value; + * - set up exception stack to return back to appropriate a0 restore code + * (we'll need to rotate window back and there's no place to save this + * information, use different return address for that); + * - handle the exception; + * - go to the window that was being spilled; + * - set up window_overflow_restore_a0_fixup as a fixup routine; + * - reload a0; + * - restore the original window; + * - reset the default fixup routine; + * - return to user. By the time we get to this fixup handler all information + * about the conditions of the original double exception that happened in + * the window overflow handler is lost, so we just return to userspace to + * retry overflow from start. + * + * a0: value of depc, original value in depc + * a2: trashed, original value in EXC_TABLE_DOUBLE_SAVE + * a3: exctable, original value in excsave1 + */ + +ENTRY(window_overflow_restore_a0_fixup) + + rsr a0, ps + extui a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH + rsr a2, windowbase + sub a0, a2, a0 + extui a0, a0, 0, 3 + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + + _beqi a0, 1, .Lhandle_1 + _beqi a0, 3, .Lhandle_3 + + .macro overflow_fixup_handle_exception_pane n + + rsr a0, depc + rotw -\n + + xsr a3, excsave1 + wsr a2, depc + l32i a2, a3, EXC_TABLE_KSTK + s32i a0, a2, PT_AREG0 + + movi a0, .Lrestore_\n + s32i a0, a2, PT_DEPC + rsr a0, exccause + j _DoubleExceptionVector_handle_exception + + .endm + + overflow_fixup_handle_exception_pane 2 +.Lhandle_1: + overflow_fixup_handle_exception_pane 1 +.Lhandle_3: + overflow_fixup_handle_exception_pane 3 + + .macro overflow_fixup_restore_a0_pane n + + rotw \n + /* Need to preserve a0 value here to be able to handle exception + * that may occur on a0 reload from stack. It may occur because + * TLB miss handler may not be atomic and pointer to page table + * may be lost before we get here. There are no free registers, + * so we need to use EXC_TABLE_DOUBLE_SAVE area. + */ + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, window_overflow_restore_a0_fixup + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + bbsi.l a0, 7, 1f + l32e a0, a9, -16 + j 2f +1: + l32e a0, a13, -16 +2: + rotw -\n + + .endm + +.Lrestore_2: + overflow_fixup_restore_a0_pane 2 + +.Lset_default_fixup: + xsr a3, excsave1 + s32i a2, a3, EXC_TABLE_DOUBLE_SAVE + movi a2, 0 + s32i a2, a3, EXC_TABLE_FIXUP + l32i a2, a3, EXC_TABLE_DOUBLE_SAVE + xsr a3, excsave1 + rfe + +.Lrestore_1: + overflow_fixup_restore_a0_pane 1 + j .Lset_default_fixup +.Lrestore_3: + overflow_fixup_restore_a0_pane 3 + j .Lset_default_fixup + +ENDPROC(window_overflow_restore_a0_fixup) + + .end literal_prefix +/* * Debug interrupt vector * * There is not much space here, so simply jump to another handler. diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index ee32c00..d16db6d 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -269,13 +269,13 @@ SECTIONS .UserExceptionVector.literal) SECTION_VECTOR (_DoubleExceptionVector_literal, .DoubleExceptionVector.literal, - DOUBLEEXC_VECTOR_VADDR - 16, + DOUBLEEXC_VECTOR_VADDR - 40, SIZEOF(.UserExceptionVector.text), .UserExceptionVector.text) SECTION_VECTOR (_DoubleExceptionVector_text, .DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR, - 32, + 40, .DoubleExceptionVector.literal) . = (LOADADDR( .DoubleExceptionVector.text ) + SIZEOF( .DoubleExceptionVector.text ) + 3) & ~ 3; -- cgit v0.10.2 From be6ae382dc153da51cf066c8dd523aa955f02531 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 9 Jun 2014 22:18:24 +0400 Subject: xtensa: fix sysmem reservation at the end of existing block When sysmem reservation occurs exactly at the end of an existing block that block is deleted, because it is incorrectly included in the range of memblocks to remove. Fix that by skipping such block. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 4224256..77ed202 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -191,7 +191,7 @@ int __init mem_reserve(unsigned long start, unsigned long end, int must_exist) return -EINVAL; } - if (it && start - it->start < bank_sz) { + if (it && start - it->start <= bank_sz) { if (start == it->start) { if (end - it->start < bank_sz) { it->start = end; -- cgit v0.10.2 From d7da3a3ccdeb64ceedb51b0a3377ba56cc2999fa Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Fri, 13 Jun 2014 13:37:33 -0700 Subject: Input: wacom - cleanup multitouch code when touch_max is 2 Historically we dealt with touch_max equals to 2 differently from other MT devices. Now we use input_mt_*() to process all MT events, as long as touch_max is greater than 1. So, there is no need to take (touch_max == 2) as a special case any more. Signed-off-by: Ping Cheng Reviewed-by: Jason Gerecke Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 977d05c..e73cf2c 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -1217,9 +1217,9 @@ static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data) * a=(pi*r^2)/C. */ int a = data[5]; - int x_res = input_abs_get_res(input, ABS_X); - int y_res = input_abs_get_res(input, ABS_Y); - width = 2 * int_sqrt(a * WACOM_CONTACT_AREA_SCALE); + int x_res = input_abs_get_res(input, ABS_MT_POSITION_X); + int y_res = input_abs_get_res(input, ABS_MT_POSITION_Y); + width = 2 * int_sqrt(a * WACOM_CONTACT_AREA_SCALE); height = width * y_res / x_res; } @@ -1587,7 +1587,7 @@ static void wacom_abs_set_axis(struct input_dev *input_dev, input_abs_set_res(input_dev, ABS_X, features->x_resolution); input_abs_set_res(input_dev, ABS_Y, features->y_resolution); } else { - if (features->touch_max <= 2) { + if (features->touch_max == 1) { input_set_abs_params(input_dev, ABS_X, 0, features->x_max, features->x_fuzz, 0); input_set_abs_params(input_dev, ABS_Y, 0, @@ -1815,14 +1815,8 @@ int wacom_setup_input_capabilities(struct input_dev *input_dev, case MTTPC: case MTTPC_B: case TABLETPC2FG: - if (features->device_type == BTN_TOOL_FINGER) { - unsigned int flags = INPUT_MT_DIRECT; - - if (wacom_wac->features.type == TABLETPC2FG) - flags = 0; - - input_mt_init_slots(input_dev, features->touch_max, flags); - } + if (features->device_type == BTN_TOOL_FINGER && features->touch_max > 1) + input_mt_init_slots(input_dev, features->touch_max, INPUT_MT_DIRECT); /* fall through */ case TABLETPC: @@ -1883,10 +1877,6 @@ int wacom_setup_input_capabilities(struct input_dev *input_dev, __set_bit(BTN_RIGHT, input_dev->keybit); if (features->touch_max) { - /* touch interface */ - unsigned int flags = INPUT_MT_POINTER; - - __set_bit(INPUT_PROP_POINTER, input_dev->propbit); if (features->pktlen == WACOM_PKGLEN_BBTOUCH3) { input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, @@ -1894,12 +1884,8 @@ int wacom_setup_input_capabilities(struct input_dev *input_dev, input_set_abs_params(input_dev, ABS_MT_TOUCH_MINOR, 0, features->y_max, 0, 0); - } else { - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); - flags = 0; } - input_mt_init_slots(input_dev, features->touch_max, flags); + input_mt_init_slots(input_dev, features->touch_max, INPUT_MT_POINTER); } else { /* buttons/keys only interface */ __clear_bit(ABS_X, input_dev->absbit); -- cgit v0.10.2 From 31972f6e517d82a4f60de4994908724b7b47e337 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Sun, 15 Jun 2014 00:15:09 -0700 Subject: Input: ti_am335x_tsc - warn about incorrect spelling In the hopes that people run new kernels on their devices, let's add a warning message asking users to have their DTS file fixed. The goal is that by Linux 4.0 we will be able to remove support for the bogus version of our touchscreen's DTS. Signed-off-by: Felipe Balbi Acked-by: Mark Rutland Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/touchscreen/ti_am335x_tsc.c b/drivers/input/touchscreen/ti_am335x_tsc.c index 4e793a1..2ce6495 100644 --- a/drivers/input/touchscreen/ti_am335x_tsc.c +++ b/drivers/input/touchscreen/ti_am335x_tsc.c @@ -359,9 +359,12 @@ static int titsc_parse_dt(struct platform_device *pdev, */ err = of_property_read_u32(node, "ti,coordinate-readouts", &ts_dev->coordinate_readouts); - if (err < 0) + if (err < 0) { + dev_warn(&pdev->dev, "please use 'ti,coordinate-readouts' instead\n"); err = of_property_read_u32(node, "ti,coordiante-readouts", &ts_dev->coordinate_readouts); + } + if (err < 0) return err; -- cgit v0.10.2 From b7eea4545ea775df957460f58eb56085a8892856 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 18 Jun 2014 12:34:21 +0200 Subject: xfrm: Fix refcount imbalance in xfrm_lookup xfrm_lookup must return a dst_entry with a refcount for the caller. Git commit 1a1ccc96abb ("xfrm: Remove caching of xfrm_policy_sk_bundles") removed this refcount for the socket policy case accidentally. This patch restores it and sets DST_NOCACHE flag to make sure that the dst_entry is freed when the refcount becomes null. Fixes: 1a1ccc96abb ("xfrm: Remove caching of xfrm_policy_sk_bundles") Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a8ef510..0525d78 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2097,6 +2097,8 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } + dst_hold(&xdst->u.dst); + xdst->u.dst.flags |= DST_NOCACHE; route = xdst->route; } } -- cgit v0.10.2 From a0e5ef53aac8e5049f9344857d8ec5237d31e58b Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Thu, 26 Jun 2014 15:12:45 +0200 Subject: xfrm: Fix installation of AH IPsec SAs The SPI check introduced in ea9884b3acf3311c8a11db67bfab21773f6f82ba was intended for IPComp SAs but actually prevented AH SAs from getting installed (depending on the SPI). Fixes: ea9884b3acf3 ("xfrm: check user specified spi for IPComp") Cc: Fan Du Signed-off-by: Tobias Brunner Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 412d9dc..d4db6eb 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -177,9 +177,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP] || - attrs[XFRMA_TFCPAD] || - (ntohl(p->id.spi) >= 0x10000)) - + attrs[XFRMA_TFCPAD]) goto out; break; @@ -207,7 +205,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT] || - attrs[XFRMA_TFCPAD]) + attrs[XFRMA_TFCPAD] || + (ntohl(p->id.spi) >= 0x10000)) goto out; break; -- cgit v0.10.2 From ba394f0b6aa7a4a6afe67176da5d29f0ac59c48d Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Mon, 14 Jul 2014 18:43:46 +0530 Subject: ARM: OMAP2+: l2c: squelch warning dump on power control setting On OMAP SOCs using PL310 controllers, power_ctrl register is not accessible from non-secure software even on PL310 versions which support it. The secure code takes care of setting it up correctly and power transitions are proven on these devices. For example, AM437x has L2C-310 version r3p3 and ROM code on that device does not support writing to L2C-310 power control register. The L2C driver, however, tries writing to this register for all revisions >= r3p0. This leads to a warning dump on boot which leads most users to believe that L2 cache is non-functional. Since the problem is understood, and cannot be addressed through software, replace the warning with a pr_info() while maintaining the WARN_ON() for other truly unexpected scenarios. Reported-by: Nishanth Menon Tested-by: Felipe Balbi Signed-off-by: Sekhar Nori Acked-by: Santosh Shilimkar Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 539e810..a0fe747 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -168,6 +168,10 @@ static void omap4_l2c310_write_sec(unsigned long val, unsigned reg) smc_op = OMAP4_MON_L2X0_PREFETCH_INDEX; break; + case L310_POWER_CTRL: + pr_info_once("OMAP L2C310: ROM does not support power control setting\n"); + return; + default: WARN_ONCE(1, "OMAP L2C310: ignoring write to reg 0x%x\n", reg); return; -- cgit v0.10.2 From 8c947e20cb1f442c704852b2ca24b81981b09493 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Wed, 9 Jul 2014 09:48:06 -0700 Subject: Input: i8042 - add Acer Aspire 5710 to nomux blacklist Acer Aspire needs to be added to nomux blacklist, otherwise the touchpad misbehaves rather randomly. Signed-off-by: Jiri Kosina Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 381b20d..136b7b20 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -402,6 +402,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { }, }, { + /* Acer Aspire 5710 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 5710"), + }, + }, + { /* Gericom Bellagio */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Gericom"), -- cgit v0.10.2 From e76aed9da7189eeb41b9856552ce5721181e8e8d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 14 Jul 2014 17:12:21 -0700 Subject: Input: synaptics - add min/max quirk for pnp-id LEN2002 (Edge E531) https://bugzilla.redhat.com/show_bug.cgi?id=1114768 Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index ec772d9..ef9e0b8 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -132,7 +132,8 @@ static const struct min_max_quirk min_max_pnpid_table[] = { 1232, 5710, 1156, 4696 }, { - (const char * const []){"LEN0034", "LEN0036", "LEN2004", NULL}, + (const char * const []){"LEN0034", "LEN0036", "LEN2002", + "LEN2004", NULL}, 1024, 5112, 2024, 4832 }, { @@ -168,7 +169,7 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0049", "LEN2000", "LEN2001", /* Edge E431 */ - "LEN2002", + "LEN2002", /* Edge E531 */ "LEN2003", "LEN2004", /* L440 */ "LEN2005", -- cgit v0.10.2 From 7a2deccf0ef12f7f6e33150d5875020c0c94fa94 Mon Sep 17 00:00:00 2001 From: Maxime COQUELIN Date: Fri, 20 Jun 2014 13:34:54 +0200 Subject: pinctrl: st: Fix irqmux handler st_gpio_irqmux_handler() reads the status register to find out which banks inside the controller have pending IRQs. For each banks having pending IRQs, it calls the corresponding handler. Problem is that current code restricts the number of possible banks inside the controller to ST_GPIO_PINS_PER_BANK. This define represents the number of pins inside a bank, so it shouldn't be used here. On STiH407, PIO_FRONT0 controller has 10 banks, so IRQs pending in the two last banks (PIO18 & PIO19) aren't handled. This patch replace ST_GPIO_PINS_PER_BANK by the number of banks inside the controller. Cc: Linus Walleij Cc: #v3.15+ Acked-by: Srinivas Kandagatla Signed-off-by: Maxime Coquelin Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 1bd6363bc9..9f43916 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -1431,7 +1431,7 @@ static void st_gpio_irqmux_handler(unsigned irq, struct irq_desc *desc) status = readl(info->irqmux_base); - for_each_set_bit(n, &status, ST_GPIO_PINS_PER_BANK) + for_each_set_bit(n, &status, info->nbanks) __gpio_irq_handler(&info->banks[n]); chained_irq_exit(chip, desc); -- cgit v0.10.2 From 9963b53693c5fd20405ea8feb07c5a8626380d52 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 2 Jul 2014 16:33:31 +0200 Subject: MAINTAINERS: Add entry for the Renesas pin controller driver I'm actively maintaining the driver, let's document that. Signed-off-by: Laurent Pinchart Signed-off-by: Linus Walleij diff --git a/MAINTAINERS b/MAINTAINERS index e31c874..b6ffdda 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6958,6 +6958,12 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/pinctrl/pinctrl-at91.c +PIN CONTROLLER - RENESAS +M: Laurent Pinchart +L: linux-sh@vger.kernel.org +S: Maintained +F: drivers/pinctrl/sh-pfc/ + PIN CONTROLLER - SAMSUNG M: Tomasz Figa M: Thomas Abraham -- cgit v0.10.2 From fe132649b5b28c19bc657d167c232180774739f8 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 8 Jul 2014 12:46:46 +0200 Subject: gpio: rcar: Add support for DT IRQ flags The gpio-rcar driver has no IRQ domain OF xlate function and thus ignores IRQ flags specified in DT. Fix this by using the two-cell xlate function. Signed-off-by: Laurent Pinchart Signed-off-by: Linus Walleij diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c index 0c9f803..b6ae89e 100644 --- a/drivers/gpio/gpio-rcar.c +++ b/drivers/gpio/gpio-rcar.c @@ -284,6 +284,7 @@ static int gpio_rcar_irq_domain_map(struct irq_domain *h, unsigned int irq, static struct irq_domain_ops gpio_rcar_irq_domain_ops = { .map = gpio_rcar_irq_domain_map, + .xlate = irq_domain_xlate_twocell, }; struct gpio_rcar_info { -- cgit v0.10.2 From 2627b7e15c5064ddd5e578e4efd948d48d531a3f Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 10 Jul 2014 09:24:01 +0300 Subject: ipvs: avoid netns exit crash on ip_vs_conn_drop_conntrack commit 8f4e0a18682d91 ("IPVS netns exit causes crash in conntrack") added second ip_vs_conn_drop_conntrack call instead of just adding the needed check. As result, the first call still can cause crash on netns exit. Remove it. Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a8eb0a8..610e19c 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -797,7 +797,6 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_control_del(cp); if (cp->flags & IP_VS_CONN_F_NFCT) { - ip_vs_conn_drop_conntrack(cp); /* Do not access conntracks during subsys cleanup * because nf_conntrack_find_get can not be used after * conntrack cleanup for the net. -- cgit v0.10.2 From 1903d50cba54261a6562a476c05085f3d7a54097 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jul 2014 17:27:27 +0200 Subject: perf: Revert ("perf: Always destroy groups on exit") Vince reported that commit 15a2d4de0eab5 ("perf: Always destroy groups on exit") causes a regression with grouped events. In particular his read_group_attached.c test fails. https://github.com/deater/perf_event_tests/blob/master/tests/bugs/read_group_attached.c Because of the context switch optimization in perf_event_context_sched_out() the 'original' event may end up in the child process and when that exits the change in the patch in question destroys the actual grouping. Therefore revert that change and only destroy inherited groups. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/n/tip-zedy3uktcp753q8fw8dagx7a@git.kernel.org Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index b0c95f0..c46b02b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7458,7 +7458,19 @@ __perf_event_exit_task(struct perf_event *child_event, struct perf_event_context *child_ctx, struct task_struct *child) { - perf_remove_from_context(child_event, true); + /* + * Do not destroy the 'original' grouping; because of the context + * switch optimization the original events could've ended up in a + * random child task. + * + * If we were to destroy the original group, all group related + * operations would cease to function properly after this random + * child dies. + * + * Do destroy all inherited groups, we don't care about those + * and being thorough is better. + */ + perf_remove_from_context(child_event, !!child_event->parent); /* * It can happen that the parent exits first, and has events -- cgit v0.10.2 From 1996388e9f4e3444db8273bc08d25164d2967c21 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Mon, 14 Jul 2014 15:33:25 -0400 Subject: perf/x86/intel: Use proper dTLB-load-misses event on IvyBridge This was discussed back in February: https://lkml.org/lkml/2014/2/18/956 But I never saw a patch come out of it. On IvyBridge we share the SandyBridge cache event tables, but the dTLB-load-miss event is not compatible. Patch it up after the fact to the proper DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK Signed-off-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1407141528200.17214@vincent-weaver-1.umelst.maine.edu Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 07846d7..c206815 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2474,6 +2474,9 @@ __init int intel_pmu_init(void) case 62: /* IvyBridge EP */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + /* dTLB-load-misses on IVB is different than SNB */ + hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */ + memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); -- cgit v0.10.2 From 7711fe4fc2606712125cff1a55ce00df2ae0f1fb Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 30 Jun 2014 16:46:24 +0200 Subject: perf/x86/intel/uncore: Fix SNB-EP/IVT Cbox filter mappings This patch fixes the SNB-EP and IVT Cbox filter mapping table. The table controls which filters are supported by which events. There were several mistakes in those tables causing some filters to be ignored, such as NID on TOR_INSERTS. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: zheng.z.yan@intel.com Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140630144624.GA2604@quad Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 65bbbea..ae6552a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -550,16 +550,16 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa), SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8), SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8), - SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc), - SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc), + SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa), + SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa), SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2), SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2), @@ -1222,6 +1222,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, SNBEP_CBO_PMON_CTL_TID_EN, 0x1), SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), + SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), @@ -1245,7 +1246,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = { SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10), - SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), + SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10), SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18), SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18), -- cgit v0.10.2 From 4a1c0f262f88e2676fda80a6bf80e7dbccae1dcb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Jun 2014 16:12:42 +0200 Subject: perf: Fix lockdep warning on process exit Sasha Levin reported: > While fuzzing with trinity inside a KVM tools guest running the latest -next > kernel I've stumbled on the following spew: > > ====================================================== > [ INFO: possible circular locking dependency detected ] > 3.15.0-next-20140613-sasha-00026-g6dd125d-dirty #654 Not tainted > ------------------------------------------------------- > trinity-c578/9725 is trying to acquire lock: > (&(&pool->lock)->rlock){-.-...}, at: __queue_work (kernel/workqueue.c:1346) > > but task is already holding lock: > (&ctx->lock){-.....}, at: perf_event_exit_task (kernel/events/core.c:7471 kernel/events/core.c:7533) > > which lock already depends on the new lock. > 1 lock held by trinity-c578/9725: > #0: (&ctx->lock){-.....}, at: perf_event_exit_task (kernel/events/core.c:7471 kernel/events/core.c:7533) > > Call Trace: > dump_stack (lib/dump_stack.c:52) > print_circular_bug (kernel/locking/lockdep.c:1216) > __lock_acquire (kernel/locking/lockdep.c:1840 kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131 kernel/locking/lockdep.c:3182) > lock_acquire (./arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602) > _raw_spin_lock (include/linux/spinlock_api_smp.h:143 kernel/locking/spinlock.c:151) > __queue_work (kernel/workqueue.c:1346) > queue_work_on (kernel/workqueue.c:1424) > free_object (lib/debugobjects.c:209) > __debug_check_no_obj_freed (lib/debugobjects.c:715) > debug_check_no_obj_freed (lib/debugobjects.c:727) > kmem_cache_free (mm/slub.c:2683 mm/slub.c:2711) > free_task (kernel/fork.c:221) > __put_task_struct (kernel/fork.c:250) > put_ctx (include/linux/sched.h:1855 kernel/events/core.c:898) > perf_event_exit_task (kernel/events/core.c:907 kernel/events/core.c:7478 kernel/events/core.c:7533) > do_exit (kernel/exit.c:766) > do_group_exit (kernel/exit.c:884) > get_signal_to_deliver (kernel/signal.c:2347) > do_signal (arch/x86/kernel/signal.c:698) > do_notify_resume (arch/x86/kernel/signal.c:751) > int_signal (arch/x86/kernel/entry_64.S:600) Urgh.. so the only way I can make that happen is through: perf_event_exit_task_context() raw_spin_lock(&child_ctx->lock); unclone_ctx(child_ctx) put_ctx(ctx->parent_ctx); raw_spin_unlock_irqrestore(&child_ctx->lock); And we can avoid this by doing the change below. I can't immediately see how this changed recently, but given that you say it's easy to reproduce, lets fix this. Reported-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: Tejun Heo Cc: Dave Jones Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140623141242.GB19860@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index c46b02b..6b17ac1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7486,7 +7486,7 @@ __perf_event_exit_task(struct perf_event *child_event, static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { struct perf_event *child_event, *next; - struct perf_event_context *child_ctx; + struct perf_event_context *child_ctx, *parent_ctx; unsigned long flags; if (likely(!child->perf_event_ctxp[ctxn])) { @@ -7511,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) raw_spin_lock(&child_ctx->lock); task_ctx_sched_out(child_ctx); child->perf_event_ctxp[ctxn] = NULL; + + /* + * In order to avoid freeing: child_ctx->parent_ctx->task + * under perf_event_context::lock, grab another reference. + */ + parent_ctx = child_ctx->parent_ctx; + if (parent_ctx) + get_ctx(parent_ctx); + /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all @@ -7521,6 +7530,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) raw_spin_unlock_irqrestore(&child_ctx->lock, flags); /* + * Now that we no longer hold perf_event_context::lock, drop + * our extra child_ctx->parent_ctx reference. + */ + if (parent_ctx) + put_ctx(parent_ctx); + + /* * Report the task dead after unscheduling the events so that we * won't get any samples after PERF_RECORD_EXIT. We can however still * get a few PERF_RECORD_READ events. -- cgit v0.10.2 From 338b522ca43cfd32d11a370f4203bcd089c6c877 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 14 Jul 2014 12:25:56 -0700 Subject: perf/x86/intel: Protect LBR and extra_regs against KVM lying With -cpu host, KVM reports LBR and extra_regs support, if the host has support. When the guest perf driver tries to access LBR or extra_regs MSR, it #GPs all MSR accesses,since KVM doesn't handle LBR and extra_regs support. So check the related MSRs access right once at initialization time to avoid the error access at runtime. For reproducing the issue, please build the kernel with CONFIG_KVM_INTEL = y (for host kernel). And CONFIG_PARAVIRT = n and CONFIG_KVM_GUEST = n (for guest kernel). Start the guest with -cpu host. Run perf record with --branch-any or --branch-filter in guest to trigger LBR Run perf stat offcore events (E.g. LLC-loads/LLC-load-misses ...) in guest to trigger offcore_rsp #GP Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Maria Dimakopoulou Cc: Mark Davies Cc: Paul Mackerras Cc: Stephane Eranian Cc: Yan, Zheng Link: http://lkml.kernel.org/r/1405365957-20202-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2bdfbff..2879ecd 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event) continue; if (event->attr.config1 & ~er->valid_mask) return -EINVAL; + /* Check if the extra msrs can be safely accessed*/ + if (!er->extra_msr_access) + return -ENXIO; reg->idx = er->idx; reg->config = event->attr.config1; diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 3b2f9bd..8ade931 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -295,14 +295,16 @@ struct extra_reg { u64 config_mask; u64 valid_mask; int idx; /* per_xxx->regs[] reg index */ + bool extra_msr_access; }; #define EVENT_EXTRA_REG(e, ms, m, vm, i) { \ - .event = (e), \ - .msr = (ms), \ - .config_mask = (m), \ - .valid_mask = (vm), \ - .idx = EXTRA_REG_##i, \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + .idx = EXTRA_REG_##i, \ + .extra_msr_access = true, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c206815..2502d0d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2182,6 +2182,41 @@ static void intel_snb_check_microcode(void) } } +/* + * Under certain circumstances, access certain MSR may cause #GP. + * The function tests if the input MSR can be safely accessed. + */ +static bool check_msr(unsigned long msr, u64 mask) +{ + u64 val_old, val_new, val_tmp; + + /* + * Read the current value, change it and read it back to see if it + * matches, this is needed to detect certain hardware emulators + * (qemu/kvm) that don't trap on the MSR access and always return 0s. + */ + if (rdmsrl_safe(msr, &val_old)) + return false; + + /* + * Only change the bits which can be updated by wrmsrl. + */ + val_tmp = val_old ^ mask; + if (wrmsrl_safe(msr, val_tmp) || + rdmsrl_safe(msr, &val_new)) + return false; + + if (val_new != val_tmp) + return false; + + /* Here it's sure that the MSR can be safely accessed. + * Restore the old value and return. + */ + wrmsrl(msr, val_old); + + return true; +} + static __init void intel_sandybridge_quirk(void) { x86_pmu.check_microcode = intel_snb_check_microcode; @@ -2271,7 +2306,8 @@ __init int intel_pmu_init(void) union cpuid10_ebx ebx; struct event_constraint *c; unsigned int unused; - int version; + struct extra_reg *er; + int version, i; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { switch (boot_cpu_data.x86) { @@ -2577,6 +2613,34 @@ __init int intel_pmu_init(void) } } + /* + * Access LBR MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support LBR MSR + * Check all LBT MSR here. + * Disable LBR access if any LBR MSRs can not be accessed. + */ + if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + x86_pmu.lbr_nr = 0; + for (i = 0; i < x86_pmu.lbr_nr; i++) { + if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && + check_msr(x86_pmu.lbr_to + i, 0xffffUL))) + x86_pmu.lbr_nr = 0; + } + + /* + * Access extra MSR may cause #GP under certain circumstances. + * E.g. KVM doesn't support offcore event + * Check all extra_regs here. + */ + if (x86_pmu.extra_regs) { + for (er = x86_pmu.extra_regs; er->msr; er++) { + er->extra_msr_access = check_msr(er->msr, 0x1ffUL); + /* Disable LBR select mapping */ + if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access) + x86_pmu.lbr_sel_map = NULL; + } + } + /* Support full width counters using alternative MSR range */ if (x86_pmu.intel_cap.full_width_write) { x86_pmu.max_period = x86_pmu.cntval_mask; -- cgit v0.10.2 From 4485154138f6ffa5b252cb490aba3e8eb30124e4 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 30 Jun 2014 16:04:08 -0700 Subject: perf/x86/intel: Avoid spamming kernel log for BTS buffer failure It's unnecessary to excessively spam the kernel log anytime the BTS buffer cannot be allocated, so make this allocation __GFP_NOWARN. The user probably will want to at least find some artifact that the allocation has failed in the past, probably due to fragmentation because of its large size, when it's not allocated at bootstrap. Thus, add a WARN_ONCE() so something is left behind for them to understand why perf commnads that require PEBS is not working properly. Signed-off-by: David Rientjes Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1406301600460.26302@chino.kir.corp.google.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 980970c..696ade3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -311,9 +311,11 @@ static int alloc_bts_buffer(int cpu) if (!x86_pmu.bts) return 0; - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node); - if (unlikely(!buffer)) + buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + if (unlikely(!buffer)) { + WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; + } max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; thresh = max / 16; -- cgit v0.10.2 From 0cdd192cf40fb6dbf03ec3af1c670068de3fd26c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 11 Jul 2014 10:27:01 -0700 Subject: kprobes/x86: Don't try to resolve kprobe faults from userspace This commit: commit 6f6343f53d133bae516caf3d254bce37d8774625 Author: Masami Hiramatsu Date: Thu Apr 17 17:17:33 2014 +0900 kprobes/x86: Call exception handlers directly from do_int3/do_debug appears to have inadvertently dropped a check that the int3 came from kernel mode. Trying to dereference addr when addr is user-controlled is completely bogus. Signed-off-by: Andy Lutomirski Acked-by: Masami Hiramatsu Link: http://lkml.kernel.org/r/c4e339882c121aa76254f2adde3fcbdf502faec2.1405099506.git.luto@amacapital.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7596df6..67e6d19e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -574,6 +574,9 @@ int kprobe_int3_handler(struct pt_regs *regs) struct kprobe *p; struct kprobe_ctlblk *kcb; + if (user_mode_vm(regs)) + return 0; + addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); /* * We don't want to be preempted for the entire -- cgit v0.10.2 From d81b4253b0f0f1e7b7e03b0cd0f80cab18bc4d7b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Jul 2014 11:44:11 +0000 Subject: kprobes: Fix "Failed to find blacklist" probing errors on ia64 and ppc64 On ia64 and ppc64, function pointers do not point to the entry address of the function, but to the address of a function descriptor (which contains the entry address and misc data). Since the kprobes code passes the function pointer stored by NOKPROBE_SYMBOL() to kallsyms_lookup_size_offset() for initalizing its blacklist, it fails and reports many errors, such as: Failed to find blacklist 0001013168300000 Failed to find blacklist 0001013000f0a000 [...] To fix this bug, use arch_deref_entry_point() to get the function entry address for kallsyms_lookup_size_offset() instead of the raw function pointer. Suzuki also pointed out that blacklist entries should also be updated as well. Reported-by: Tony Luck Fixed-by: Suzuki K. Poulose Tested-by: Tony Luck Tested-by: Michael Ellerman Signed-off-by: Masami Hiramatsu Acked-by: Michael Ellerman (for powerpc) Acked-by: Benjamin Herrenschmidt Cc: Jeremy Fitzhardinge Cc: sparse@chrisli.org Cc: Paul Mackerras Cc: akataria@vmware.com Cc: anil.s.keshavamurthy@intel.com Cc: Fenghua Yu Cc: Arnd Bergmann Cc: Rusty Russell Cc: Chris Wright Cc: yrl.pp-manager.tt@hitachi.com Cc: Kevin Hao Cc: Ananth N Mavinakayanahalli Cc: rdunlap@infradead.org Cc: dl9pf@gmx.de Cc: Linus Torvalds Cc: David S. Miller Cc: linux-ia64@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140717114411.13401.2632.stgit@kbuild-fedora.novalocal Signed-off-by: Ingo Molnar diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3214289..734e9a7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2037,19 +2037,23 @@ static int __init populate_kprobe_blacklist(unsigned long *start, { unsigned long *iter; struct kprobe_blacklist_entry *ent; - unsigned long offset = 0, size = 0; + unsigned long entry, offset = 0, size = 0; for (iter = start; iter < end; iter++) { - if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) { - pr_err("Failed to find blacklist %p\n", (void *)*iter); + entry = arch_deref_entry_point((void *)*iter); + + if (!kernel_text_address(entry) || + !kallsyms_lookup_size_offset(entry, &size, &offset)) { + pr_err("Failed to find blacklist at %p\n", + (void *)entry); continue; } ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return -ENOMEM; - ent->start_addr = *iter; - ent->end_addr = *iter + size; + ent->start_addr = entry; + ent->end_addr = entry + size; INIT_LIST_HEAD(&ent->list); list_add_tail(&ent->list, &kprobe_blacklist); } -- cgit v0.10.2 From 2e58cdcc22148d89ccea8f900280736e5f585c07 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 8 Jul 2014 15:18:07 -0700 Subject: Input: st-keyscan - fix 'defined but not used' compiler warnings Add #ifdef CONFIG_PM_SLEEP around keyscan_supend() and keyscan_resume() to fix the following compiler warnings occuring if CONFIG_PM_SLEEP is unset: + /scratch/kisskb/src/drivers/input/keyboard/st-keyscan.c: warning: 'keyscan_resume' defined but not used [-Wunused-function]: => 235:12 + /scratch/kisskb/src/drivers/input/keyboard/st-keyscan.c: warning: 'keyscan_suspend' defined but not used [-Wunused-function]: => 218:12 Reported-by: Geert Uytterhoeven Link: https://lkml.org/lkml/2014/7/8/109 Signed-off-by: Tobias Klauser Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/st-keyscan.c b/drivers/input/keyboard/st-keyscan.c index 758b487..de7be4f 100644 --- a/drivers/input/keyboard/st-keyscan.c +++ b/drivers/input/keyboard/st-keyscan.c @@ -215,6 +215,7 @@ static int keyscan_probe(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP static int keyscan_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -249,6 +250,7 @@ static int keyscan_resume(struct device *dev) mutex_unlock(&input->mutex); return retval; } +#endif static SIMPLE_DEV_PM_OPS(keyscan_dev_pm_ops, keyscan_suspend, keyscan_resume); -- cgit v0.10.2 From 67f4aef20055afec73e37e7752bc6cc74fa01dea Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 18 Jul 2014 10:05:38 -0700 Subject: Input: sirfsoc-onkey - fix GPL v2 license string typo Per license_is_gpl_compatible(), the MODULE_LICENSE() string for GPL v2 is "GPL v2", not "GPLv2". Use "GPL v2" so this module doesn't taint the kernel. Signed-off-by: Bjorn Helgaas Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/sirfsoc-onkey.c b/drivers/input/misc/sirfsoc-onkey.c index e4104f9..fed5102 100644 --- a/drivers/input/misc/sirfsoc-onkey.c +++ b/drivers/input/misc/sirfsoc-onkey.c @@ -213,7 +213,7 @@ static struct platform_driver sirfsoc_pwrc_driver = { module_platform_driver(sirfsoc_pwrc_driver); -MODULE_LICENSE("GPLv2"); +MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Binghua Duan , Xianglong Du "); MODULE_DESCRIPTION("CSR Prima2 PWRC Driver"); MODULE_ALIAS("platform:sirfsoc-pwrc"); -- cgit v0.10.2 From 50c5d36dab930b1f1b1e3348b8608aa8b9ee7610 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 19 Jul 2014 16:30:31 -0700 Subject: Input: fix defuzzing logic We attempt to remove noise from coordinates reported by devices in input_handle_abs_event(), unfortunately, unless we were dropping the event altogether, we were ignoring the adjusted value and were passing on the original value instead. Cc: stable@vger.kernel.org Reviewed-by: Andrew de los Reyes Reviewed-by: Benson Leung Reviewed-by: David Herrmann Reviewed-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/input.c b/drivers/input/input.c index 1c4c0db..29ca0bb 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -257,9 +257,10 @@ static int input_handle_abs_event(struct input_dev *dev, } static int input_get_disposition(struct input_dev *dev, - unsigned int type, unsigned int code, int value) + unsigned int type, unsigned int code, int *pval) { int disposition = INPUT_IGNORE_EVENT; + int value = *pval; switch (type) { @@ -357,6 +358,7 @@ static int input_get_disposition(struct input_dev *dev, break; } + *pval = value; return disposition; } @@ -365,7 +367,7 @@ static void input_handle_event(struct input_dev *dev, { int disposition; - disposition = input_get_disposition(dev, type, code, value); + disposition = input_get_disposition(dev, type, code, &value); if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) dev->event(dev, type, code, value); -- cgit v0.10.2 From 51cbe7e7c400def749950ab6b2c120624dbe21a7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jun 2014 23:16:45 +0200 Subject: x86, MCE: Robustify mcheck_init_device BorisO reports that misc_register() fails often on xen. The current code unregisters the CPU hotplug notifier in that case. If then a CPU is offlined and onlined back again, we end up with a second timer running on that CPU, leading to soft lockups and system hangs. So let's leave the hotcpu notifier always registered - even if mce_device_create failed for some cores and never unreg it so that we can deal with the timer handling accordingly. Reported-and-Tested-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1403274493-1371-1-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Borislav Petkov diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index bb92f38..9a79c8d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2451,6 +2451,12 @@ static __init int mcheck_init_device(void) for_each_online_cpu(i) { err = mce_device_create(i); if (err) { + /* + * Register notifier anyway (and do not unreg it) so + * that we don't leave undeleted timers, see notifier + * callback above. + */ + __register_hotcpu_notifier(&mce_cpu_notifier); cpu_notifier_register_done(); goto err_device_create; } @@ -2471,10 +2477,6 @@ static __init int mcheck_init_device(void) err_register: unregister_syscore_ops(&mce_syscore_ops); - cpu_notifier_register_begin(); - __unregister_hotcpu_notifier(&mce_cpu_notifier); - cpu_notifier_register_done(); - err_device_create: /* * We didn't keep track of which devices were created above, but -- cgit v0.10.2 From 20b2656d7e644c8673f2b9944a0e65249e0ae555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 18 Jul 2014 13:56:56 +0200 Subject: drm/radeon: let's use GB for vm_size (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VM sizes smaller than 1GB doesn't make much sense anyway. v2: fix typo and grammer Signed-off-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 03686fa..697add2 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1056,36 +1056,36 @@ static void radeon_check_arguments(struct radeon_device *rdev) if (!radeon_check_pot_argument(radeon_vm_size)) { dev_warn(rdev->dev, "VM size (%d) must be a power of 2\n", radeon_vm_size); - radeon_vm_size = 4096; + radeon_vm_size = 4; } - if (radeon_vm_size < 4) { - dev_warn(rdev->dev, "VM size (%d) to small, min is 4MB\n", + if (radeon_vm_size < 1) { + dev_warn(rdev->dev, "VM size (%d) to small, min is 1GB\n", radeon_vm_size); - radeon_vm_size = 4096; + radeon_vm_size = 4; } /* * Max GPUVM size for Cayman, SI and CI are 40 bits. */ - if (radeon_vm_size > 1024*1024) { - dev_warn(rdev->dev, "VM size (%d) to large, max is 1TB\n", + if (radeon_vm_size > 1024) { + dev_warn(rdev->dev, "VM size (%d) too large, max is 1TB\n", radeon_vm_size); - radeon_vm_size = 4096; + radeon_vm_size = 4; } /* defines number of bits in page table versus page directory, * a page is 4KB so we have 12 bits offset, minimum 9 bits in the * page table and the remaining bits are in the page directory */ if (radeon_vm_block_size < 9) { - dev_warn(rdev->dev, "VM page table size (%d) to small\n", + dev_warn(rdev->dev, "VM page table size (%d) too small\n", radeon_vm_block_size); radeon_vm_block_size = 9; } if (radeon_vm_block_size > 24 || - radeon_vm_size < (1ull << radeon_vm_block_size)) { - dev_warn(rdev->dev, "VM page table size (%d) to large\n", + (radeon_vm_size * 1024) < (1ull << radeon_vm_block_size)) { + dev_warn(rdev->dev, "VM page table size (%d) too large\n", radeon_vm_block_size); radeon_vm_block_size = 9; } @@ -1238,7 +1238,7 @@ int radeon_device_init(struct radeon_device *rdev, /* Adjust VM size here. * Max GPUVM size for cayman+ is 40 bits. */ - rdev->vm_manager.max_pfn = radeon_vm_size << 8; + rdev->vm_manager.max_pfn = radeon_vm_size << 18; /* Set asic functions */ r = radeon_asic_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index cb14213..e9e3610 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -173,7 +173,7 @@ int radeon_dpm = -1; int radeon_aspm = -1; int radeon_runtime_pm = -1; int radeon_hard_reset = 0; -int radeon_vm_size = 4096; +int radeon_vm_size = 4; int radeon_vm_block_size = 9; int radeon_deep_color = 0; @@ -243,7 +243,7 @@ module_param_named(runpm, radeon_runtime_pm, int, 0444); MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))"); module_param_named(hard_reset, radeon_hard_reset, int, 0444); -MODULE_PARM_DESC(vm_size, "VM address space size in megabytes (default 4GB)"); +MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 4GB)"); module_param_named(vm_size, radeon_vm_size, int, 0444); MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default 9)"); -- cgit v0.10.2 From 036bf46a3962c87fc6ab5e6dbc65f469730b4cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 18 Jul 2014 08:56:40 +0200 Subject: drm/radeon: fix handling of radeon_vm_bo_rmv v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v3: completely rewritten. We now just remember which areas of the PT to clear and do so on the next command submission. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=79980 Signed-off-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index b720450..3d5e1a9 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -449,6 +449,7 @@ struct radeon_bo_va { /* protected by vm mutex */ struct list_head vm_list; + struct list_head vm_status; /* constant after initialization */ struct radeon_vm *vm; @@ -867,6 +868,9 @@ struct radeon_vm { struct list_head va; unsigned id; + /* BOs freed, but not yet updated in the PT */ + struct list_head freed; + /* contains the page directory */ struct radeon_bo *page_directory; uint64_t pd_gpu_addr; @@ -2832,9 +2836,10 @@ void radeon_vm_fence(struct radeon_device *rdev, uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr); int radeon_vm_update_page_directory(struct radeon_device *rdev, struct radeon_vm *vm); +int radeon_vm_clear_freed(struct radeon_device *rdev, + struct radeon_vm *vm); int radeon_vm_bo_update(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo, + struct radeon_bo_va *bo_va, struct ttm_mem_reg *mem); void radeon_vm_bo_invalidate(struct radeon_device *rdev, struct radeon_bo *bo); @@ -2847,8 +2852,8 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, struct radeon_bo_va *bo_va, uint64_t offset, uint32_t flags); -int radeon_vm_bo_rmv(struct radeon_device *rdev, - struct radeon_bo_va *bo_va); +void radeon_vm_bo_rmv(struct radeon_device *rdev, + struct radeon_bo_va *bo_va); /* audio */ void r600_audio_update_hdmi(struct work_struct *work); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 71a1434..09fcf4d 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -461,14 +461,24 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, struct radeon_vm *vm) { struct radeon_device *rdev = p->rdev; + struct radeon_bo_va *bo_va; int i, r; r = radeon_vm_update_page_directory(rdev, vm); if (r) return r; - r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, - &rdev->ring_tmp_bo.bo->tbo.mem); + r = radeon_vm_clear_freed(rdev, vm); + if (r) + return r; + + bo_va = radeon_vm_bo_find(vm, rdev->ring_tmp_bo.bo); + if (bo_va == NULL) { + DRM_ERROR("Tmp BO not in VM!\n"); + return -EINVAL; + } + + r = radeon_vm_bo_update(rdev, bo_va, &rdev->ring_tmp_bo.bo->tbo.mem); if (r) return r; @@ -480,7 +490,13 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, continue; bo = p->relocs[i].robj; - r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem); + bo_va = radeon_vm_bo_find(vm, bo); + if (bo_va == NULL) { + dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); + return -EINVAL; + } + + r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem); if (r) return r; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index eecff6b..2726b46 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -332,6 +332,7 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, bo_va->ref_count = 1; INIT_LIST_HEAD(&bo_va->bo_list); INIT_LIST_HEAD(&bo_va->vm_list); + INIT_LIST_HEAD(&bo_va->vm_status); mutex_lock(&vm->mutex); list_add(&bo_va->vm_list, &vm->va); @@ -468,6 +469,15 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, head = &tmp->vm_list; } + if (bo_va->soffset) { + /* add a clone of the bo_va to clear the old address */ + tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + tmp->soffset = bo_va->soffset; + tmp->eoffset = bo_va->eoffset; + tmp->vm = vm; + list_add(&tmp->vm_status, &vm->freed); + } + bo_va->soffset = soffset; bo_va->eoffset = eoffset; bo_va->flags = flags; @@ -823,25 +833,19 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, * Object have to be reserved and mutex must be locked! */ int radeon_vm_bo_update(struct radeon_device *rdev, - struct radeon_vm *vm, - struct radeon_bo *bo, + struct radeon_bo_va *bo_va, struct ttm_mem_reg *mem) { + struct radeon_vm *vm = bo_va->vm; struct radeon_ib ib; - struct radeon_bo_va *bo_va; unsigned nptes, ndw; uint64_t addr; int r; - bo_va = radeon_vm_bo_find(vm, bo); - if (bo_va == NULL) { - dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm); - return -EINVAL; - } if (!bo_va->soffset) { dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", - bo, vm); + bo_va->bo, vm); return -EINVAL; } @@ -868,7 +872,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, trace_radeon_vm_bo_update(bo_va); - nptes = radeon_bo_ngpu_pages(bo); + nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE; /* padding, etc. */ ndw = 64; @@ -911,33 +915,61 @@ int radeon_vm_bo_update(struct radeon_device *rdev, } /** + * radeon_vm_clear_freed - clear freed BOs in the PT + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Make sure all freed BOs are cleared in the PT. + * Returns 0 for success. + * + * PTs have to be reserved and mutex must be locked! + */ +int radeon_vm_clear_freed(struct radeon_device *rdev, + struct radeon_vm *vm) +{ + struct radeon_bo_va *bo_va, *tmp; + int r; + + list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { + list_del(&bo_va->vm_status); + r = radeon_vm_bo_update(rdev, bo_va, NULL); + kfree(bo_va); + if (r) + return r; + } + return 0; + +} + +/** * radeon_vm_bo_rmv - remove a bo to a specific vm * * @rdev: radeon_device pointer * @bo_va: requested bo_va * * Remove @bo_va->bo from the requested vm (cayman+). - * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and - * remove the ptes for @bo_va in the page table. - * Returns 0 for success. * * Object have to be reserved! */ -int radeon_vm_bo_rmv(struct radeon_device *rdev, - struct radeon_bo_va *bo_va) +void radeon_vm_bo_rmv(struct radeon_device *rdev, + struct radeon_bo_va *bo_va) { - int r = 0; + struct radeon_vm *vm = bo_va->vm; - mutex_lock(&bo_va->vm->mutex); - if (bo_va->soffset) - r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL); + list_del(&bo_va->bo_list); + mutex_lock(&vm->mutex); list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); - return r; + if (bo_va->soffset) { + bo_va->bo = NULL; + list_add(&bo_va->vm_status, &vm->freed); + } else { + kfree(bo_va); + } + + mutex_unlock(&vm->mutex); } /** @@ -980,6 +1012,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) vm->last_id_use = NULL; mutex_init(&vm->mutex); INIT_LIST_HEAD(&vm->va); + INIT_LIST_HEAD(&vm->freed); pd_size = radeon_vm_directory_size(rdev); pd_entries = radeon_vm_num_pdes(rdev); @@ -1034,7 +1067,8 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) kfree(bo_va); } } - + list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) + kfree(bo_va); for (i = 0; i < radeon_vm_num_pdes(rdev); i++) radeon_bo_unref(&vm->page_tables[i].bo); -- cgit v0.10.2 From cc9e67e3d7000c1efbaf929c6bdaf78707407b3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 18 Jul 2014 13:48:10 +0200 Subject: drm/radeon: fix VM IB handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling radeon_vm_bo_find on the IB BO during CS is illegal and can lead to an crash. Signed-off-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 3d5e1a9..60c47f8 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -879,6 +879,8 @@ struct radeon_vm { /* array of page tables, one for each page directory entry */ struct radeon_vm_pt *page_tables; + struct radeon_bo_va *ib_bo_va; + struct mutex mutex; /* last fence for cs using this vm */ struct radeon_fence *fence; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 09fcf4d..ae763f6 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -472,13 +472,13 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p, if (r) return r; - bo_va = radeon_vm_bo_find(vm, rdev->ring_tmp_bo.bo); - if (bo_va == NULL) { + if (vm->ib_bo_va == NULL) { DRM_ERROR("Tmp BO not in VM!\n"); return -EINVAL; } - r = radeon_vm_bo_update(rdev, bo_va, &rdev->ring_tmp_bo.bo->tbo.mem); + r = radeon_vm_bo_update(rdev, vm->ib_bo_va, + &rdev->ring_tmp_bo.bo->tbo.mem); if (r) return r; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 35d9318..d25ae6a 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -579,7 +579,7 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) /* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN) { struct radeon_fpriv *fpriv; - struct radeon_bo_va *bo_va; + struct radeon_vm *vm; int r; fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); @@ -587,7 +587,8 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return -ENOMEM; } - r = radeon_vm_init(rdev, &fpriv->vm); + vm = &fpriv->vm; + r = radeon_vm_init(rdev, vm); if (r) { kfree(fpriv); return r; @@ -596,22 +597,23 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) if (rdev->accel_working) { r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (r) { - radeon_vm_fini(rdev, &fpriv->vm); + radeon_vm_fini(rdev, vm); kfree(fpriv); return r; } /* map the ib pool buffer read only into * virtual address space */ - bo_va = radeon_vm_bo_add(rdev, &fpriv->vm, - rdev->ring_tmp_bo.bo); - r = radeon_vm_bo_set_addr(rdev, bo_va, RADEON_VA_IB_OFFSET, + vm->ib_bo_va = radeon_vm_bo_add(rdev, vm, + rdev->ring_tmp_bo.bo); + r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va, + RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); radeon_bo_unreserve(rdev->ring_tmp_bo.bo); if (r) { - radeon_vm_fini(rdev, &fpriv->vm); + radeon_vm_fini(rdev, vm); kfree(fpriv); return r; } @@ -640,21 +642,19 @@ void radeon_driver_postclose_kms(struct drm_device *dev, /* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) { struct radeon_fpriv *fpriv = file_priv->driver_priv; - struct radeon_bo_va *bo_va; + struct radeon_vm *vm = &fpriv->vm; int r; if (rdev->accel_working) { r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (!r) { - bo_va = radeon_vm_bo_find(&fpriv->vm, - rdev->ring_tmp_bo.bo); - if (bo_va) - radeon_vm_bo_rmv(rdev, bo_va); + if (vm->ib_bo_va) + radeon_vm_bo_rmv(rdev, vm->ib_bo_va); radeon_bo_unreserve(rdev->ring_tmp_bo.bo); } } - radeon_vm_fini(rdev, &fpriv->vm); + radeon_vm_fini(rdev, vm); kfree(fpriv); file_priv->driver_priv = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 2726b46..fa41e0d 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -1007,6 +1007,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) int r; vm->id = 0; + vm->ib_bo_va = NULL; vm->fence = NULL; vm->last_flush = NULL; vm->last_id_use = NULL; -- cgit v0.10.2 From 730a336c33a3398d65896e8ee3ef9f5679fe30a9 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 21 Jul 2014 10:41:13 -0400 Subject: drm/radeon/TN: only enable bapm on MSI systems There still seem to be stability problems with other systems. Bug: https://bugs.freedesktop.org/show_bug.cgi?id=72921 Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index 20da6ff..32e50be 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -1874,15 +1874,16 @@ int trinity_dpm_init(struct radeon_device *rdev) for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++) pi->at[i] = TRINITY_AT_DFLT; - /* There are stability issues reported on latops with - * bapm installed when switching between AC and battery - * power. At the same time, some desktop boards hang - * if it's not enabled and dpm is enabled. + /* There are stability issues reported on with + * bapm enabled when switching between AC and battery + * power. At the same time, some MSI boards hang + * if it's not enabled and dpm is enabled. Just enable + * it for MSI boards right now. */ - if (rdev->flags & RADEON_IS_MOBILITY) - pi->enable_bapm = false; - else + if (rdev->pdev->subsystem_vendor == 0x1462) pi->enable_bapm = true; + else + pi->enable_bapm = false; pi->enable_nbps_policy = true; pi->enable_sclk_ds = true; pi->enable_gfx_power_gating = true; -- cgit v0.10.2 From a0d036b074b4a5a933e37fcb9bdd6b3cc80a0387 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Jul 2014 12:40:42 +0100 Subject: drm/i915: Reorder the semaphore deadlock check, again commit 4be173813e57c7298103a83155c2391b5b167b4c Author: Chris Wilson Date: Fri Jun 6 10:22:29 2014 +0100 drm/i915: Reorder semaphore deadlock check did the majority of the work, but it missed one crucial detail: The check for the unkickable deadlock on this ring must come after the check whether the ring that we are waiting on has already passed its target seqno. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80709 Tested-by: Stefan Huber Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 267f069..c05c84f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2845,7 +2845,7 @@ static int semaphore_passed(struct intel_engine_cs *ring) { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct intel_engine_cs *signaller; - u32 seqno, ctl; + u32 seqno; ring->hangcheck.deadlock++; @@ -2857,15 +2857,12 @@ static int semaphore_passed(struct intel_engine_cs *ring) if (signaller->hangcheck.deadlock >= I915_NUM_RINGS) return -1; - /* cursory check for an unkickable deadlock */ - ctl = I915_READ_CTL(signaller); - if (ctl & RING_WAIT_SEMAPHORE && semaphore_passed(signaller) < 0) - return -1; - if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno)) return 1; - if (signaller->hangcheck.deadlock) + /* cursory check for an unkickable deadlock */ + if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE && + semaphore_passed(signaller) < 0) return -1; return 0; -- cgit v0.10.2 From bd6ba3518fcb2539d83163a3f486d09411bc535d Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 18 Jul 2014 11:41:37 +0930 Subject: powerpc: Disable doorbells on Power8 DD1.x These processors do not currently support doorbell IPIs, so remove them from the feature list if we are at DD 1.xx for the 0x004d part. This fixes a regression caused by d4e58e5928f8 (powerpc/powernv: Enable POWER8 doorbell IPIs). With that patch the kernel would hang at boot when calling smp_call_function_many, as the doorbell would not be received by the target CPUs: .smp_call_function_many+0x2bc/0x3c0 (unreliable) .on_each_cpu_mask+0x30/0x100 .cpuidle_register_driver+0x158/0x1a0 .cpuidle_register+0x2c/0x110 .powernv_processor_idle_init+0x23c/0x2c0 .do_one_initcall+0xd4/0x260 .kernel_init_freeable+0x25c/0x33c .kernel_init+0x1c/0x120 .ret_from_kernel_thread+0x58/0x7c Fixes: d4e58e5928f8 (powerpc/powernv: Enable POWER8 doorbell IPIs) Signed-off-by: Joel Stanley Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index bc23477..0fdd7ee 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -447,6 +447,7 @@ extern const char *powerpc_base_platform; CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) +#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 965291b..0c15764 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -527,6 +527,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check_early = __machine_check_early_realmode_p8, .platform = "power8", }, + { /* Power8 DD1: Does not support doorbell IPIs */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x004d0100, + .cpu_name = "POWER8 (raw)", + .cpu_features = CPU_FTRS_POWER8_DD1, + .cpu_user_features = COMMON_USER_POWER8, + .cpu_user_features2 = COMMON_USER2_POWER8, + .mmu_features = MMU_FTRS_POWER8, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 6, + .pmc_type = PPC_PMC_IBM, + .oprofile_cpu_type = "ppc64/power8", + .oprofile_type = PPC_OPROFILE_INVALID, + .cpu_setup = __setup_cpu_power8, + .cpu_restore = __restore_cpu_power8, + .flush_tlb = __flush_tlb_power8, + .machine_check_early = __machine_check_early_realmode_p8, + .platform = "power8", + }, { /* Power8 */ .pvr_mask = 0xffff0000, .pvr_value = 0x004d0000, -- cgit v0.10.2 From e698b9667879b79e479cc985f9d74ecf126e343e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 19 Jul 2014 17:47:57 +1000 Subject: powerpc: Fix bugs in emulate_step() This fixes some bugs in emulate_step(). First, the setting of the carry bit for the arithmetic right-shift instructions was not correct on 64-bit machines because we were masking with a mask of type int rather than unsigned long. Secondly, the sld (shift left doubleword) instruction was using the wrong instruction field for the register containing the shift count. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 412dd46..5c09f36 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1198,7 +1198,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x3f; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 32 ? sh : 31); - if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1208,7 +1208,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb; ival = (signed int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1216,7 +1216,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef __powerpc64__ case 27: /* sld */ - sh = regs->gpr[rd] & 0x7f; + sh = regs->gpr[rb] & 0x7f; if (sh < 64) regs->gpr[ra] = regs->gpr[rd] << sh; else @@ -1235,7 +1235,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = regs->gpr[rb] & 0x7f; ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> (sh < 64 ? sh : 63); - if (ival < 0 && (sh >= 64 || (ival & ((1 << sh) - 1)) != 0)) + if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0)) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; @@ -1246,7 +1246,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) sh = rb | ((instr & 2) << 4); ival = (signed long int) regs->gpr[rd]; regs->gpr[ra] = ival >> sh; - if (ival < 0 && (ival & ((1 << sh) - 1)) != 0) + if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0) regs->xer |= XER_CA; else regs->xer &= ~XER_CA; -- cgit v0.10.2 From dad6f37c2602e4af6c3aecfdb41f2d8bd4668163 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 15 Jul 2014 20:22:30 +0530 Subject: powerpc: subpage_protect: Increase the array size to take care of 64TB We now support TASK_SIZE of 16TB, hence the array should be 8. Fixes the below crash: Unable to handle kernel paging request for data at address 0x000100bd Faulting instruction address: 0xc00000000004f914 cpu 0x13: Vector: 300 (Data Access) at [c000000fea75fa90] pc: c00000000004f914: .sys_subpage_prot+0x2d4/0x5c0 lr: c00000000004fb5c: .sys_subpage_prot+0x51c/0x5c0 sp: c000000fea75fd10 msr: 9000000000009032 dar: 100bd dsisr: 40000000 current = 0xc000000fea6ae490 paca = 0xc00000000fb8ab00 softe: 0 irq_happened: 0x00 pid = 8237, comm = a.out enter ? for help [c000000fea75fe30] c00000000000a164 syscall_exit+0x0/0x98 Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index 807014d..c2b4dcf 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -22,6 +22,7 @@ */ #include #include +#include /* * Segment table @@ -496,7 +497,7 @@ extern void slb_set_size(u16 size); */ struct subpage_prot_table { unsigned long maxaddr; /* only addresses < this are protected */ - unsigned int **protptrs[2]; + unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)]; unsigned int *low_prot[4]; }; -- cgit v0.10.2 From 97a9a7179aad701ab676e6f29eb90766a1acfde2 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Thu, 10 Jul 2014 14:50:57 -0400 Subject: powerpc/pseries: dynamically added OF nodes need to call of_node_init Commit 75b57ecf9 refactored device tree nodes to use kobjects such that they can be exposed via /sysfs. A secondary commit 0829f6d1f furthered this rework by moving the kobect initialization logic out of of_node_add into its own of_node_init function. The inital commit removed the existing kref_init calls in the pseries dlpar code with the assumption kobject initialization would occur in of_node_add. The second commit had the side effect of triggering a BUG_ON during DLPAR, migration and suspend/resume operations as a result of dynamically added nodes being uninitialized. This patch fixes this by adding of_node_init calls in place of the previously removed kref_init calls. Fixes: 0829f6d1f69e ("of: device_node kobject lifecycle fixes") Cc: stable@vger.kernel.org Signed-off-by: Tyrel Datwyler Acked-by: Nathan Fontenot Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 022b38e6..2d0b4d6 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -86,6 +86,7 @@ static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa, } of_node_set_flag(dn, OF_DYNAMIC); + of_node_init(dn); return dn; } diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 0435bb6..1c0a60d 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -69,6 +69,7 @@ static int pSeries_reconfig_add_node(const char *path, struct property *proplist np->properties = proplist; of_node_set_flag(np, OF_DYNAMIC); + of_node_init(np); np->parent = derive_parent(path); if (IS_ERR(np->parent)) { -- cgit v0.10.2 From 6f5405bc2ee0102bb3856e2cdea64ff415db2e0c Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 21 Jul 2014 17:55:13 +0800 Subject: powerpc: use _GLOBAL_TOC for memmove memmove may be called from module code copy_pages(btrfs), and it may call memcpy, which may call back to C code, so it needs to use _GLOBAL_TOC to set up r2 correctly. This fixes following error when I tried to boot an le guest: Vector: 300 (Data Access) at [c000000073f97210] pc: c000000000015004: enable_kernel_altivec+0x24/0x80 lr: c000000000058fbc: enter_vmx_copy+0x3c/0x60 sp: c000000073f97490 msr: 8000000002009033 dar: d000000001d50170 dsisr: 40000000 current = 0xc0000000734c0000 paca = 0xc00000000fff0000 softe: 0 irq_happened: 0x01 pid = 815, comm = mktemp enter ? for help [c000000073f974f0] c000000000058fbc enter_vmx_copy+0x3c/0x60 [c000000073f97510] c000000000057d34 memcpy_power7+0x274/0x840 [c000000073f97610] d000000001c3179c copy_pages+0xfc/0x110 [btrfs] [c000000073f97660] d000000001c3c248 memcpy_extent_buffer+0xe8/0x160 [btrfs] [c000000073f97700] d000000001be4be8 setup_items_for_insert+0x208/0x4a0 [btrfs] [c000000073f97820] d000000001be50b4 btrfs_insert_empty_items+0xf4/0x140 [btrfs] [c000000073f97890] d000000001bfed30 insert_with_overflow+0x70/0x180 [btrfs] [c000000073f97900] d000000001bff174 btrfs_insert_dir_item+0x114/0x2f0 [btrfs] [c000000073f979a0] d000000001c1f92c btrfs_add_link+0x10c/0x370 [btrfs] [c000000073f97a40] d000000001c20e94 btrfs_create+0x204/0x270 [btrfs] [c000000073f97b00] c00000000026d438 vfs_create+0x178/0x210 [c000000073f97b50] c000000000270a70 do_last+0x9f0/0xe90 [c000000073f97c20] c000000000271010 path_openat+0x100/0x810 [c000000073f97ce0] c000000000272ea8 do_filp_open+0x58/0xd0 [c000000073f97dc0] c00000000025ade8 do_sys_open+0x1b8/0x300 [c000000073f97e30] c00000000000a008 syscall_exit+0x0/0x7c Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 0738f96..43435c6 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -77,7 +77,7 @@ _GLOBAL(memset) stb r4,0(r6) blr -_GLOBAL(memmove) +_GLOBAL_TOC(memmove) cmplw 0,r3,r4 bgt backwards_memcpy b memcpy -- cgit v0.10.2 From 88b98287356762cc16c9ff6cd48116160a5d4dba Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 21 Jul 2014 21:34:30 -0700 Subject: drm/i915: fix freeze with blank screen booting highmem x86_64 boots and displays fine, but booting x86_32 with CONFIG_HIGHMEM has frozen with a blank screen throughout 3.16-rc on this ThinkPad T420s, with i915 generation 6 graphics. Fix 9d0a6fa6c5e6 ("drm/i915: add render state initialization"): kunmap() takes struct page * argument, not virtual address. Which the compiler kindly points out, if you use the appropriate u32 *batch, instead of silencing it with a void *. Why did bisection lead decisively to nearby 229b0489aa75 ("drm/i915: add null render states for gen6, gen7 and gen8")? Because the u32 deposited at that virtual address by the previous stub failed the PageHighMem test, and so did no harm. Signed-off-by: Hugh Dickins Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 3521f99..34894b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -31,7 +31,7 @@ struct i915_render_state { struct drm_i915_gem_object *obj; unsigned long ggtt_offset; - void *batch; + u32 *batch; u32 size; u32 len; }; @@ -80,7 +80,7 @@ free: static void render_state_free(struct i915_render_state *so) { - kunmap(so->batch); + kunmap(kmap_to_page(so->batch)); i915_gem_object_ggtt_unpin(so->obj); drm_gem_object_unreference(&so->obj->base); kfree(so); -- cgit v0.10.2 From 8142b215501f8b291a108a202b3a053a265b03dd Mon Sep 17 00:00:00 2001 From: Sven Wegener Date: Tue, 22 Jul 2014 10:26:06 +0200 Subject: x86_32, entry: Store badsys error code in %eax Commit 554086d ("x86_32, entry: Do syscall exit work on badsys (CVE-2014-4508)") introduced a regression in the x86_32 syscall entry code, resulting in syscall() not returning proper errors for undefined syscalls on CPUs supporting the sysenter feature. The following code: > int result = syscall(666); > printf("result=%d errno=%d error=%s\n", result, errno, strerror(errno)); results in: > result=666 errno=0 error=Success Obviously, the syscall return value is the called syscall number, but it should have been an ENOSYS error. When run under ptrace it behaves correctly, which makes it hard to debug in the wild: > result=-1 errno=38 error=Function not implemented The %eax register is the return value register. For debugging via ptrace the syscall entry code stores the complete register context on the stack. The badsys handlers only store the ENOSYS error code in the ptrace register set and do not set %eax like a regular syscall handler would. The old resume_userspace call chain contains code that clobbers %eax and it restores %eax from the ptrace registers afterwards. The same goes for the ptrace-enabled call chain. When ptrace is not used, the syscall return value is the passed-in syscall number from the untouched %eax register. Use %eax as the return value register in syscall_badsys and sysenter_badsys, like a real syscall handler does, and have the caller push the value onto the stack for ptrace access. Signed-off-by: Sven Wegener Link: http://lkml.kernel.org/r/alpine.LNX.2.11.1407221022380.31021@titan.int.lan.stealer.net Reviewed-and-tested-by: Andy Lutomirski Cc: # If 554086d is backported Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index dbaa23e..0d0c9d4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -425,8 +425,8 @@ sysenter_do_call: cmpl $(NR_syscalls), %eax jae sysenter_badsys call *sys_call_table(,%eax,4) - movl %eax,PT_EAX(%esp) sysenter_after_call: + movl %eax,PT_EAX(%esp) LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF @@ -502,6 +502,7 @@ ENTRY(system_call) jae syscall_badsys syscall_call: call *sys_call_table(,%eax,4) +syscall_after_call: movl %eax,PT_EAX(%esp) # store the return value syscall_exit: LOCKDEP_SYS_EXIT @@ -675,12 +676,12 @@ syscall_fault: END(syscall_fault) syscall_badsys: - movl $-ENOSYS,PT_EAX(%esp) - jmp syscall_exit + movl $-ENOSYS,%eax + jmp syscall_after_call END(syscall_badsys) sysenter_badsys: - movl $-ENOSYS,PT_EAX(%esp) + movl $-ENOSYS,%eax jmp sysenter_after_call END(syscall_badsys) CFI_ENDPROC -- cgit v0.10.2 From 901401166464dc1875825235bb2541af31b4c384 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 22 Jul 2014 23:11:03 +0900 Subject: ALSA: bebob: Fix a missing to unlock mutex in error handling case In error handling case, special_clk_ctl_put() returns without unlock_mutex(), therefore the mutex is still locked. This commit moves mutex_lock() after the error handling case. This commit is my solution for this post. [PATCH -next] ALSA: bebob: Fix missing unlock on error in special_clk_ctl_put() https://lkml.org/lkml/2014/7/20/12 Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c index 6af50eb..fc470c6 100644 --- a/sound/firewire/bebob/bebob_maudio.c +++ b/sound/firewire/bebob/bebob_maudio.c @@ -379,12 +379,12 @@ static int special_clk_ctl_put(struct snd_kcontrol *kctl, struct special_params *params = bebob->maudio_special_quirk; int err, id; - mutex_lock(&bebob->mutex); - id = uval->value.enumerated.item[0]; if (id >= ARRAY_SIZE(special_clk_labels)) return 0; + mutex_lock(&bebob->mutex); + err = avc_maudio_set_special_clk(bebob, id, params->dig_in_fmt, params->dig_out_fmt, -- cgit v0.10.2 From 5a0438f4a6328b47bd3c00b2f03eb766cc72a75c Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 22 Jul 2014 23:13:47 +0900 Subject: ALSA: bebob: Use different labels for digital input/output This commit uses different labels for control elements of digital input/output interfaces to correct my misunderstanding about M-Audio Firewire 1814 and ProjectMix I/O. According to user manuals for these two models, they have two modes for digital input; one is S/PDIF in both of optical and coaxial interfaces, another is ADAT in optical interface only. But in current implementation, a control element for it reduced labels which a control element for digital output uses because of my misunderstanding that optical interface is not available for digital input with S/PDIF mode. Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c index fc470c6..42e6f22 100644 --- a/sound/firewire/bebob/bebob_maudio.c +++ b/sound/firewire/bebob/bebob_maudio.c @@ -434,8 +434,8 @@ static struct snd_kcontrol_new special_sync_ctl = { .get = special_sync_ctl_get, }; -/* Digital interface control for special firmware */ -static char *const special_dig_iface_labels[] = { +/* Digital input interface control for special firmware */ +static char *const special_dig_in_iface_labels[] = { "S/PDIF Optical", "S/PDIF Coaxial", "ADAT Optical" }; static int special_dig_in_iface_ctl_info(struct snd_kcontrol *kctl, @@ -443,13 +443,13 @@ static int special_dig_in_iface_ctl_info(struct snd_kcontrol *kctl, { einf->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; einf->count = 1; - einf->value.enumerated.items = ARRAY_SIZE(special_dig_iface_labels); + einf->value.enumerated.items = ARRAY_SIZE(special_dig_in_iface_labels); if (einf->value.enumerated.item >= einf->value.enumerated.items) einf->value.enumerated.item = einf->value.enumerated.items - 1; strcpy(einf->value.enumerated.name, - special_dig_iface_labels[einf->value.enumerated.item]); + special_dig_in_iface_labels[einf->value.enumerated.item]); return 0; } @@ -504,9 +504,14 @@ static int special_dig_in_iface_ctl_set(struct snd_kcontrol *kctl, dig_in_fmt, params->dig_out_fmt, params->clk_lock); - if ((err < 0) || (params->dig_in_fmt > 0)) /* ADAT */ + if (err < 0) + goto end; + + /* For ADAT, optical interface is only available. */ + if (params->dig_in_fmt > 0) goto end; + /* For S/PDIF, optical/coaxial interfaces are selectable. */ err = avc_audio_set_selector(bebob->unit, 0x00, 0x04, dig_in_iface); if (err < 0) dev_err(&bebob->unit->device, @@ -525,18 +530,22 @@ static struct snd_kcontrol_new special_dig_in_iface_ctl = { .put = special_dig_in_iface_ctl_set }; +/* Digital output interface control for special firmware */ +static char *const special_dig_out_iface_labels[] = { + "S/PDIF Optical and Coaxial", "ADAT Optical" +}; static int special_dig_out_iface_ctl_info(struct snd_kcontrol *kctl, struct snd_ctl_elem_info *einf) { einf->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; einf->count = 1; - einf->value.enumerated.items = ARRAY_SIZE(special_dig_iface_labels) - 1; + einf->value.enumerated.items = ARRAY_SIZE(special_dig_out_iface_labels); if (einf->value.enumerated.item >= einf->value.enumerated.items) einf->value.enumerated.item = einf->value.enumerated.items - 1; strcpy(einf->value.enumerated.name, - special_dig_iface_labels[einf->value.enumerated.item + 1]); + special_dig_out_iface_labels[einf->value.enumerated.item]); return 0; } -- cgit v0.10.2 From f77ac91e8edade4755f732d52fa094dc3bfd8b8e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Tue, 22 Jul 2014 23:13:56 +0900 Subject: ALSA: bebob: Correction for return value of .put callback This commit is for correction of my misunderstanding about return value of .put callback in ALSA Control interface. According to 'Writing ALSA Driver' (*1), return value of the callback has three patterns; 1: changed, 0: not changed, an negative value: fatal error. But I misunderstood that it's boolean; zero or nonzero. *1: Writing an ALSA Driver (2005, Takashi Iwai) http://www.alsa-project.org/main/index.php/ALSA_Driver_Documentation Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c index 42e6f22..d6d6ff8 100644 --- a/sound/firewire/bebob/bebob_maudio.c +++ b/sound/firewire/bebob/bebob_maudio.c @@ -391,7 +391,10 @@ static int special_clk_ctl_put(struct snd_kcontrol *kctl, params->clk_lock); mutex_unlock(&bebob->mutex); - return err >= 0; + if (err >= 0) + err = 1; + + return err; } static struct snd_kcontrol_new special_clk_ctl = { .name = "Clock Source", @@ -491,14 +494,16 @@ static int special_dig_in_iface_ctl_set(struct snd_kcontrol *kctl, unsigned int id, dig_in_fmt, dig_in_iface; int err; - mutex_lock(&bebob->mutex); - id = uval->value.enumerated.item[0]; + if (id >= ARRAY_SIZE(special_dig_in_iface_labels)) + return -EINVAL; /* decode user value */ dig_in_fmt = (id >> 1) & 0x01; dig_in_iface = id & 0x01; + mutex_lock(&bebob->mutex); + err = avc_maudio_set_special_clk(bebob, params->clk_src, dig_in_fmt, @@ -508,14 +513,17 @@ static int special_dig_in_iface_ctl_set(struct snd_kcontrol *kctl, goto end; /* For ADAT, optical interface is only available. */ - if (params->dig_in_fmt > 0) + if (params->dig_in_fmt > 0) { + err = 1; goto end; + } /* For S/PDIF, optical/coaxial interfaces are selectable. */ err = avc_audio_set_selector(bebob->unit, 0x00, 0x04, dig_in_iface); if (err < 0) dev_err(&bebob->unit->device, "fail to set digital input interface: %d\n", err); + err = 1; end: special_stream_formation_set(bebob); mutex_unlock(&bebob->mutex); @@ -567,16 +575,20 @@ static int special_dig_out_iface_ctl_set(struct snd_kcontrol *kctl, unsigned int id; int err; - mutex_lock(&bebob->mutex); - id = uval->value.enumerated.item[0]; + if (id >= ARRAY_SIZE(special_dig_out_iface_labels)) + return -EINVAL; + + mutex_lock(&bebob->mutex); err = avc_maudio_set_special_clk(bebob, params->clk_src, params->dig_in_fmt, id, params->clk_lock); - if (err >= 0) + if (err >= 0) { special_stream_formation_set(bebob); + err = 1; + } mutex_unlock(&bebob->mutex); return err; -- cgit v0.10.2 From a800bad36619ce47ac0222004635448e6c91ff72 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 22 Jul 2014 16:37:42 +0200 Subject: fuse: s_time_gran fix Default s_time_gran is 1, don't overwrite that if userspace didn't explicitly specify one. Signed-off-by: Miklos Szeredi Cc: # v3.15+ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 8474028..5ca874f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -907,9 +907,6 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->writeback_cache = 1; if (arg->time_gran && arg->time_gran <= 1000000000) fc->sb->s_time_gran = arg->time_gran; - else - fc->sb->s_time_gran = 1000000000; - } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; -- cgit v0.10.2 From d7afaec0b564f0609e116f562983b8e72fc3e9c9 Mon Sep 17 00:00:00 2001 From: Andrew Gallagher Date: Tue, 22 Jul 2014 16:37:43 +0200 Subject: fuse: add FUSE_NO_OPEN_SUPPORT flag to INIT Here some additional changes to set a capability flag so that clients can detect when it's appropriate to return -ENOSYS from open. This amends the following commit introduced in 3.14: 7678ac50615d fuse: support clients that don't implement 'open' However we can only add the flag to 3.15 and later since there was no protocol version update in 3.14. Signed-off-by: Miklos Szeredi Cc: # v3.15+ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5ca874f..03246cd 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -935,7 +935,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | - FUSE_WRITEBACK_CACHE; + FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 40b5ca8..25084a0 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -101,6 +101,7 @@ * - add FATTR_CTIME * - add ctime and ctimensec to fuse_setattr_in * - add FUSE_RENAME2 request + * - add FUSE_NO_OPEN_SUPPORT flag */ #ifndef _LINUX_FUSE_H @@ -229,6 +230,7 @@ struct fuse_file_lock { * FUSE_READDIRPLUS_AUTO: adaptive readdirplus * FUSE_ASYNC_DIO: asynchronous direct I/O submission * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes + * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -247,6 +249,7 @@ struct fuse_file_lock { #define FUSE_READDIRPLUS_AUTO (1 << 14) #define FUSE_ASYNC_DIO (1 << 15) #define FUSE_WRITEBACK_CACHE (1 << 16) +#define FUSE_NO_OPEN_SUPPORT (1 << 17) /** * CUSE INIT request/reply flags -- cgit v0.10.2 From eb12f72ee7245ca207818b9efd10be2641494502 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Wed, 23 Jul 2014 00:02:08 +0900 Subject: ALSA: bebob: Correction for return value of special_clk_ctl_put() in error This commit is a supplement to my previous patch. http://mailman.alsa-project.org/pipermail/alsa-devel/2014-July/079190.html The special_clk_ctl_put() still returns 0 in error handling case. It should return -EINVAL. Signed-off-by: Takashi Sakamoto Signed-off-by: Takashi Iwai diff --git a/sound/firewire/bebob/bebob_maudio.c b/sound/firewire/bebob/bebob_maudio.c index d6d6ff8..70faa3a 100644 --- a/sound/firewire/bebob/bebob_maudio.c +++ b/sound/firewire/bebob/bebob_maudio.c @@ -381,7 +381,7 @@ static int special_clk_ctl_put(struct snd_kcontrol *kctl, id = uval->value.enumerated.item[0]; if (id >= ARRAY_SIZE(special_clk_labels)) - return 0; + return -EINVAL; mutex_lock(&bebob->mutex); -- cgit v0.10.2 From 5b7532756382cb31748f73df6a0af0138390c04f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 22 Jul 2014 11:30:43 +0200 Subject: drm/radeon: fix error handling in radeon_vm_bo_set_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index fa41e0d..725d366 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -472,6 +472,10 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, if (bo_va->soffset) { /* add a clone of the bo_va to clear the old address */ tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (!tmp) { + mutex_unlock(&vm->mutex); + return -ENOMEM; + } tmp->soffset = bo_va->soffset; tmp->eoffset = bo_va->eoffset; tmp->vm = vm; -- cgit v0.10.2 From c9b227723d051184b9e78f20c75ae2f9d44a6ea2 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Mon, 21 Jul 2014 22:04:29 -0700 Subject: ARM: shmobile: r8a7791: Fix SD2CKCR register address 59e79895b95892863617ce630fbda467f2470575 (ARM: shmobile: r8a7791: Add clocks) added r8a7791 SD clocks when v3.14. 2c60a7df72711fb8b4be1e6aa651ab166a8931bc (ARM: shmobile: Add SDHI devices for Koelsch DTS) enabled SD on r8a7791 Koelsch when v3.15. 1299df03d7191ab4356c995dde8b912d3c8922e9 (ARM: shmobile: henninger: add SDHI0/2 DT support) enable SD on r8a7791 Henninger when v3.16. But r8a7791 SD clock had wrong address. This patch fixup it. [Kuninori Morimoto: tidyup for upstreaming] Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Simon Horman diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index 8d7ffae..79f68ac 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -540,9 +540,9 @@ #clock-cells = <0>; clock-output-names = "sd1"; }; - sd2_clk: sd3_clk@e615007c { + sd2_clk: sd3_clk@e615026c { compatible = "renesas,r8a7791-div6-clock", "renesas,cpg-div6-clock"; - reg = <0 0xe615007c 0 4>; + reg = <0 0xe615026c 0 4>; clocks = <&pll1_div2_clk>; #clock-cells = <0>; clock-output-names = "sd2"; -- cgit v0.10.2 From 1be9a950c646c9092fb3618197f7b6bfb50e82aa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 22 Jul 2014 15:22:45 +0200 Subject: net: sctp: inherit auth_capable on INIT collisions Jason reported an oops caused by SCTP on his ARM machine with SCTP authentication enabled: Internal error: Oops: 17 [#1] ARM CPU: 0 PID: 104 Comm: sctp-test Not tainted 3.13.0-68744-g3632f30c9b20-dirty #1 task: c6eefa40 ti: c6f52000 task.ti: c6f52000 PC is at sctp_auth_calculate_hmac+0xc4/0x10c LR is at sg_init_table+0x20/0x38 pc : [] lr : [] psr: 40000013 sp : c6f538e8 ip : 00000000 fp : c6f53924 r10: c6f50d80 r9 : 00000000 r8 : 00010000 r7 : 00000000 r6 : c7be4000 r5 : 00000000 r4 : c6f56254 r3 : c00c8170 r2 : 00000001 r1 : 00000008 r0 : c6f1e660 Flags: nZcv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 0005397f Table: 06f28000 DAC: 00000015 Process sctp-test (pid: 104, stack limit = 0xc6f521c0) Stack: (0xc6f538e8 to 0xc6f54000) [...] Backtrace: [] (sctp_auth_calculate_hmac+0x0/0x10c) from [] (sctp_packet_transmit+0x33c/0x5c8) [] (sctp_packet_transmit+0x0/0x5c8) from [] (sctp_outq_flush+0x7fc/0x844) [] (sctp_outq_flush+0x0/0x844) from [] (sctp_outq_uncork+0x24/0x28) [] (sctp_outq_uncork+0x0/0x28) from [] (sctp_side_effects+0x1134/0x1220) [] (sctp_side_effects+0x0/0x1220) from [] (sctp_do_sm+0xac/0xd4) [] (sctp_do_sm+0x0/0xd4) from [] (sctp_assoc_bh_rcv+0x118/0x160) [] (sctp_assoc_bh_rcv+0x0/0x160) from [] (sctp_inq_push+0x6c/0x74) [] (sctp_inq_push+0x0/0x74) from [] (sctp_rcv+0x7d8/0x888) While we already had various kind of bugs in that area ec0223ec48a9 ("net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable") and b14878ccb7fa ("net: sctp: cache auth_enable per endpoint"), this one is a bit of a different kind. Giving a bit more background on why SCTP authentication is needed can be found in RFC4895: SCTP uses 32-bit verification tags to protect itself against blind attackers. These values are not changed during the lifetime of an SCTP association. Looking at new SCTP extensions, there is the need to have a method of proving that an SCTP chunk(s) was really sent by the original peer that started the association and not by a malicious attacker. To cause this bug, we're triggering an INIT collision between peers; normal SCTP handshake where both sides intent to authenticate packets contains RANDOM; CHUNKS; HMAC-ALGO parameters that are being negotiated among peers: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- -------------------- COOKIE-ECHO --------------------> <-------------------- COOKIE-ACK --------------------- RFC4895 says that each endpoint therefore knows its own random number and the peer's random number *after* the association has been established. The local and peer's random number along with the shared key are then part of the secret used for calculating the HMAC in the AUTH chunk. Now, in our scenario, we have 2 threads with 1 non-blocking SEQ_PACKET socket each, setting up common shared SCTP_AUTH_KEY and SCTP_AUTH_ACTIVE_KEY properly, and each of them calling sctp_bindx(3), listen(2) and connect(2) against each other, thus the handshake looks similar to this, e.g.: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- <--------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------- -------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------> ... Since such collisions can also happen with verification tags, the RFC4895 for AUTH rather vaguely says under section 6.1: In case of INIT collision, the rules governing the handling of this Random Number follow the same pattern as those for the Verification Tag, as explained in Section 5.2.4 of RFC 2960 [5]. Therefore, each endpoint knows its own Random Number and the peer's Random Number after the association has been established. In RFC2960, section 5.2.4, we're eventually hitting Action B: B) In this case, both sides may be attempting to start an association at about the same time but the peer endpoint started its INIT after responding to the local endpoint's INIT. Thus it may have picked a new Verification Tag not being aware of the previous Tag it had sent this endpoint. The endpoint should stay in or enter the ESTABLISHED state but it MUST update its peer's Verification Tag from the State Cookie, stop any init or cookie timers that may running and send a COOKIE ACK. In other words, the handling of the Random parameter is the same as behavior for the Verification Tag as described in Action B of section 5.2.4. Looking at the code, we exactly hit the sctp_sf_do_dupcook_b() case which triggers an SCTP_CMD_UPDATE_ASSOC command to the side effect interpreter, and in fact it properly copies over peer_{random, hmacs, chunks} parameters from the newly created association to update the existing one. Also, the old asoc_shared_key is being released and based on the new params, sctp_auth_asoc_init_active_key() updated. However, the issue observed in this case is that the previous asoc->peer.auth_capable was 0, and has *not* been updated, so that instead of creating a new secret, we're doing an early return from the function sctp_auth_asoc_init_active_key() leaving asoc->asoc_shared_key as NULL. However, we now have to authenticate chunks from the updated chunk list (e.g. COOKIE-ACK). That in fact causes the server side when responding with ... <------------------ AUTH; COOKIE-ACK ----------------- ... to trigger a NULL pointer dereference, since in sctp_packet_transmit(), it discovers that an AUTH chunk is being queued for xmit, and thus it calls sctp_auth_calculate_hmac(). Since the asoc->active_key_id is still inherited from the endpoint, and the same as encoded into the chunk, it uses asoc->asoc_shared_key, which is still NULL, as an asoc_key and dereferences it in ... crypto_hash_setkey(desc.tfm, &asoc_key->data[0], asoc_key->len) ... causing an oops. All this happens because sctp_make_cookie_ack() called with the *new* association has the peer.auth_capable=1 and therefore marks the chunk with auth=1 after checking sctp_auth_send_cid(), but it is *actually* sent later on over the then *updated* association's transport that didn't initialize its shared key due to peer.auth_capable=0. Since control chunks in that case are not sent by the temporary association which are scheduled for deletion, they are issued for xmit via SCTP_CMD_REPLY in the interpreter with the context of the *updated* association. peer.auth_capable was 0 in the updated association (which went from COOKIE_WAIT into ESTABLISHED state), since all previous processing that performed sctp_process_init() was being done on temporary associations, that we eventually throw away each time. The correct fix is to update to the new peer.auth_capable value as well in the collision case via sctp_assoc_update(), so that in case the collision migrated from 0 -> 1, sctp_auth_asoc_init_active_key() can properly recalculate the secret. This therefore fixes the observed server panic. Fixes: 730fc3d05cd4 ("[SCTP]: Implete SCTP-AUTH parameter processing") Reported-by: Jason Gunthorpe Signed-off-by: Daniel Borkmann Tested-by: Jason Gunthorpe Cc: Vlad Yasevich Acked-by: Vlad Yasevich Signed-off-by: David S. Miller diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 9de23a2..06a9ee6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1097,6 +1097,7 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->c = new->c; asoc->peer.rwnd = new->peer.rwnd; asoc->peer.sack_needed = new->peer.sack_needed; + asoc->peer.auth_capable = new->peer.auth_capable; asoc->peer.i = new->peer.i; sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, asoc->peer.i.initial_tsn, GFP_ATOMIC); -- cgit v0.10.2 From 474ea9cafc459976827a477f2c30eaf6313cb7c1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 22 Jul 2014 11:01:52 -0700 Subject: net: bcmgenet: correctly pad short packets Packets shorter than ETH_ZLEN were not padded with zeroes, hence leaking potentially sensitive information. This bug has been present since the driver got accepted in commit 1c1008c793fa46703a2fee469f4235e1c7984333 ("net: bcmgenet: add main driver file"). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 16281ad..4e615de 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1149,6 +1149,11 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } + if (skb_padto(skb, ETH_ZLEN)) { + ret = NETDEV_TX_OK; + goto out; + } + /* set the SKB transmit checksum */ if (priv->desc_64b_en) { ret = bcmgenet_put_tx_csum(dev, skb); -- cgit v0.10.2 From f62d14a8072b9756db36ba394e2b267470a40240 Mon Sep 17 00:00:00 2001 From: Peter Hutterer Date: Mon, 21 Jul 2014 17:51:35 -0700 Subject: Input: document INPUT_PROP_TOPBUTTONPAD Signed-off-by: Peter Hutterer Signed-off-by: Dmitry Torokhov diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index f1ea2c6..c587a96 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -281,6 +281,19 @@ gestures can normally be extracted from it. If INPUT_PROP_SEMI_MT is not set, the device is assumed to be a true MT device. +INPUT_PROP_TOPBUTTONPAD: +----------------------- +Some laptops, most notably the Lenovo *40 series provide a trackstick +device but do not have physical buttons associated with the trackstick +device. Instead, the top area of the touchpad is marked to show +visual/haptic areas for left, middle, right buttons intended to be used +with the trackstick. + +If INPUT_PROP_TOPBUTTONPAD is set, userspace should emulate buttons +accordingly. This property does not affect kernel behavior. +The kernel does not provide button emulation for such devices but treats +them as any other INPUT_PROP_BUTTONPAD device. + Guidelines: ========== The guidelines below ensure proper single-touch and multi-finger functionality. -- cgit v0.10.2 From 8903461c9bc56fcb041fb92d054e2529951770b6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Jul 2014 17:20:04 +1000 Subject: powerpc/perf: Fix MMCR2 handling for EBB In the recent commit b50a6c584bb4 "Clear MMCR2 when enabling PMU", I screwed up the handling of MMCR2 for tasks using EBB. We must make sure we set MMCR2 *before* ebb_switch_in(), otherwise we overwrite the value of MMCR2 that userspace may have written. That potentially breaks a task that uses EBB and manually uses MMCR2 for event freezing. Fixes: b50a6c584bb4 ("powerpc/perf: Clear MMCR2 when enabling PMU") Cc: stable@vger.kernel.org Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6b0641c..fe52db2 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1307,6 +1307,9 @@ static void power_pmu_enable(struct pmu *pmu) out_enable: pmao_restore_workaround(ebb); + if (ppmu->flags & PPMU_ARCH_207S) + mtspr(SPRN_MMCR2, 0); + mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]); mb(); @@ -1315,9 +1318,6 @@ static void power_pmu_enable(struct pmu *pmu) write_mmcr0(cpuhw, mmcr0); - if (ppmu->flags & PPMU_ARCH_207S) - mtspr(SPRN_MMCR2, 0); - /* * Enable instruction sampling if necessary */ -- cgit v0.10.2 From 23d9cec07c589276561c13b180577c0b87930140 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Tue, 22 Jul 2014 10:39:54 -0500 Subject: pinctrl: dra: dt-bindings: Fix pull enable/disable The DRA74/72 control module pins have a weak pull up and pull down. This is configured by bit offset 17. if BIT(17) is 1, a pull up is selected, else a pull down is selected. However, this pull resisstor is applied based on BIT(16) - PULLUDENABLE - if BIT(18) is *0*, then pull as defined in BIT(17) is applied, else no weak pulls are applied. We defined this in reverse. Reference: Table 18-5 (Description of the pad configuration register bits) in Technical Reference Manual Revision (DRA74x revision Q: SPRUHI2Q Revised June 2014 and DRA72x revision F: SPRUHP2F - Revised June 2014) Fixes: 6e58b8f1daaf1a ("ARM: dts: DRA7: Add the dts files for dra7 SoC and dra7-evm board") Signed-off-by: Nishanth Menon Tested-by: Felipe Balbi Acked-by: Felipe Balbi Signed-off-by: Tony Lindgren diff --git a/include/dt-bindings/pinctrl/dra.h b/include/dt-bindings/pinctrl/dra.h index 002a285..3d33794 100644 --- a/include/dt-bindings/pinctrl/dra.h +++ b/include/dt-bindings/pinctrl/dra.h @@ -30,7 +30,8 @@ #define MUX_MODE14 0xe #define MUX_MODE15 0xf -#define PULL_ENA (1 << 16) +#define PULL_ENA (0 << 16) +#define PULL_DIS (1 << 16) #define PULL_UP (1 << 17) #define INPUT_EN (1 << 18) #define SLEWCONTROL (1 << 19) @@ -38,10 +39,10 @@ #define WAKEUP_EVENT (1 << 25) /* Active pin states */ -#define PIN_OUTPUT 0 +#define PIN_OUTPUT (0 | PULL_DIS) #define PIN_OUTPUT_PULLUP (PIN_OUTPUT | PULL_ENA | PULL_UP) #define PIN_OUTPUT_PULLDOWN (PIN_OUTPUT | PULL_ENA) -#define PIN_INPUT INPUT_EN +#define PIN_INPUT (INPUT_EN | PULL_DIS) #define PIN_INPUT_SLEW (INPUT_EN | SLEWCONTROL) #define PIN_INPUT_PULLUP (PULL_ENA | INPUT_EN | PULL_UP) #define PIN_INPUT_PULLDOWN (PULL_ENA | INPUT_EN) -- cgit v0.10.2 From 33753cd2ba41c72a0756edc5dc094d91602deda5 Mon Sep 17 00:00:00 2001 From: Christoph Fritz Date: Mon, 14 Jul 2014 03:36:18 +0200 Subject: ARM: OMAP2+: gpmc: fix gpmc_hwecc_bch_capable() This patch adds bch8 ecc software fallback which is mostly used by omap3s because they lack hardware elm support. Fixes: 0611c41934ab35ce84dea34ab291897ad3cbc7be (ARM: OMAP2+: gpmc: update gpmc_hwecc_bch_capable() for new platforms and ECC schemes) Cc: # 3.15.x+ Signed-off-by: Christoph Fritz Reviewed-by: Pekon Gupta Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c index 17cd393..93914d2 100644 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ b/arch/arm/mach-omap2/gpmc-nand.c @@ -50,6 +50,16 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) soc_is_omap54xx() || soc_is_dra7xx()) return 1; + if (ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW || + ecc_opt == OMAP_ECC_BCH8_CODE_HW_DETECTION_SW) { + if (cpu_is_omap24xx()) + return 0; + else if (cpu_is_omap3630() && (GET_OMAP_REVISION() == 0)) + return 0; + else + return 1; + } + /* OMAP3xxx do not have ELM engine, so cannot support ECC schemes * which require H/W based ECC error detection */ if ((cpu_is_omap34xx() || cpu_is_omap3630()) && @@ -57,14 +67,6 @@ static bool gpmc_hwecc_bch_capable(enum omap_ecc ecc_opt) (ecc_opt == OMAP_ECC_BCH8_CODE_HW))) return 0; - /* - * For now, assume 4-bit mode is only supported on OMAP3630 ES1.x, x>=1 - * and AM33xx derivates. Other chips may be added if confirmed to work. - */ - if ((ecc_opt == OMAP_ECC_BCH4_CODE_HW_DETECTION_SW) && - (!cpu_is_omap3630() || (GET_OMAP_REVISION() == 0))) - return 0; - /* legacy platforms support only HAM1 (1-bit Hamming) ECC scheme */ if (ecc_opt == OMAP_ECC_HAM1_CODE_HW) return 1; -- cgit v0.10.2 From d50314a6b0702c630c35b88148c1acb76d2e4ede Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 18 Jul 2014 11:54:37 +0100 Subject: arm64: Create non-empty ZONE_DMA when DRAM starts above 4GB ZONE_DMA is created to allow 32-bit only devices to access memory in the absence of an IOMMU. On systems where the memory starts above 4GB, it is expected that some devices have a DMA offset hardwired to be able to access the bottom of the memory. Linux currently supports DT bindings for the DMA offsets but they are not (easily) available early during boot. This patch tries to guess a DMA offset and assumes that ZONE_DMA corresponds to the 32-bit mask above the start of DRAM. Fixes: 2d5a5612bc (arm64: Limit the CMA buffer to 32-bit if ZONE_DMA) Signed-off-by: Catalin Marinas Reported-by: Mark Salter Tested-by: Mark Salter Tested-by: Anup Patel diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f43db8a..e90c542 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -60,6 +60,17 @@ static int __init early_initrd(char *p) early_param("initrd", early_initrd); #endif +/* + * Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It + * currently assumes that for memory starting above 4G, 32-bit devices will + * use a DMA offset. + */ +static phys_addr_t max_zone_dma_phys(void) +{ + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); + return min(offset + (1ULL << 32), memblock_end_of_DRAM()); +} + static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; @@ -70,9 +81,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA)) { - unsigned long max_dma_phys = - (unsigned long)(dma_to_phys(NULL, DMA_BIT_MASK(32)) + 1); - max_dma = max(min, min(max, max_dma_phys >> PAGE_SHIFT)); + max_dma = PFN_DOWN(max_zone_dma_phys()); zone_size[ZONE_DMA] = max_dma - min; } zone_size[ZONE_NORMAL] = max - max_dma; @@ -146,7 +155,7 @@ void __init arm64_memblock_init(void) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA)) - dma_phys_limit = dma_to_phys(NULL, DMA_BIT_MASK(32)) + 1; + dma_phys_limit = max_zone_dma_phys(); dma_contiguous_reserve(dma_phys_limit); memblock_allow_resize(); -- cgit v0.10.2 From eedd10f45bdcb2a5b2afa35f845e080c3bc984f2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Jun 2014 08:57:44 +0100 Subject: drm/i915: Simplify i915_gem_release_all_mmaps() An object can only have an active gtt mapping if it is currently bound into the global gtt. Therefore we can simply walk the list of all bound objects and check the flag upon those for an active gtt mapping. From commit 48018a57a8f5900e7e53ffaa0adeb784095accfb Author: Paulo Zanoni Date: Fri Dec 13 15:22:31 2013 -0200 drm/i915: release the GTT mmaps when going into D3 Also note that the WARN is inappropriate for this function as GPU activity is orthogonal to GTT mmap status. Rather it is the caller that relies upon this condition and so it should assert that the GPU is idle itself. References: https://bugs.freedesktop.org/show_bug.cgi?id=80081 Signed-off-by: Chris Wilson Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Daniel Vetter Reviewed-by: Paulo Zanoni Tested-by: Paulo Zanoni [danvet: cherry-pick from -next to -fixes.] Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f361263..d893e4d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1616,22 +1616,6 @@ out: return ret; } -void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) -{ - struct i915_vma *vma; - - /* - * Only the global gtt is relevant for gtt memory mappings, so restrict - * list traversal to objects bound into the global address space. Note - * that the active list should be empty, but better safe than sorry. - */ - WARN_ON(!list_empty(&dev_priv->gtt.base.active_list)); - list_for_each_entry(vma, &dev_priv->gtt.base.active_list, mm_list) - i915_gem_release_mmap(vma->obj); - list_for_each_entry(vma, &dev_priv->gtt.base.inactive_list, mm_list) - i915_gem_release_mmap(vma->obj); -} - /** * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question @@ -1657,6 +1641,15 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) obj->fault_mappable = false; } +void +i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) +{ + struct drm_i915_gem_object *obj; + + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) + i915_gem_release_mmap(obj); +} + uint32_t i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) { -- cgit v0.10.2 From 1a112d10f03e83fb3a2fdc4c9165865dec8a3ca6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Jul 2014 09:05:27 -0400 Subject: libata: introduce ata_host->n_tags to avoid oops on SAS controllers 1871ee134b73 ("libata: support the ata host which implements a queue depth less than 32") directly used ata_port->scsi_host->can_queue from ata_qc_new() to determine the number of tags supported by the host; unfortunately, SAS controllers doing SATA don't initialize ->scsi_host leading to the following oops. BUG: unable to handle kernel NULL pointer dereference at 0000000000000058 IP: [] ata_qc_new_init+0x188/0x1b0 PGD 0 Oops: 0002 [#1] SMP Modules linked in: isci libsas scsi_transport_sas mgag200 drm_kms_helper ttm CPU: 1 PID: 518 Comm: udevd Not tainted 3.16.0-rc6+ #62 Hardware name: Intel Corporation S2600CO/S2600CO, BIOS SE5C600.86B.02.02.0002.122320131210 12/23/2013 task: ffff880c1a00b280 ti: ffff88061a000000 task.ti: ffff88061a000000 RIP: 0010:[] [] ata_qc_new_init+0x188/0x1b0 RSP: 0018:ffff88061a003ae8 EFLAGS: 00010012 RAX: 0000000000000001 RBX: ffff88000241ca80 RCX: 00000000000000fa RDX: 0000000000000020 RSI: 0000000000000020 RDI: ffff8806194aa298 RBP: ffff88061a003ae8 R08: ffff8806194a8000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff88000241ca80 R12: ffff88061ad58200 R13: ffff8806194aa298 R14: ffffffff814e67a0 R15: ffff8806194a8000 FS: 00007f3ad7fe3840(0000) GS:ffff880627620000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000058 CR3: 000000061a118000 CR4: 00000000001407e0 Stack: ffff88061a003b20 ffffffff814e96e1 ffff88000241ca80 ffff88061ad58200 ffff8800b6bf6000 ffff880c1c988000 ffff880619903850 ffff88061a003b68 ffffffffa0056ce1 ffff88061a003b48 0000000013d6e6f8 ffff88000241ca80 Call Trace: [] ata_sas_queuecmd+0xa1/0x430 [] sas_queuecommand+0x191/0x220 [libsas] [] scsi_dispatch_cmd+0x10e/0x300 [] scsi_request_fn+0x2f5/0x550 [] __blk_run_queue+0x33/0x40 [] queue_unplugged+0x2a/0x90 [] blk_flush_plug_list+0x1b4/0x210 [] blk_finish_plug+0x14/0x50 [] __do_page_cache_readahead+0x198/0x1f0 [] force_page_cache_readahead+0x31/0x50 [] page_cache_sync_readahead+0x3e/0x50 [] generic_file_read_iter+0x496/0x5a0 [] blkdev_read_iter+0x37/0x40 [] new_sync_read+0x7e/0xb0 [] vfs_read+0x94/0x170 [] SyS_read+0x46/0xb0 [] ? SyS_lseek+0x91/0xb0 [] system_call_fastpath+0x16/0x1b Code: 00 00 00 88 50 29 83 7f 08 01 19 d2 83 e2 f0 83 ea 50 88 50 34 c6 81 1d 02 00 00 40 c6 81 17 02 00 00 00 5d c3 66 0f 1f 44 00 00 <89> 14 25 58 00 00 00 Fix it by introducing ata_host->n_tags which is initialized to ATA_MAX_QUEUE - 1 in ata_host_init() for SAS controllers and set to scsi_host_template->can_queue in ata_host_register() for !SAS ones. As SAS hosts are never registered, this will give them the same ATA_MAX_QUEUE - 1 as before. Note that we can't use scsi_host->can_queue directly for SAS hosts anyway as they can go higher than the libata maximum. Signed-off-by: Tejun Heo Reported-by: Mike Qiu Reported-by: Jesse Brandeburg Reported-by: Peter Hurley Reported-by: Peter Zijlstra Tested-by: Alexey Kardashevskiy Fixes: 1871ee134b73 ("libata: support the ata host which implements a queue depth less than 32") Cc: Kevin Hao Cc: Dan Williams Cc: stable@vger.kernel.org diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index d19c37a7..677c0c1 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4798,9 +4798,8 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) static struct ata_queued_cmd *ata_qc_new(struct ata_port *ap) { struct ata_queued_cmd *qc = NULL; - unsigned int i, tag, max_queue; - - max_queue = ap->scsi_host->can_queue; + unsigned int max_queue = ap->host->n_tags; + unsigned int i, tag; /* no command while frozen */ if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) @@ -6094,6 +6093,7 @@ void ata_host_init(struct ata_host *host, struct device *dev, { spin_lock_init(&host->lock); mutex_init(&host->eh_mutex); + host->n_tags = ATA_MAX_QUEUE - 1; host->dev = dev; host->ops = ops; } @@ -6175,15 +6175,7 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) { int i, rc; - /* - * The max queue supported by hardware must not be greater than - * ATA_MAX_QUEUE. - */ - if (sht->can_queue > ATA_MAX_QUEUE) { - dev_err(host->dev, "BUG: the hardware max queue is too large\n"); - WARN_ON(1); - return -EINVAL; - } + host->n_tags = clamp(sht->can_queue, 1, ATA_MAX_QUEUE - 1); /* host must have been started */ if (!(host->flags & ATA_HOST_STARTED)) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 5ab4e3a..92abb49 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -593,6 +593,7 @@ struct ata_host { struct device *dev; void __iomem * const *iomap; unsigned int n_ports; + unsigned int n_tags; /* nr of NCQ tags */ void *private_data; struct ata_port_operations *ops; unsigned long flags; -- cgit v0.10.2 From f98bac5a30b60a2fca854dd5ee7256221d8ccf0a Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 7 Jul 2014 22:10:56 +0800 Subject: NFSD: Fix crash encoding lock reply on 32-bit Commit 8c7424cff6 "nfsd4: don't try to encode conflicting owner if low on space" forgot to free conf->data in nfsd4_encode_lockt and before sign conf->data to NULL in nfsd4_encode_lock_denied, causing a leak. Worse, kfree() can be called on an uninitialized pointer in the case of a succesful lock (or one that fails for a reason other than a conflict). (Note that lock->lk_denied.ld_owner.data appears it should be zero here, until you notice that it's one arm of a union the other arm of which is written to in the succesful case by the memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid, sizeof(stateid_t)); in nfsd4_lock(). In the 32-bit case this overwrites ld_owner.data.) Signed-off-by: Kinglong Mee Fixes: 8c7424cff6 ""nfsd4: don't try to encode conflicting owner if low on space" Signed-off-by: J. Bruce Fields diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b56b1cc0..944275c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2879,6 +2879,7 @@ again: * return the conflicting open: */ if (conf->len) { + kfree(conf->data); conf->len = 0; conf->data = NULL; goto again; @@ -2891,6 +2892,7 @@ again: if (conf->len) { p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); p = xdr_encode_opaque(p, conf->data, conf->len); + kfree(conf->data); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ p = xdr_encode_hyper(p, (u64)0); /* clientid */ *p++ = cpu_to_be32(0); /* length of owner name */ @@ -2907,7 +2909,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); - kfree(lock->lk_denied.ld_owner.data); + return nfserr; } -- cgit v0.10.2 From 2a2261553dd1472ca574acadbd93e12f44c4e6d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 22 Jul 2014 15:35:14 +0200 Subject: x86, cpu: Fix cache topology for early P4-SMT P4 systems with cpuid level < 4 can have SMT, but the cache topology description available (cpuid2) does not include SMP information. Now we know that SMT shares all cache levels, and therefore we can mark all available cache levels as shared. We do this by setting cpu_llc_id to ->phys_proc_id, since that's the same for each SMT thread. We can do this unconditional since if there's no SMT its still true, the one CPU shares cache with only itself. This fixes a problem where such CPUs report an incorrect LLC CPU mask. This in turn fixes a crash in the scheduler where the topology was build wrong, it assumes the LLC mask to include at least the SMT CPUs. Cc: Josh Boyer Cc: Dietmar Eggemann Tested-by: Bruno Wolff III Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140722133514.GM12054@laptop.lan Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a800290..f9e4fdd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -370,6 +370,17 @@ static void init_intel(struct cpuinfo_x86 *c) */ detect_extended_topology(c); + if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { + /* + * let's use the legacy cpuid vector 0x1 and 0x4 for topology + * detection. + */ + c->x86_max_cores = intel_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + } + l2 = init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); @@ -438,17 +449,6 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_P3); #endif - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - c->x86_max_cores = intel_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - /* Work around errata */ srat_detect_node(c); diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index a952e9c..9c8f739 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -730,6 +730,18 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } +#ifdef CONFIG_X86_HT + /* + * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in + * turns means that the only possibility is SMT (as indicated in + * cpuid1). Since cpuid2 doesn't specify shared caches, and we know + * that SMT shares all caches, we can unconditionally set cpu_llc_id to + * c->phys_proc_id. + */ + if (per_cpu(cpu_llc_id, cpu) == BAD_APICID) + per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; +#endif + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; -- cgit v0.10.2 From e8c214d22e76dd0ead38f97f8d2dc09aac70d651 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 23 Jul 2014 09:47:58 +0200 Subject: drm/radeon: fix irq ring buffer overflow handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We must mask out the overflow bit as well, otherwise the wptr will never match the rptr again and the interrupt handler will loop forever. Signed-off-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher Reviewed-by: Michel Dänzer diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 0b24711..cc1f02f 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7376,6 +7376,7 @@ static inline u32 cik_get_ih_wptr(struct radeon_device *rdev) tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); + wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 250bac3..15e4f28 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4756,6 +4756,7 @@ static u32 evergreen_get_ih_wptr(struct radeon_device *rdev) tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); + wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index c66952d..3c69f58 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3795,6 +3795,7 @@ static u32 r600_get_ih_wptr(struct radeon_device *rdev) tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); + wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index eba0225..9e854fd 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6103,6 +6103,7 @@ static inline u32 si_get_ih_wptr(struct radeon_device *rdev) tmp = RREG32(IH_RB_CNTL); tmp |= IH_WPTR_OVERFLOW_CLEAR; WREG32(IH_RB_CNTL, tmp); + wptr &= ~RB_OVERFLOW; } return (wptr & rdev->ih.ptr_mask); } -- cgit v0.10.2 From d584a66279949561418c82b12bb4c055e6c25836 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Wed, 23 Jul 2014 20:08:12 +0200 Subject: firewire: ohci: disable MSI for VIA VT6315 again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert half of commit d151f9854f21: If isochronous I/O is attempted with packets larget than 1 kByte, VIA VT6315 rev 01 immediately stops to generate any interrupts if MSI are used. Fix this by going back to legacy interrupts. [Thread "Isochronous streaming with VT6315 OHCI", http://marc.info/?t=139049641500003] With smaller packets, the loss of IRQs happens too but only very rarely --- rarely eneough that it was not yet possible for me to determine whether QUIRK_NO_MSI is an actual fix for this rare variation of this chip bug. I am keeping QUIRK_CYCLE_TIMER off of VT6315 rev >= 1 because this has been verified by myself with certainty. On the other hand, I am also keeping QUIRK_CYCLE_TIMER on for VT6315 rev 0 because I don't know at this time whether this revision accesses Cycle Timer non-atomically like most of the other VIA OHCIs are known to do. Reported-by: Rémy Bruno Signed-off-by: Stefan Richter diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 5b0934d..41df806 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -336,10 +336,10 @@ static const struct { QUIRK_CYCLE_TIMER | QUIRK_IR_WAKE}, {PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT6315, 0, - QUIRK_CYCLE_TIMER | QUIRK_NO_MSI}, + QUIRK_CYCLE_TIMER /* FIXME: necessary? */ | QUIRK_NO_MSI}, {PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT6315, PCI_ANY_ID, - 0}, + QUIRK_NO_MSI}, {PCI_VENDOR_ID_VIA, PCI_ANY_ID, PCI_ANY_ID, QUIRK_CYCLE_TIMER | QUIRK_NO_MSI}, -- cgit v0.10.2 From 332cfc823d182d43bfcc80d1ebee7ab79e06ccf3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 23 Jul 2014 08:59:40 +0800 Subject: amd-xgbe: Fix error return code in xgbe_probe() Fix to return a negative error code from the setting real tx queue count error handling case instead of 0. Signed-off-by: Wei Yongjun Acked-by: Tom Lendacky Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index c83584a..5a1891f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -339,7 +339,8 @@ static int xgbe_probe(struct platform_device *pdev) /* Calculate the number of Tx and Rx rings to be created */ pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(), pdata->hw_feat.tx_ch_cnt); - if (netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count)) { + ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count); + if (ret) { dev_err(dev, "error setting real tx queue count\n"); goto err_io; } -- cgit v0.10.2 From dd1d3f8f9920926aa426589e542eed6bf58b7354 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 23 Jul 2014 09:00:35 +0800 Subject: hyperv: Fix error return code in netvsc_init_buf() Fix to return -ENOMEM from the kalloc error handling case instead of 0. Signed-off-by: Wei Yongjun Reviewed-by: Haiyang Zhang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4ed38ea..d97d5f3 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -378,8 +378,10 @@ static int netvsc_init_buf(struct hv_device *device) net_device->send_section_map = kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); - if (net_device->send_section_map == NULL) + if (net_device->send_section_map == NULL) { + ret = -ENOMEM; goto cleanup; + } goto exit; -- cgit v0.10.2 From aed8adb7688d5744cb484226820163af31d2499a Mon Sep 17 00:00:00 2001 From: Silesh C V Date: Wed, 23 Jul 2014 13:59:59 -0700 Subject: coredump: fix the setting of PF_DUMPCORE Commit 079148b919d0 ("coredump: factor out the setting of PF_DUMPCORE") cleaned up the setting of PF_DUMPCORE by removing it from all the linux_binfmt->core_dump() and moving it to zap_threads().But this ended up clearing all the previously set flags. This causes issues during core generation when tsk->flags is checked again (eg. for PF_USED_MATH to dump floating point registers). Fix this. Signed-off-by: Silesh C V Acked-by: Oleg Nesterov Cc: Mandeep Singh Baines Cc: [3.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/coredump.c b/fs/coredump.c index 0b2528f..a93f7e6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -306,7 +306,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm, if (unlikely(nr < 0)) return nr; - tsk->flags = PF_DUMPCORE; + tsk->flags |= PF_DUMPCORE; if (atomic_read(&mm->mm_users) == nr + 1) goto done; /* -- cgit v0.10.2 From a0f7a756c2f7543585657cdeeefdfcc11b567293 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 23 Jul 2014 14:00:01 -0700 Subject: mm/rmap.c: fix pgoff calculation to handle hugepage correctly I triggered VM_BUG_ON() in vma_address() when I tried to migrate an anonymous hugepage with mbind() in the kernel v3.16-rc3. This is because pgoff's calculation in rmap_walk_anon() fails to consider compound_order() only to have an incorrect value. This patch introduces page_to_pgoff(), which gets the page's offset in PAGE_CACHE_SIZE. Kirill pointed out that page cache tree should natively handle hugepages, and in order to make hugetlbfs fit it, page->index of hugetlbfs page should be in PAGE_CACHE_SIZE. This is beyond this patch, but page_to_pgoff() contains the point to be fixed in a single function. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: Joonsoo Kim Cc: Hugh Dickins Cc: Rik van Riel Cc: Hillf Danton Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a97b58..e1474ae 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -399,6 +399,18 @@ static inline struct page *read_mapping_page(struct address_space *mapping, } /* + * Get the offset in PAGE_SIZE. + * (TODO: hugepage should have ->index in PAGE_SIZE) + */ +static inline pgoff_t page_to_pgoff(struct page *page) +{ + if (unlikely(PageHeadHuge(page))) + return page->index << compound_order(page); + else + return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); +} + +/* * Return byte-offset into filesystem object for page. */ static inline loff_t page_offset(struct page *page) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c6399e3..7211a73 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -435,7 +435,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, if (av == NULL) /* Not actually mapped anymore */ return; - pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff = page_to_pgoff(page); read_lock(&tasklist_lock); for_each_process (tsk) { struct anon_vma_chain *vmac; @@ -469,7 +469,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, mutex_lock(&mapping->i_mmap_mutex); read_lock(&tasklist_lock); for_each_process(tsk) { - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff_t pgoff = page_to_pgoff(page); struct task_struct *t = task_early_kill(tsk, force_early); if (!t) diff --git a/mm/rmap.c b/mm/rmap.c index b7e94eb..22a4a76 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -517,11 +517,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma) static inline unsigned long __vma_address(struct page *page, struct vm_area_struct *vma) { - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - - if (unlikely(is_vm_hugetlb_page(vma))) - pgoff = page->index << huge_page_order(page_hstate(page)); - + pgoff_t pgoff = page_to_pgoff(page); return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); } @@ -1639,7 +1635,7 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page, static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) { struct anon_vma *anon_vma; - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff_t pgoff = page_to_pgoff(page); struct anon_vma_chain *avc; int ret = SWAP_AGAIN; @@ -1680,7 +1676,7 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) { struct address_space *mapping = page->mapping; - pgoff_t pgoff = page->index << compound_order(page); + pgoff_t pgoff = page_to_pgoff(page); struct vm_area_struct *vma; int ret = SWAP_AGAIN; -- cgit v0.10.2 From b4c5c60920e3b0c4598f43e7317559f6aec51531 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 23 Jul 2014 14:00:04 -0700 Subject: zram: avoid lockdep splat by revalidate_disk Sasha reported lockdep warning [1] introduced by [2]. It could be fixed by doing disk revalidation out of the init_lock. It's okay because disk capacity change is protected by init_lock so that revalidate_disk always sees up-to-date value so there is no race. [1] https://lkml.org/lkml/2014/7/3/735 [2] zram: revalidate disk after capacity change Fixes 2e32baea46ce ("zram: revalidate disk after capacity change"). Signed-off-by: Minchan Kim Reported-by: Sasha Levin Cc: "Alexander E. Patrakov" Cc: Nitin Gupta Cc: Jerome Marchand Cc: Sergey Senozhatsky CC: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 089e72c..36e54be 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -622,11 +622,18 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - if (reset_capacity) { + if (reset_capacity) set_capacity(zram->disk, 0); - revalidate_disk(zram->disk); - } + up_write(&zram->init_lock); + + /* + * Revalidate disk out of the init_lock to avoid lockdep splat. + * It's okay because disk's capacity is protected by init_lock + * so that revalidate_disk always sees up-to-date capacity. + */ + if (reset_capacity) + revalidate_disk(zram->disk); } static ssize_t disksize_store(struct device *dev, @@ -666,8 +673,15 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - revalidate_disk(zram->disk); up_write(&zram->init_lock); + + /* + * Revalidate disk out of the init_lock to avoid lockdep splat. + * It's okay because disk's capacity is protected by init_lock + * so that revalidate_disk always sees up-to-date capacity. + */ + revalidate_disk(zram->disk); + return len; out_destroy_comp: -- cgit v0.10.2 From b1923b55af43a6febb976084bf30d1a4797c92c9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 23 Jul 2014 14:00:06 -0700 Subject: sh: also try passing -m4-nofpu for SH2A builds When compiling a SH2A kernel (e.g. se7206_defconfig or rsk7203_defconfig) using sh4-linux-gcc, linking fails with: net/built-in.o: In function `__sk_run_filter': net/core/filter.c:566: undefined reference to `__fpscr_values' net/core/filter.c:269: undefined reference to `__fpscr_values' ... net/built-in.o:net/core/filter.c:580: more undefined references to `__fpscr_values' follow This happens because sh4-linux-gcc doesn't support the "-m2a-nofpu", which is thus filtered out by "$(call cc-option, ...)". As compiling using sh4-linux-gcc is useful for compile coverage, also try passing "-m4-nofpu" (which is presumably filtered out when using a real sh2a-linux toolchain) to disable the generation of FPU instructions and references to __fpscr_values[]. Signed-off-by: Geert Uytterhoeven Cc: Guenter Roeck Cc: Tony Breeds Cc: Alexei Starovoitov Cc: Fengguang Wu Cc: Daniel Borkmann Cc: Magnus Damm Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/sh/Makefile b/arch/sh/Makefile index d4d16e4..bf5b3f5 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -32,7 +32,8 @@ endif cflags-$(CONFIG_CPU_SH2) := $(call cc-option,-m2,) cflags-$(CONFIG_CPU_SH2A) += $(call cc-option,-m2a,) \ - $(call cc-option,-m2a-nofpu,) + $(call cc-option,-m2a-nofpu,) \ + $(call cc-option,-m4-nofpu,) cflags-$(CONFIG_CPU_SH3) := $(call cc-option,-m3,) cflags-$(CONFIG_CPU_SH4) := $(call cc-option,-m4,) \ $(call cc-option,-mno-implicit-fp,-m4-nofpu) -- cgit v0.10.2 From c118678bc79e8241f9d3434d9324c6400d72f48a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 23 Jul 2014 14:00:08 -0700 Subject: mm: do not call do_fault_around for non-linear fault Ingo Korb reported that "repeated mapping of the same file on tmpfs using remap_file_pages sometimes triggers a BUG at mm/filemap.c:202 when the process exits". He bisected the bug to d7c1755179b8 ("mm: implement ->map_pages for shmem/tmpfs"), although the bug was actually added by commit 8c6e50b0290c ("mm: introduce vm_ops->map_pages()"). The problem is caused by calling do_fault_around for a _non-linear_ fault. In this case pgoff is shifted and might become negative during calculation. Faulting around non-linear page-fault makes no sense and breaks the logic in do_fault_around because pgoff is shifted. Signed-off-by: Konstantin Khlebnikov Reported-by: Ingo Korb Tested-by: Ingo Korb Cc: Hugh Dickins Cc: Sasha Levin Cc: Dave Jones Cc: Ning Qu Cc: "Kirill A. Shutemov" Cc: [3.15.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory.c b/mm/memory.c index d67fd9f..7e8d820 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2882,7 +2882,8 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma, * if page by the offset is not ready to be mapped (cold cache or * something). */ - if (vma->vm_ops->map_pages && fault_around_pages() > 1) { + if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) && + fault_around_pages() > 1) { pte = pte_offset_map_lock(mm, pmd, address, &ptl); do_fault_around(vma, address, pte, pgoff, flags); if (!pte_same(*pte, orig_pte)) -- cgit v0.10.2 From 8e205f779d1443a94b5ae81aa359cb535dd3021e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 23 Jul 2014 14:00:10 -0700 Subject: shmem: fix faulting into a hole, not taking i_mutex Commit f00cdc6df7d7 ("shmem: fix faulting into a hole while it's punched") was buggy: Sasha sent a lockdep report to remind us that grabbing i_mutex in the fault path is a no-no (write syscall may already hold i_mutex while faulting user buffer). We tried a completely different approach (see following patch) but that proved inadequate: good enough for a rational workload, but not good enough against trinity - which forks off so many mappings of the object that contention on i_mmap_mutex while hole-puncher holds i_mutex builds into serious starvation when concurrent faults force the puncher to fall back to single-page unmap_mapping_range() searches of the i_mmap tree. So return to the original umbrella approach, but keep away from i_mutex this time. We really don't want to bloat every shmem inode with a new mutex or completion, just to protect this unlikely case from trinity. So extend the original with wait_queue_head on stack at the hole-punch end, and wait_queue item on the stack at the fault end. This involves further use of i_lock to guard against the races: lockdep has been happy so far, and I see fs/inode.c:unlock_new_inode() holds i_lock around wake_up_bit(), which is comparable to what we do here. i_lock is more convenient, but we could switch to shmem's info->lock. This issue has been tagged with CVE-2014-4171, which will require commit f00cdc6df7d7 and this and the following patch to be backported: we suggest to 3.1+, though in fact the trinity forkbomb effect might go back as far as 2.6.16, when madvise(,,MADV_REMOVE) came in - or might not, since much has changed, with i_mmap_mutex a spinlock before 3.0. Anyone running trinity on 3.0 and earlier? I don't think we need care. Signed-off-by: Hugh Dickins Reported-by: Sasha Levin Tested-by: Sasha Levin Cc: Vlastimil Babka Cc: Konstantin Khlebnikov Cc: Johannes Weiner Cc: Lukas Czerner Cc: Dave Jones Cc: [3.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index 1140f49..c0719f0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -85,7 +85,7 @@ static struct vfsmount *shm_mnt; * a time): we would prefer not to enlarge the shmem inode just for that. */ struct shmem_falloc { - int mode; /* FALLOC_FL mode currently operating */ + wait_queue_head_t *waitq; /* faults into hole wait for punch to end */ pgoff_t start; /* start of range currently being fallocated */ pgoff_t next; /* the next page offset to be fallocated */ pgoff_t nr_falloced; /* how many new pages have been fallocated */ @@ -760,7 +760,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) spin_lock(&inode->i_lock); shmem_falloc = inode->i_private; if (shmem_falloc && - !shmem_falloc->mode && + !shmem_falloc->waitq && index >= shmem_falloc->start && index < shmem_falloc->next) shmem_falloc->nr_unswapped++; @@ -1248,38 +1248,58 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) * Trinity finds that probing a hole which tmpfs is punching can * prevent the hole-punch from ever completing: which in turn * locks writers out with its hold on i_mutex. So refrain from - * faulting pages into the hole while it's being punched, and - * wait on i_mutex to be released if vmf->flags permits. + * faulting pages into the hole while it's being punched. Although + * shmem_undo_range() does remove the additions, it may be unable to + * keep up, as each new page needs its own unmap_mapping_range() call, + * and the i_mmap tree grows ever slower to scan if new vmas are added. + * + * It does not matter if we sometimes reach this check just before the + * hole-punch begins, so that one fault then races with the punch: + * we just need to make racing faults a rare case. + * + * The implementation below would be much simpler if we just used a + * standard mutex or completion: but we cannot take i_mutex in fault, + * and bloating every shmem inode for this unlikely case would be sad. */ if (unlikely(inode->i_private)) { struct shmem_falloc *shmem_falloc; spin_lock(&inode->i_lock); shmem_falloc = inode->i_private; - if (!shmem_falloc || - shmem_falloc->mode != FALLOC_FL_PUNCH_HOLE || - vmf->pgoff < shmem_falloc->start || - vmf->pgoff >= shmem_falloc->next) - shmem_falloc = NULL; - spin_unlock(&inode->i_lock); - /* - * i_lock has protected us from taking shmem_falloc seriously - * once return from shmem_fallocate() went back up that stack. - * i_lock does not serialize with i_mutex at all, but it does - * not matter if sometimes we wait unnecessarily, or sometimes - * miss out on waiting: we just need to make those cases rare. - */ - if (shmem_falloc) { + if (shmem_falloc && + shmem_falloc->waitq && + vmf->pgoff >= shmem_falloc->start && + vmf->pgoff < shmem_falloc->next) { + wait_queue_head_t *shmem_falloc_waitq; + DEFINE_WAIT(shmem_fault_wait); + + ret = VM_FAULT_NOPAGE; if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { + /* It's polite to up mmap_sem if we can */ up_read(&vma->vm_mm->mmap_sem); - mutex_lock(&inode->i_mutex); - mutex_unlock(&inode->i_mutex); - return VM_FAULT_RETRY; + ret = VM_FAULT_RETRY; } - /* cond_resched? Leave that to GUP or return to user */ - return VM_FAULT_NOPAGE; + + shmem_falloc_waitq = shmem_falloc->waitq; + prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait, + TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); + schedule(); + + /* + * shmem_falloc_waitq points into the shmem_fallocate() + * stack of the hole-punching task: shmem_falloc_waitq + * is usually invalid by the time we reach here, but + * finish_wait() does not dereference it in that case; + * though i_lock needed lest racing with wake_up_all(). + */ + spin_lock(&inode->i_lock); + finish_wait(shmem_falloc_waitq, &shmem_fault_wait); + spin_unlock(&inode->i_lock); + return ret; } + spin_unlock(&inode->i_lock); } error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); @@ -1774,13 +1794,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, mutex_lock(&inode->i_mutex); - shmem_falloc.mode = mode & ~FALLOC_FL_KEEP_SIZE; - if (mode & FALLOC_FL_PUNCH_HOLE) { struct address_space *mapping = file->f_mapping; loff_t unmap_start = round_up(offset, PAGE_SIZE); loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq); + shmem_falloc.waitq = &shmem_falloc_waitq; shmem_falloc.start = unmap_start >> PAGE_SHIFT; shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; spin_lock(&inode->i_lock); @@ -1792,8 +1812,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, 1 + unmap_end - unmap_start, 0); shmem_truncate_range(inode, offset, offset + len - 1); /* No need to unmap again: hole-punching leaves COWed pages */ + + spin_lock(&inode->i_lock); + inode->i_private = NULL; + wake_up_all(&shmem_falloc_waitq); + spin_unlock(&inode->i_lock); error = 0; - goto undone; + goto out; } /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ @@ -1809,6 +1834,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, goto out; } + shmem_falloc.waitq = NULL; shmem_falloc.start = start; shmem_falloc.next = start; shmem_falloc.nr_falloced = 0; -- cgit v0.10.2 From b1a366500bd537b50c3aad26dc7df083ec03a448 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 23 Jul 2014 14:00:13 -0700 Subject: shmem: fix splicing from a hole while it's punched shmem_fault() is the actual culprit in trinity's hole-punch starvation, and the most significant cause of such problems: since a page faulted is one that then appears page_mapped(), needing unmap_mapping_range() and i_mmap_mutex to be unmapped again. But it is not the only way in which a page can be brought into a hole in the radix_tree while that hole is being punched; and Vlastimil's testing implies that if enough other processors are busy filling in the hole, then shmem_undo_range() can be kept from completing indefinitely. shmem_file_splice_read() is the main other user of SGP_CACHE, which can instantiate shmem pagecache pages in the read-only case (without holding i_mutex, so perhaps concurrently with a hole-punch). Probably it's silly not to use SGP_READ already (using the ZERO_PAGE for holes): which ought to be safe, but might bring surprises - not a change to be rushed. shmem_read_mapping_page_gfp() is an internal interface used by drivers/gpu/drm GEM (and next by uprobes): it should be okay. And shmem_file_read_iter() uses the SGP_DIRTY variant of SGP_CACHE, when called internally by the kernel (perhaps for a stacking filesystem, which might rely on holes to be reserved): it's unclear whether it could be provoked to keep hole-punch busy or not. We could apply the same umbrella as now used in shmem_fault() to shmem_file_splice_read() and the others; but it looks ugly, and use over a range raises questions - should it actually be per page? can these get starved themselves? The origin of this part of the problem is my v3.1 commit d0823576bf4b ("mm: pincer in truncate_inode_pages_range"), once it was duplicated into shmem.c. It seemed like a nice idea at the time, to ensure (barring RCU lookup fuzziness) that there's an instant when the entire hole is empty; but the indefinitely repeated scans to ensure that make it vulnerable. Revert that "enhancement" to hole-punch from shmem_undo_range(), but retain the unproblematic rescanning when it's truncating; add a couple of comments there. Remove the "indices[0] >= end" test: that is now handled satisfactorily by the inner loop, and mem_cgroup_uncharge_start()/end() are too light to be worth avoiding here. But if we do not always loop indefinitely, we do need to handle the case of swap swizzled back to page before shmem_free_swap() gets it: add a retry for that case, as suggested by Konstantin Khlebnikov; and for the case of page swizzled back to swap, as suggested by Johannes Weiner. Signed-off-by: Hugh Dickins Reported-by: Sasha Levin Suggested-by: Vlastimil Babka Cc: Konstantin Khlebnikov Cc: Johannes Weiner Cc: Lukas Czerner Cc: Dave Jones Cc: [3.1+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/shmem.c b/mm/shmem.c index c0719f0..af68b15 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -468,23 +468,20 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, return; index = start; - for ( ; ; ) { + while (index < end) { cond_resched(); pvec.nr = find_get_entries(mapping, index, min(end - index, (pgoff_t)PAGEVEC_SIZE), pvec.pages, indices); if (!pvec.nr) { - if (index == start || unfalloc) + /* If all gone or hole-punch or unfalloc, we're done */ + if (index == start || end != -1) break; + /* But if truncating, restart to make sure all gone */ index = start; continue; } - if ((index == start || unfalloc) && indices[0] >= end) { - pagevec_remove_exceptionals(&pvec); - pagevec_release(&pvec); - break; - } mem_cgroup_uncharge_start(); for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; @@ -496,8 +493,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (radix_tree_exceptional_entry(page)) { if (unfalloc) continue; - nr_swaps_freed += !shmem_free_swap(mapping, - index, page); + if (shmem_free_swap(mapping, index, page)) { + /* Swap was replaced by page: retry */ + index--; + break; + } + nr_swaps_freed++; continue; } @@ -506,6 +507,11 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, if (page->mapping == mapping) { VM_BUG_ON_PAGE(PageWriteback(page), page); truncate_inode_page(mapping, page); + } else { + /* Page was replaced by swap: retry */ + unlock_page(page); + index--; + break; } } unlock_page(page); -- cgit v0.10.2 From 792ceaefe62189e3beea612ec0a052e42a81e993 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 23 Jul 2014 14:00:15 -0700 Subject: mm/fs: fix pessimization in hole-punching pagecache I wanted to revert my v3.1 commit d0823576bf4b ("mm: pincer in truncate_inode_pages_range"), to keep truncate_inode_pages_range() in synch with shmem_undo_range(); but have stepped back - a change to hole-punching in truncate_inode_pages_range() is a change to hole-punching in every filesystem (except tmpfs) that supports it. If there's a logical proof why no filesystem can depend for its own correctness on the pincer guarantee in truncate_inode_pages_range() - an instant when the entire hole is removed from pagecache - then let's revisit later. But the evidence is that only tmpfs suffered from the livelock, and we have no intention of extending hole-punch to ramfs. So for now just add a few comments (to match or differ from those in shmem_undo_range()), and fix one silliness noticed in d0823576bf4b... Its "index == start" addition to the hole-punch termination test was incomplete: it opened a way for the end condition to be missed, and the loop go on looking through the radix_tree, all the way to end of file. Fix that pessimization by resetting index when detected in inner loop. Note that it's actually hard to hit this case, without the obsessive concurrent faulting that trinity does: normally all pages are removed in the initial trylock_page() pass, and this loop finds nothing to do. I had to "#if 0" out the initial pass to reproduce bug and test fix. Signed-off-by: Hugh Dickins Cc: Sasha Levin Cc: Konstantin Khlebnikov Cc: Lukas Czerner Cc: Dave Jones Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/truncate.c b/mm/truncate.c index 6a78c81..eda2473 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -355,14 +355,16 @@ void truncate_inode_pages_range(struct address_space *mapping, for ( ; ; ) { cond_resched(); if (!pagevec_lookup_entries(&pvec, mapping, index, - min(end - index, (pgoff_t)PAGEVEC_SIZE), - indices)) { + min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) { + /* If all gone from start onwards, we're done */ if (index == start) break; + /* Otherwise restart to make sure all gone */ index = start; continue; } if (index == start && indices[0] >= end) { + /* All gone out of hole to be punched, we're done */ pagevec_remove_exceptionals(&pvec); pagevec_release(&pvec); break; @@ -373,8 +375,11 @@ void truncate_inode_pages_range(struct address_space *mapping, /* We rely upon deletion not changing page->index */ index = indices[i]; - if (index >= end) + if (index >= end) { + /* Restart punch to make sure all gone */ + index = start - 1; break; + } if (radix_tree_exceptional_entry(page)) { clear_exceptional_entry(mapping, index, page); -- cgit v0.10.2 From 4e66d445d0421a159135572a0ba44b75c7c4adfa Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 23 Jul 2014 14:00:17 -0700 Subject: simple_xattr: permit 0-size extended attributes If a filesystem uses simple_xattr to support user extended attributes, LTP setxattr01 and xfstests generic/062 fail with "Cannot allocate memory": simple_xattr_alloc()'s wrap-around test mistakenly excludes values of zero size. Fix that off-by-one (but apparently no filesystem needs them yet). Signed-off-by: Hugh Dickins Cc: Al Viro Cc: Jeff Layton Cc: Aristeu Rozanski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/xattr.c b/fs/xattr.c index 3377dff..c69e6d4 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -843,7 +843,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size) /* wrap around? */ len = sizeof(*new_xattr) + size; - if (len <= sizeof(*new_xattr)) + if (len < sizeof(*new_xattr)) return NULL; new_xattr = kmalloc(len, GFP_KERNEL); -- cgit v0.10.2 From 0253d634e0803a8376a0d88efee0bf523d8673f9 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 23 Jul 2014 14:00:19 -0700 Subject: mm: hugetlb: fix copy_hugetlb_page_range() Commit 4a705fef9862 ("hugetlb: fix copy_hugetlb_page_range() to handle migration/hwpoisoned entry") changed the order of huge_ptep_set_wrprotect() and huge_ptep_get(), which leads to breakage in some workloads like hugepage-backed heap allocation via libhugetlbfs. This patch fixes it. The test program for the problem is shown below: $ cat heap.c #include #include #include #define HPS 0x200000 int main() { int i; char *p = malloc(HPS); memset(p, '1', HPS); for (i = 0; i < 5; i++) { if (!fork()) { memset(p, '2', HPS); p = malloc(HPS); memset(p, '3', HPS); free(p); return 0; } } sleep(1); free(p); return 0; } $ export HUGETLB_MORECORE=yes ; export HUGETLB_NO_PREFAULT= ; hugectl --heap ./heap Fixes 4a705fef9862 ("hugetlb: fix copy_hugetlb_page_range() to handle migration/hwpoisoned entry"), so is applicable to -stable kernels which include it. Signed-off-by: Naoya Horiguchi Reported-by: Guillaume Morin Suggested-by: Guillaume Morin Acked-by: Hugh Dickins Cc: [2.6.37+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2024bbd..9221c02 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2604,6 +2604,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, } else { if (cow) huge_ptep_set_wrprotect(src, addr, src_pte); + entry = huge_ptep_get(src_pte); ptepage = pte_page(entry); get_page(ptepage); page_dup_rmap(ptepage); -- cgit v0.10.2 From 6fcc5420bfb91049a318bb4d88fe471248b5b391 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 20 Jul 2014 12:09:04 +0300 Subject: direct-io: fix uninitialized warning in do_direct_IO() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following warnings: fs/direct-io.c: In function ‘__blockdev_direct_IO’: fs/direct-io.c:1011:12: warning: ‘to’ may be used uninitialized in this function [-Wmaybe-uninitialized] fs/direct-io.c:913:16: note: ‘to’ was declared here fs/direct-io.c:1011:12: warning: ‘from’ may be used uninitialized in this function [-Wmaybe-uninitialized] fs/direct-io.c:913:10: note: ‘from’ was declared here are false positive because dio_get_page() either fails, or sets both 'from' and 'to'. Paul Bolle said ... Maybe it's better to move initializing "to" and "from" out of dio_get_page(). That _might_ make it easier for both the the reader and the compiler to understand what's going on. Something like this: Christoph Hellwig said ... The fix of moving the code definitively looks nicer, while I think uninitialized_var is horrible wart that won't get anywhere near my code. Boaz Harrosh: I agree with Christoph and Paul Signed-off-by: Boaz Harrosh Signed-off-by: Christoph Hellwig diff --git a/fs/direct-io.c b/fs/direct-io.c index 98040ba..194d0d1 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -198,9 +198,8 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) * L1 cache. */ static inline struct page *dio_get_page(struct dio *dio, - struct dio_submit *sdio, size_t *from, size_t *to) + struct dio_submit *sdio) { - int n; if (dio_pages_present(sdio) == 0) { int ret; @@ -209,10 +208,7 @@ static inline struct page *dio_get_page(struct dio *dio, return ERR_PTR(ret); BUG_ON(dio_pages_present(sdio) == 0); } - n = sdio->head++; - *from = n ? 0 : sdio->from; - *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE; - return dio->pages[n]; + return dio->pages[sdio->head]; } /** @@ -911,11 +907,15 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, while (sdio->block_in_file < sdio->final_block_in_request) { struct page *page; size_t from, to; - page = dio_get_page(dio, sdio, &from, &to); + + page = dio_get_page(dio, sdio); if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; } + from = sdio->head ? 0 : sdio->from; + to = (sdio->head == sdio->tail - 1) ? sdio->to : PAGE_SIZE; + sdio->head++; while (from < to) { unsigned this_chunk_bytes; /* # of bytes mapped */ -- cgit v0.10.2 From 295dc39d941dc2ae53d5c170365af4c9d5c16212 Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Mon, 21 Jul 2014 12:30:23 +0400 Subject: fs: umount on symlink leaks mnt count Currently umount on symlink blocks following umount: /vz is separate mount # ls /vz/ -al | grep test drwxr-xr-x. 2 root root 4096 Jul 19 01:14 testdir lrwxrwxrwx. 1 root root 11 Jul 19 01:16 testlink -> /vz/testdir # umount -l /vz/testlink umount: /vz/testlink: not mounted (expected) # lsof /vz # umount /vz umount: /vz: device is busy. (unexpected) In this case mountpoint_last() gets an extra refcount on path->mnt Signed-off-by: Vasily Averin Acked-by: Ian Kent Acked-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: Christoph Hellwig diff --git a/fs/namei.c b/fs/namei.c index 985c6f3..9eb787e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2256,9 +2256,10 @@ done: goto out; } path->dentry = dentry; - path->mnt = mntget(nd->path.mnt); + path->mnt = nd->path.mnt; if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) return 1; + mntget(path->mnt); follow_mount(path); error = 0; out: -- cgit v0.10.2 From 043572d5444116b9d9ad8ae763cf069e7accbc30 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 18 Jul 2014 07:31:18 -0700 Subject: hwmon: (smsc47m192) Fix temperature limit and vrm write operations Temperature limit clamps are applied after converting the temperature from milli-degrees C to degrees C, so either the clamp limit needs to be specified in degrees C, not milli-degrees C, or clamping must happen before converting to degrees C. Use the latter method to avoid overflows. vrm is an u8, so the written value needs to be limited to [0, 255]. Cc: Axel Lin Cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck Reviewed-by: Jean Delvare diff --git a/drivers/hwmon/smsc47m192.c b/drivers/hwmon/smsc47m192.c index efee4c5..34b9a60 100644 --- a/drivers/hwmon/smsc47m192.c +++ b/drivers/hwmon/smsc47m192.c @@ -86,7 +86,7 @@ static inline u8 IN_TO_REG(unsigned long val, int n) */ static inline s8 TEMP_TO_REG(int val) { - return clamp_val(SCALE(val, 1, 1000), -128000, 127000); + return SCALE(clamp_val(val, -128000, 127000), 1, 1000); } static inline int TEMP_FROM_REG(s8 val) @@ -384,6 +384,8 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr, err = kstrtoul(buf, 10, &val); if (err) return err; + if (val > 255) + return -EINVAL; data->vrm = val; return count; -- cgit v0.10.2 From 20dbea494543aefaace874cc3ec93a39b94b1ec4 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Wed, 23 Jul 2014 19:44:12 -0400 Subject: parisc: Remove SA_RESTORER define The sa_restorer field in struct sigaction is obsolete and no longer in the parisc implementation. However, the core code assumes the field is present if SA_RESTORER is defined. So, the define needs to be removed. Signed-off-by: John David Anglin Cc: Signed-off-by: Helge Deller diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index a2fa2971..f5645d6 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -69,8 +69,6 @@ #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -#define SA_RESTORER 0x04000000 /* obsolete -- ignored */ - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 -- cgit v0.10.2 From 9794144d5a95ca90cb9165a0aae1af155f1d8676 Mon Sep 17 00:00:00 2001 From: HIMANGI SARAOGI Date: Sat, 19 Jul 2014 17:07:41 +0530 Subject: parisc: Eliminate memset after alloc_bootmem_pages alloc_bootmem and related function always return zeroed region of memory. Thus a memset after calls to these functions is unnecessary. The following Coccinelle semantic patch was used for making the change: @@ expression E,E1; @@ E = \(alloc_bootmem\|alloc_bootmem_low\|alloc_bootmem_pages\|alloc_bootmem_low_pages\)(...) ... when != E - memset(E,0,E1); Signed-off-by: Himangi Saraogi Acked-by: Julia Lawall Signed-off-by: Helge Deller diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index ae085ad..0bef864 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -728,7 +728,6 @@ static void __init pagetable_init(void) #endif empty_zero_page = alloc_bootmem_pages(PAGE_SIZE); - memset(empty_zero_page, 0, PAGE_SIZE); } static void __init gateway_init(void) -- cgit v0.10.2 From 1b2c4869d8247f9e202fa8a73777c34adc62d409 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Thu, 24 Jul 2014 16:34:17 -0400 Subject: drm/radeon: fix cut and paste issue for hawaii. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a halfway fix for hawaii acceleration. More fixes to come but hopefully isolated to userspace. Signed-off-by: Jérôme Glisse Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index cc1f02f..c0ea661 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -2291,6 +2291,7 @@ static void cik_tiling_mode_table_init(struct radeon_device *rdev) gb_tile_moden = 0; break; } + rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden; WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden); } } else if (num_pipe_configs == 8) { -- cgit v0.10.2 From 042d7654ecb68383c392e3c7e86e04de023f65f9 Mon Sep 17 00:00:00 2001 From: Yaniv Rosner Date: Wed, 23 Jul 2014 22:12:57 +0300 Subject: bnx2x: fix set_setting for some PHYs Allow set_settings() to complete succesfully even if link is not estabilished and port type isn't known yet. Signed-off-by: Yaniv Rosner Signed-off-by: Dmitry Kravkov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index bd0600c..25eddd9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -379,6 +379,7 @@ static int bnx2x_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) break; case PORT_FIBRE: case PORT_DA: + case PORT_NONE: if (!(bp->port.supported[0] & SUPPORTED_FIBRE || bp->port.supported[1] & SUPPORTED_FIBRE)) { DP(BNX2X_MSG_ETHTOOL, -- cgit v0.10.2 From a71e3c37960ce5f9c6a519bc1215e3ba9fa83e75 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 23 Jul 2014 16:47:31 -0300 Subject: net: phy: Set the driver when registering an MDIO bus device mdiobus_register() registers a device which is already bound to a driver. Hence, the driver pointer should be set properly in order to track down the driver associated to the MDIO bus. This will be used to allow ethernet driver to pin down a MDIO bus driver, preventing it from being unloaded while the PHY device is running. Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: Ezequiel Garcia Signed-off-by: David S. Miller diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 4eaadcf..203651e 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -255,6 +255,7 @@ int mdiobus_register(struct mii_bus *bus) bus->dev.parent = bus->parent; bus->dev.class = &mdio_bus_class; + bus->dev.driver = bus->parent->driver; bus->dev.groups = NULL; dev_set_name(&bus->dev, "%s", bus->id); -- cgit v0.10.2 From b3565f278a9babc00032292be489e7fbff4f48d1 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Wed, 23 Jul 2014 16:47:32 -0300 Subject: net: phy: Ensure the MDIO bus module is held This commit adds proper module_{get,put} to prevent the MDIO bus module from being unloaded while the phydev is connected. By doing so, we fix a kernel panic produced when a MDIO driver is removed, but the phydev that relies on it is attached and running. Signed-off-by: Ezequiel Garcia Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 35d753d..74a8232 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -575,6 +575,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface) { struct device *d = &phydev->dev; + struct module *bus_module; int err; /* Assume that if there is no driver, that it doesn't @@ -599,6 +600,14 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, return -EBUSY; } + /* Increment the bus module reference count */ + bus_module = phydev->bus->dev.driver ? + phydev->bus->dev.driver->owner : NULL; + if (!try_module_get(bus_module)) { + dev_err(&dev->dev, "failed to get the bus module\n"); + return -EIO; + } + phydev->attached_dev = dev; dev->phydev = phydev; @@ -664,6 +673,10 @@ EXPORT_SYMBOL(phy_attach); void phy_detach(struct phy_device *phydev) { int i; + + if (phydev->bus->dev.driver) + module_put(phydev->bus->dev.driver->owner); + phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; phy_suspend(phydev); -- cgit v0.10.2 From a3cc465d95c32bfb529f69dee7841ecd67525561 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 24 Jul 2014 16:37:43 +0800 Subject: r8152: fix the checking of the usb speed When the usb speed of the RTL8152 is not high speed, the USB_DEV_STAT[2:1] should be equal to [0 1]. That is, the STAT_SPEED_FULL should be equal to 2. There is a easy way to check the usb speed by the speed field of the struct usb_device. Use it to replace the original metheod. Signed-off-by: Hayes Wang Spotted-by: Andrey Utkin Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 7bad2d3..3eab74c 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -282,7 +282,7 @@ /* USB_DEV_STAT */ #define STAT_SPEED_MASK 0x0006 #define STAT_SPEED_HIGH 0x0000 -#define STAT_SPEED_FULL 0x0001 +#define STAT_SPEED_FULL 0x0002 /* USB_TX_AGG */ #define TX_AGG_MAX_THRESHOLD 0x03 @@ -2292,9 +2292,8 @@ static void r8152b_exit_oob(struct r8152 *tp) /* rx share fifo credit full threshold */ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_NORMAL); - ocp_data = ocp_read_word(tp, MCU_TYPE_USB, USB_DEV_STAT); - ocp_data &= STAT_SPEED_MASK; - if (ocp_data == STAT_SPEED_FULL) { + if (tp->udev->speed == USB_SPEED_FULL || + tp->udev->speed == USB_SPEED_LOW) { /* rx share fifo credit near full threshold */ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_FULL); -- cgit v0.10.2 From fe26566d8a05151ba1dce75081f6270f73ec4ae1 Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Thu, 24 Jul 2014 18:54:47 +0300 Subject: bnx2x: fix crash during TSO tunneling When TSO packet is transmitted additional BD w/o mapping is used to describe the packed. The BD needs special handling in tx completion. kernel: Call Trace: kernel: [] dump_stack+0x19/0x1b kernel: [] warn_slowpath_common+0x61/0x80 kernel: [] warn_slowpath_fmt+0x5c/0x80 kernel: [] ? find_iova+0x4d/0x90 kernel: [] intel_unmap_page.part.36+0x142/0x160 kernel: [] intel_unmap_page+0x26/0x30 kernel: [] bnx2x_free_tx_pkt+0x157/0x2b0 [bnx2x] kernel: [] bnx2x_tx_int+0xac/0x220 [bnx2x] kernel: [] ? read_tsc+0x9/0x20 kernel: [] bnx2x_poll+0xbb/0x3c0 [bnx2x] kernel: [] net_rx_action+0x15a/0x250 kernel: [] __do_softirq+0xf7/0x290 kernel: [] call_softirq+0x1c/0x30 kernel: [] do_softirq+0x55/0x90 kernel: [] irq_exit+0x115/0x120 kernel: [] do_IRQ+0x58/0xf0 kernel: [] common_interrupt+0x6d/0x6d kernel: [] ? clockevents_notify+0x127/0x140 kernel: [] ? cpuidle_enter_state+0x4f/0xc0 kernel: [] cpuidle_idle_call+0xc5/0x200 kernel: [] arch_cpu_idle+0xe/0x30 kernel: [] cpu_startup_entry+0xf5/0x290 kernel: [] start_secondary+0x265/0x27b kernel: ---[ end trace 11aa7726f18d7e80 ]--- Fixes: a848ade408b ("bnx2x: add CSUM and TSO support for encapsulation protocols") Reported-by: Yulong Pei Cc: Michal Schmidt Signed-off-by: Dmitry Kravkov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index 4cab09d..8206a29 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -346,6 +346,7 @@ struct sw_tx_bd { u8 flags; /* Set on the first BD descriptor when there is a split BD */ #define BNX2X_TSO_SPLIT_BD (1<<0) +#define BNX2X_HAS_SECOND_PBD (1<<1) }; struct sw_rx_page { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 4b875da..c43e7238 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -227,6 +227,12 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata, --nbd; bd_idx = TX_BD(NEXT_TX_IDX(bd_idx)); + if (tx_buf->flags & BNX2X_HAS_SECOND_PBD) { + /* Skip second parse bd... */ + --nbd; + bd_idx = TX_BD(NEXT_TX_IDX(bd_idx)); + } + /* TSO headers+data bds share a common mapping. See bnx2x_tx_split() */ if (tx_buf->flags & BNX2X_TSO_SPLIT_BD) { tx_data_bd = &txdata->tx_desc_ring[bd_idx].reg_bd; @@ -3889,6 +3895,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set encapsulation flag in start BD */ SET_FLAG(tx_start_bd->general_data, ETH_TX_START_BD_TUNNEL_EXIST, 1); + + tx_buf->flags |= BNX2X_HAS_SECOND_PBD; + nbd++; } else if (xmit_type & XMIT_CSUM) { /* Set PBD in checksum offload case w/o encapsulation */ -- cgit v0.10.2 From 33cf75656923ff11d67a937a4f8e9344f58cea77 Mon Sep 17 00:00:00 2001 From: George Cherian Date: Fri, 25 Jul 2014 11:49:41 +0530 Subject: can: c_can_platform: Fix raminit, use devm_ioremap() instead of devm_ioremap_resource() The raminit register is shared register for both can0 and can1. Since commit: 32766ff net: can: Convert to use devm_ioremap_resource devm_ioremap_resource() is used to map raminit register. When using both interfaces the mapping for the can1 interface fails, leading to a non functional can interface. Signed-off-by: George Cherian Signed-off-by: Mugunthan V N Cc: linux-stable # >= v3.11 Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 824108c..12430be 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -287,7 +287,8 @@ static int c_can_plat_probe(struct platform_device *pdev) break; } - priv->raminit_ctrlreg = devm_ioremap_resource(&pdev->dev, res); + priv->raminit_ctrlreg = devm_ioremap(&pdev->dev, res->start, + resource_size(res)); if (IS_ERR(priv->raminit_ctrlreg) || priv->instance < 0) dev_info(&pdev->dev, "control memory is not used for raminit\n"); else -- cgit v0.10.2 From c6a26ce9af9eca685bdd766bcc1dbc855394880b Mon Sep 17 00:00:00 2001 From: Steven Miao Date: Wed, 16 Jul 2014 14:23:08 +0800 Subject: pm: bf609: cleanup smc nor flash drop smc pin state change code, pin state will be saved in pinctrl-adi2 driver cleanup nor flash init/exit for pm suspend/resume Signed-off-by: Steven Miao diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index 1ba4600..6fb0765 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -698,8 +698,6 @@ int bf609_nor_flash_init(struct platform_device *pdev) { #define CONFIG_SMC_GCTL_VAL 0x00000010 - if (!devm_pinctrl_get_select_default(&pdev->dev)) - return -EBUSY; bfin_write32(SMC_GCTL, CONFIG_SMC_GCTL_VAL); bfin_write32(SMC_B0CTL, 0x01002011); bfin_write32(SMC_B0TIM, 0x08170977); @@ -709,7 +707,6 @@ int bf609_nor_flash_init(struct platform_device *pdev) void bf609_nor_flash_exit(struct platform_device *pdev) { - devm_pinctrl_put(pdev->dev.pins->p); bfin_write32(SMC_GCTL, 0); } diff --git a/arch/blackfin/mach-bf609/include/mach/pm.h b/arch/blackfin/mach-bf609/include/mach/pm.h index 3ca0fb9..a1efd93 100644 --- a/arch/blackfin/mach-bf609/include/mach/pm.h +++ b/arch/blackfin/mach-bf609/include/mach/pm.h @@ -10,6 +10,7 @@ #define __MACH_BF609_PM_H__ #include +#include extern int bfin609_pm_enter(suspend_state_t state); extern int bf609_pm_prepare(void); @@ -19,6 +20,6 @@ void bf609_hibernate(void); void bfin_sec_raise_irq(unsigned int sid); void coreb_enable(void); -int bf609_nor_flash_init(void); -void bf609_nor_flash_exit(void); +int bf609_nor_flash_init(struct platform_device *pdev); +void bf609_nor_flash_exit(struct platform_device *pdev); #endif diff --git a/arch/blackfin/mach-bf609/pm.c b/arch/blackfin/mach-bf609/pm.c index 0cdd695..b1bfcf4 100644 --- a/arch/blackfin/mach-bf609/pm.c +++ b/arch/blackfin/mach-bf609/pm.c @@ -291,13 +291,13 @@ static struct bfin_cpu_pm_fns bf609_cpu_pm = { #if defined(CONFIG_MTD_PHYSMAP) || defined(CONFIG_MTD_PHYSMAP_MODULE) static int smc_pm_syscore_suspend(void) { - bf609_nor_flash_exit(); + bf609_nor_flash_exit(NULL); return 0; } static void smc_pm_syscore_resume(void) { - bf609_nor_flash_init(); + bf609_nor_flash_init(NULL); } static struct syscore_ops smc_pm_syscore_ops = { -- cgit v0.10.2 From 4ba7b5f0ce49d58e48e4c19a2c5ceea50fceda4d Mon Sep 17 00:00:00 2001 From: Steven Miao Date: Wed, 16 Jul 2014 14:37:31 +0800 Subject: blackfin: fix some bf5xx boards build for missing Signed-off-by: Steven Miao diff --git a/arch/blackfin/mach-bf533/boards/blackstamp.c b/arch/blackfin/mach-bf533/boards/blackstamp.c index 63b0e4f..0ccf0cf 100644 --- a/arch/blackfin/mach-bf533/boards/blackstamp.c +++ b/arch/blackfin/mach-bf533/boards/blackstamp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c index c65c6db..1e7290e 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c @@ -21,6 +21,7 @@ #endif #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c index af58454..c7495dc 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c @@ -21,6 +21,7 @@ #endif #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c index a021122..6b988ad 100644 --- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c +++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c @@ -21,6 +21,7 @@ #endif #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-bf561/boards/acvilon.c b/arch/blackfin/mach-bf561/boards/acvilon.c index 430b16d..6ab9515 100644 --- a/arch/blackfin/mach-bf561/boards/acvilon.c +++ b/arch/blackfin/mach-bf561/boards/acvilon.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c index 9f777df..e862f78 100644 --- a/arch/blackfin/mach-bf561/boards/cm_bf561.c +++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c @@ -18,6 +18,7 @@ #endif #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c index 88dee43..2de71e8 100644 --- a/arch/blackfin/mach-bf561/boards/ezkit.c +++ b/arch/blackfin/mach-bf561/boards/ezkit.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 3f68e175db60d8c4cbfee99a9cec44378dcb70f5 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Thu, 13 Feb 2014 18:52:34 +0800 Subject: blackfin: bind different groups of one pinmux function to different state name Signed-off-by: Sonic Zhang Signed-off-by: Steven Miao diff --git a/arch/blackfin/mach-bf548/boards/ezkit.c b/arch/blackfin/mach-bf548/boards/ezkit.c index 90138e6..1fe7ff2 100644 --- a/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/arch/blackfin/mach-bf548/boards/ezkit.c @@ -2118,7 +2118,7 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary", "pinctrl-adi2.0", NULL, "rotary"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0", "pinctrl-adi2.0", NULL, "can0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.1", "pinctrl-adi2.0", NULL, "can1"), - PIN_MAP_MUX_GROUP_DEFAULT("bf54x-lq043", "pinctrl-adi2.0", NULL, "ppi0_24b"), + PIN_MAP_MUX_GROUP_DEFAULT("bf54x-lq043", "pinctrl-adi2.0", "ppi0_24bgrp", "ppi0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-tdm.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-ac97.0", "pinctrl-adi2.0", NULL, "sport0"), @@ -2140,7 +2140,9 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("pata-bf54x", "pinctrl-adi2.0", NULL, "atapi_alter"), #endif PIN_MAP_MUX_GROUP_DEFAULT("bf5xx-nand.0", "pinctrl-adi2.0", NULL, "nfc0"), - PIN_MAP_MUX_GROUP_DEFAULT("bf54x-keys", "pinctrl-adi2.0", NULL, "keys_4x4"), + PIN_MAP_MUX_GROUP_DEFAULT("bf54x-keys", "pinctrl-adi2.0", "keys_4x4grp", "keys"), + PIN_MAP_MUX_GROUP("bf54x-keys", "4bit", "pinctrl-adi2.0", "keys_4x4grp", "keys"), + PIN_MAP_MUX_GROUP("bf54x-keys", "8bit", "pinctrl-adi2.0", "keys_8x8grp", "keys"), }; static int __init ezkit_init(void) diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c index 6fb0765..e2c0b02 100644 --- a/arch/blackfin/mach-bf609/boards/ezkit.c +++ b/arch/blackfin/mach-bf609/boards/ezkit.c @@ -2055,15 +2055,14 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = { PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary", "pinctrl-adi2.0", NULL, "rotary"), PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0", "pinctrl-adi2.0", NULL, "can0"), PIN_MAP_MUX_GROUP_DEFAULT("physmap-flash.0", "pinctrl-adi2.0", NULL, "smc0"), - PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2", "pinctrl-adi2.0", NULL, "ppi2_16b"), - PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0", "pinctrl-adi2.0", NULL, "ppi0_16b"), -#if IS_ENABLED(CONFIG_VIDEO_MT9M114) - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_8b"), -#elif IS_ENABLED(CONFIG_VIDEO_VS6624) - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_16b"), -#else - PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", NULL, "ppi0_24b"), -#endif + PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_display.0", "8bit", "pinctrl-adi2.0", "ppi2_8bgrp", "ppi2"), + PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_display.0", "16bit", "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "8bit", "pinctrl-adi2.0", "ppi0_8bgrp", "ppi0"), + PIN_MAP_MUX_GROUP_DEFAULT("bfin_capture.0", "pinctrl-adi2.0", "ppi0_16bgrp", "ppi0"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "16bit", "pinctrl-adi2.0", "ppi0_16bgrp", "ppi0"), + PIN_MAP_MUX_GROUP("bfin_capture.0", "24bit", "pinctrl-adi2.0", "ppi0_24bgrp", "ppi0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-tdm.0", "pinctrl-adi2.0", NULL, "sport0"), PIN_MAP_MUX_GROUP_DEFAULT("bfin-i2s.1", "pinctrl-adi2.0", NULL, "sport1"), -- cgit v0.10.2 From 814ecd0d1053df8b6891c0ff02567ed66fdf574e Mon Sep 17 00:00:00 2001 From: Steven Miao Date: Thu, 24 Jul 2014 16:10:19 +0800 Subject: irq: blackfin sec: drop duplicated sec priority set Signed-off-by: Steven Miao diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 867b7ce..1f94784 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -1208,8 +1208,6 @@ int __init init_arch_irq(void) bfin_sec_set_priority(CONFIG_SEC_IRQ_PRIORITY_LEVELS, sec_int_priority); - bfin_sec_set_priority(CONFIG_SEC_IRQ_PRIORITY_LEVELS, sec_int_priority); - /* Enable interrupts IVG7-15 */ bfin_irq_flags |= IMASK_IVG15 | IMASK_IVG14 | IMASK_IVG13 | IMASK_IVG12 | IMASK_IVG11 | -- cgit v0.10.2 From ac425b61135d8541cd2b41cf6fe11f9e2ca49b36 Mon Sep 17 00:00:00 2001 From: Steven Miao Date: Fri, 25 Jul 2014 10:31:16 +0800 Subject: defconfig: BF609: update spi config name Signed-off-by: Steven Miao diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig index a7e9bfd..fcec5ce 100644 --- a/arch/blackfin/configs/BF609-EZKIT_defconfig +++ b/arch/blackfin/configs/BF609-EZKIT_defconfig @@ -102,7 +102,7 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_BLACKFIN_TWI=y CONFIG_I2C_BLACKFIN_TWI_CLK_KHZ=100 CONFIG_SPI=y -CONFIG_SPI_BFIN_V3=y +CONFIG_SPI_ADI_V3=y CONFIG_GPIOLIB=y CONFIG_GPIO_SYSFS=y # CONFIG_HWMON is not set -- cgit v0.10.2 From b76f98236a23f808d6e3a27f7292670bc1d2c21b Mon Sep 17 00:00:00 2001 From: Steven Miao Date: Wed, 23 Jul 2014 17:28:25 +0800 Subject: blackfin: vmlinux.lds.S: reserve 32 bytes space at the end of data section for XIP kernel to collect some undefined section to the end of the data section and avoid section overlap Signed-off-by: Steven Miao diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index ba35864..c9eec84 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -145,7 +145,7 @@ SECTIONS .text_l1 L1_CODE_START : AT(LOADADDR(.exit.data) + SIZEOF(.exit.data)) #else - .init.data : AT(__data_lma + __data_len) + .init.data : AT(__data_lma + __data_len + 32) { __sinitdata = .; INIT_DATA -- cgit v0.10.2 From edffe1b626b39bd7121691dfdecb548431003bbb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 25 Jul 2014 17:07:47 -0700 Subject: parport: fix menu breakage Do not split the PARPORT-related symbols with the new kconfig symbol ARCH_MIGHT_HAVE_PC_PARPORT. The split was causing incorrect display of these symbols -- they were not being displayed together as they should be. Fixes: d90c3eb31535 "Kconfig cleanup (PARPORT_PC dependencies)" Signed-off-by: Randy Dunlap Cc: Mark Salter Cc: Ingo Molnar Cc: stable@vger.kernel.org # for 3.13, 3.14, 3.15 Signed-off-by: Linus Torvalds diff --git a/drivers/parport/Kconfig b/drivers/parport/Kconfig index 2872ece..44333bd 100644 --- a/drivers/parport/Kconfig +++ b/drivers/parport/Kconfig @@ -5,6 +5,12 @@ # Parport configuration. # +config ARCH_MIGHT_HAVE_PC_PARPORT + bool + help + Select this config option from the architecture Kconfig if + the architecture might have PC parallel port hardware. + menuconfig PARPORT tristate "Parallel port support" depends on HAS_IOMEM @@ -31,12 +37,6 @@ menuconfig PARPORT If unsure, say Y. -config ARCH_MIGHT_HAVE_PC_PARPORT - bool - help - Select this config option from the architecture Kconfig if - the architecture might have PC parallel port hardware. - if PARPORT config PARPORT_PC -- cgit v0.10.2 From 28c9770bcbd2b6dbab99669825a2f8fa69e6d35b Mon Sep 17 00:00:00 2001 From: Haojian Zhuang Date: Wed, 2 Apr 2014 21:31:50 +0800 Subject: ARM: dts: fix L2 address in Hi3620 Fix the address of L2 controler register in hi3620 SoC. This has been wrong from the point that the file was merged in v3.14. Signed-off-by: Haojian Zhuang Acked-by: Wei Xu Cc: stable@vger.kernel.org Signed-off-by: Arnd Bergmann diff --git a/arch/arm/boot/dts/hi3620.dtsi b/arch/arm/boot/dts/hi3620.dtsi index ab1116d..83a5b86 100644 --- a/arch/arm/boot/dts/hi3620.dtsi +++ b/arch/arm/boot/dts/hi3620.dtsi @@ -73,7 +73,7 @@ L2: l2-cache { compatible = "arm,pl310-cache"; - reg = <0xfc10000 0x100000>; + reg = <0x100000 0x100000>; interrupts = <0 15 4>; cache-unified; cache-level = <2>; -- cgit v0.10.2 From 8bdd638091605dc66d92c57c4b80eb87fffc15f7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 26 Jul 2014 12:58:23 -0700 Subject: mm: fix direct reclaim writeback regression Shortly before 3.16-rc1, Dave Jones reported: WARNING: CPU: 3 PID: 19721 at fs/xfs/xfs_aops.c:971 xfs_vm_writepage+0x5ce/0x630 [xfs]() CPU: 3 PID: 19721 Comm: trinity-c61 Not tainted 3.15.0+ #3 Call Trace: xfs_vm_writepage+0x5ce/0x630 [xfs] shrink_page_list+0x8f9/0xb90 shrink_inactive_list+0x253/0x510 shrink_lruvec+0x563/0x6c0 shrink_zone+0x3b/0x100 shrink_zones+0x1f1/0x3c0 try_to_free_pages+0x164/0x380 __alloc_pages_nodemask+0x822/0xc90 alloc_pages_vma+0xaf/0x1c0 handle_mm_fault+0xa31/0xc50 etc. 970 if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == 971 PF_MEMALLOC)) I did not respond at the time, because a glance at the PageDirty block in shrink_page_list() quickly shows that this is impossible: we don't do writeback on file pages (other than tmpfs) from direct reclaim nowadays. Dave was hallucinating, but it would have been disrespectful to say so. However, my own /var/log/messages now shows similar complaints WARNING: CPU: 1 PID: 28814 at fs/ext4/inode.c:1881 ext4_writepage+0xa7/0x38b() WARNING: CPU: 0 PID: 27347 at fs/ext4/inode.c:1764 ext4_writepage+0xa7/0x38b() from stressing some mmotm trees during July. Could a dirty xfs or ext4 file page somehow get marked PageSwapBacked, so fail shrink_page_list()'s page_is_file_cache() test, and so proceed to mapping->a_ops->writepage()? Yes, 3.16-rc1's commit 68711a746345 ("mm, migration: add destination page freeing callback") has provided such a way to compaction: if migrating a SwapBacked page fails, its newpage may be put back on the list for later use with PageSwapBacked still set, and nothing will clear it. Whether that can do anything worse than issue WARN_ON_ONCEs, and get some statistics wrong, is unclear: easier to fix than to think through the consequences. Fixing it here, before the put_new_page(), addresses the bug directly, but is probably the worst place to fix it. Page migration is doing too many parts of the job on too many levels: fixing it in move_to_new_page() to complement its SetPageSwapBacked would be preferable, except why is it (and newpage->mapping and newpage->index) done there, rather than down in migrate_page_move_mapping(), once we are sure of success? Not a cleanup to get into right now, especially not with memcg cleanups coming in 3.17. Reported-by: Dave Jones Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds diff --git a/mm/migrate.c b/mm/migrate.c index 9e0beaa..be6dbf9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -988,9 +988,10 @@ out: * it. Otherwise, putback_lru_page() will drop the reference grabbed * during isolation. */ - if (rc != MIGRATEPAGE_SUCCESS && put_new_page) + if (rc != MIGRATEPAGE_SUCCESS && put_new_page) { + ClearPageSwapBacked(newpage); put_new_page(newpage, private); - else + } else putback_lru_page(newpage); if (result) { -- cgit v0.10.2 From 2062afb4f804afef61cbe62a30cac9a46e58e067 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 Jul 2014 14:52:01 -0700 Subject: Fix gcc-4.9.0 miscompilation of load_balance() in scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michel Dänzer and a couple of other people reported inexplicable random oopses in the scheduler, and the cause turns out to be gcc mis-compiling the load_balance() function when debugging is enabled. The gcc bug apparently goes back to gcc-4.5, but slight optimization changes means that it now showed up as a problem in 4.9.0 and 4.9.1. The instruction scheduling problem causes gcc to schedule a spill operation to before the stack frame has been created, which in turn can corrupt the spilled value if an interrupt comes in. There may be other effects of this bug too, but that's the code generation problem seen in Michel's case. This is fixed in current gcc HEAD, but the workaround as suggested by Markus Trippelsdorf is pretty simple: use -fno-var-tracking-assignments when compiling the kernel, which disables the gcc code that causes the problem. This can result in slightly worse debug information for variable accesses, but that is infinitely preferable to actual code generation problems. Doing this unconditionally (not just for CONFIG_DEBUG_INFO) also allows non-debug builds to verify that the debug build would be identical: we can do export GCC_COMPARE_DEBUG=1 to make gcc internally verify that the result of the build is independent of the "-g" flag (it will make the compiler build everything twice, toggling the debug flag, and compare the results). Without the "-fno-var-tracking-assignments" option, the build would fail (even with 4.8.3 that didn't show the actual stack frame bug) with a gcc compare failure. See also gcc bugzilla: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61801 Reported-by: Michel Dänzer Suggested-by: Markus Trippelsdorf Cc: Jakub Jelinek Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/Makefile b/Makefile index 6b27741..5147f3f 100644 --- a/Makefile +++ b/Makefile @@ -688,6 +688,8 @@ KBUILD_CFLAGS += -fomit-frame-pointer endif endif +KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments) + ifdef CONFIG_DEBUG_INFO KBUILD_CFLAGS += -g KBUILD_AFLAGS += -Wa,-gdwarf-2 -- cgit v0.10.2 From 64aa90f26c06e1cb2aacfb98a7d0eccfbd6c1a91 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Jul 2014 12:41:55 -0700 Subject: Linux 3.16-rc7 diff --git a/Makefile b/Makefile index 5147f3f..f6a7794 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 16 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From fa952c54ba13deeb91cd4c7af255cdb5f1273535 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Wed, 23 Jul 2014 14:52:39 +0530 Subject: powerpc/powernv: Change BUG_ON to WARN_ON in elog code We can continue to read the error log (up to MAX size) even if we get the elog size more than MAX size. Hence change BUG_ON to WARN_ON. Also updated error message. Reported-by: Gopesh Kumar Chaudhary Signed-off-by: Vasant Hegde Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Deepthi Dharwar Acked-by: Stewart Smith Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 10268c4..0ad533b 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -249,7 +249,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { - pr_err("ELOG: Opal log read failed\n"); + pr_err("ELOG: OPAL log info read failed\n"); return; } @@ -257,7 +257,7 @@ static void elog_work_fn(struct work_struct *work) log_id = be64_to_cpu(id); elog_type = be64_to_cpu(type); - BUG_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); + WARN_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); if (elog_size >= OPAL_MAX_ERRLOG_SIZE) elog_size = OPAL_MAX_ERRLOG_SIZE; -- cgit v0.10.2 From 396a34340cdf7373c00e3977db27d1a20ea65ebc Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 25 Jul 2014 12:47:42 -0500 Subject: powerpc: Fix endianness of flash_block_list in rtas_flash The function rtas_flash_firmware passes the address of a data structure, flash_block_list, when making the update-flash-64-and-reboot rtas call. While the endianness of the address is handled correctly, the endianness of the data is not. This patch ensures that the data in flash_block_list is big endian when passed to rtas on little endian hosts. Signed-off-by: Thomas Falcon Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 658e89d..db2b482 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -611,17 +611,19 @@ static void rtas_flash_firmware(int reboot_type) for (f = flist; f; f = next) { /* Translate data addrs to absolute */ for (i = 0; i < f->num_blocks; i++) { - f->blocks[i].data = (char *)__pa(f->blocks[i].data); + f->blocks[i].data = (char *)cpu_to_be64(__pa(f->blocks[i].data)); image_size += f->blocks[i].length; + f->blocks[i].length = cpu_to_be64(f->blocks[i].length); } next = f->next; /* Don't translate NULL pointer for last entry */ if (f->next) - f->next = (struct flash_block_list *)__pa(f->next); + f->next = (struct flash_block_list *)cpu_to_be64(__pa(f->next)); else f->next = NULL; /* make num_blocks into the version/length field */ f->num_blocks = (FLASH_BLOCK_LIST_VERSION << 56) | ((f->num_blocks+1)*16); + f->num_blocks = cpu_to_be64(f->num_blocks); } printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); -- cgit v0.10.2 From f960d2093f29f0bc4e1df1fcefb993455620c0b5 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 25 Jul 2014 19:40:20 -0400 Subject: crypto: arm64-aes - fix encryption of unaligned data cryptsetup fails on arm64 when using kernel encryption via AF_ALG socket. See https://bugzilla.redhat.com/show_bug.cgi?id=1122937 The bug is caused by incorrect handling of unaligned data in arch/arm64/crypto/aes-glue.c. Cryptsetup creates a buffer that is aligned on 8 bytes, but not on 16 bytes. It opens AF_ALG socket and uses the socket to encrypt data in the buffer. The arm64 crypto accelerator causes data corruption or crashes in the scatterwalk_pagedone. This patch fixes the bug by passing the residue bytes that were not processed as the last parameter to blkcipher_walk_done. Signed-off-by: Mikulas Patocka Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 60f2f4c..79cd911 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -106,7 +106,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -128,7 +128,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) { aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -151,7 +151,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -174,7 +174,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_dec, rounds, blocks, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); return err; @@ -243,7 +243,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_enc, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); @@ -267,7 +267,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key1.key_dec, rounds, blocks, (u8 *)ctx->key2.key_enc, walk.iv, first); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } kernel_neon_end(); -- cgit v0.10.2 From f3c400ef473e00c680ea713a66196b05870b3710 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 25 Jul 2014 19:42:30 -0400 Subject: crypto: arm-aes - fix encryption of unaligned data Fix the same alignment bug as in arm64 - we need to pass residue unprocessed bytes as the last argument to blkcipher_walk_done. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # 3.13+ Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c index 4522366..15468fb 100644 --- a/arch/arm/crypto/aesbs-glue.c +++ b/arch/arm/crypto/aesbs-glue.c @@ -137,7 +137,7 @@ static int aesbs_cbc_encrypt(struct blkcipher_desc *desc, dst += AES_BLOCK_SIZE; } while (--blocks); } - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -158,7 +158,7 @@ static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, bsaes_cbc_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } while (walk.nbytes) { u32 blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -182,7 +182,7 @@ static int aesbs_cbc_decrypt(struct blkcipher_desc *desc, dst += AES_BLOCK_SIZE; src += AES_BLOCK_SIZE; } while (--blocks); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -268,7 +268,7 @@ static int aesbs_xts_encrypt(struct blkcipher_desc *desc, bsaes_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->enc, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } @@ -292,7 +292,7 @@ static int aesbs_xts_decrypt(struct blkcipher_desc *desc, bsaes_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, walk.nbytes, &ctx->dec, walk.iv); kernel_neon_end(); - err = blkcipher_walk_done(desc, &walk, 0); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } return err; } -- cgit v0.10.2 From 8266f5fcf015101fbeb73cbc152c9d208c2baec0 Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Fri, 25 Jul 2014 10:30:11 -0400 Subject: sunvnet: only use connected ports when sending The sunvnet driver doesn't check whether or not a port is connected when transmitting packets, which results in failures if a port fails to connect (e.g., due to a version mismatch). The original code also assumes unnecessarily that the first port is up and a switch, even though there is a flag for switch ports. This patch only matches a port if it is connected, and otherwise uses the switch_port flag to send the packet to a switch port that is up. Signed-off-by: David L Stevens Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index fd411d6..d813bfb 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -610,6 +610,13 @@ static int __vnet_tx_trigger(struct vnet_port *port) return err; } +static inline bool port_is_up(struct vnet_port *vnet) +{ + struct vio_driver_state *vio = &vnet->vio; + + return !!(vio->hs_state & VIO_HS_COMPLETE); +} + struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb) { unsigned int hash = vnet_hashfn(skb->data); @@ -617,14 +624,19 @@ struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb) struct vnet_port *port; hlist_for_each_entry(port, hp, hash) { + if (!port_is_up(port)) + continue; if (ether_addr_equal(port->raddr, skb->data)) return port; } - port = NULL; - if (!list_empty(&vp->port_list)) - port = list_entry(vp->port_list.next, struct vnet_port, list); - - return port; + list_for_each_entry(port, &vp->port_list, list) { + if (!port->switch_port) + continue; + if (!port_is_up(port)) + continue; + return port; + } + return NULL; } struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb) -- cgit v0.10.2 From 545469f7a5d7f7b2a17b74da0a1bd0c1aea2f545 Mon Sep 17 00:00:00 2001 From: Jun Zhao Date: Sat, 26 Jul 2014 00:38:59 +0800 Subject: neighbour : fix ndm_type type error issue ndm_type means L3 address type, in neighbour proxy and vxlan, it's RTN_UNICAST. NDA_DST is for netlink TLV type, hence it's not right value in this context. Signed-off-by: Jun Zhao Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ade33ef..9f79192 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -339,7 +339,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm->ndm_state = fdb->state; ndm->ndm_ifindex = vxlan->dev->ifindex; ndm->ndm_flags = fdb->flags; - ndm->ndm_type = NDA_DST; + ndm->ndm_type = RTN_UNICAST; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 559890b..ef31fef 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2249,7 +2249,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = pn->flags | NTF_PROXY; - ndm->ndm_type = NDA_DST; + ndm->ndm_type = RTN_UNICAST; ndm->ndm_ifindex = pn->dev->ifindex; ndm->ndm_state = NUD_NONE; -- cgit v0.10.2 From 04ca6973f7c1a0d8537f2d9906a0cf8e69886d75 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 26 Jul 2014 08:58:10 +0200 Subject: ip: make IP identifiers less predictable In "Counting Packets Sent Between Arbitrary Internet Hosts", Jeffrey and Jedidiah describe ways exploiting linux IP identifier generation to infer whether two machines are exchanging packets. With commit 73f156a6e8c1 ("inetpeer: get rid of ip_id_count"), we changed IP id generation, but this does not really prevent this side-channel technique. This patch adds a random amount of perturbation so that IP identifiers for a given destination [1] are no longer monotonically increasing after an idle period. Note that prandom_u32_max(1) returns 0, so if generator is used at most once per jiffy, this patch inserts no hole in the ID suite and do not increase collision probability. This is jiffies based, so in the worst case (HZ=1000), the id can rollover after ~65 seconds of idle time, which should be fine. We also change the hash used in __ip_select_ident() to not only hash on daddr, but also saddr and protocol, so that ICMP probes can not be used to infer information for other protocols. For IPv6, adds saddr into the hash as well, but not nexthdr. If I ping the patched target, we can see ID are now hard to predict. 21:57:11.008086 IP (...) A > target: ICMP echo request, seq 1, length 64 21:57:11.010752 IP (... id 2081 ...) target > A: ICMP echo reply, seq 1, length 64 21:57:12.013133 IP (...) A > target: ICMP echo request, seq 2, length 64 21:57:12.015737 IP (... id 3039 ...) target > A: ICMP echo reply, seq 2, length 64 21:57:13.016580 IP (...) A > target: ICMP echo request, seq 3, length 64 21:57:13.019251 IP (... id 3437 ...) target > A: ICMP echo reply, seq 3, length 64 [1] TCP sessions uses a per flow ID generator not changed by this patch. Signed-off-by: Eric Dumazet Reported-by: Jeffrey Knockel Reported-by: Jedidiah R. Crandall Cc: Willy Tarreau Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/include/net/ip.h b/include/net/ip.h index 0e795df..7596eb2 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -309,16 +309,7 @@ static inline unsigned int ip_skb_dst_mtu(const struct sk_buff *skb) } } -#define IP_IDENTS_SZ 2048u -extern atomic_t *ip_idents; - -static inline u32 ip_idents_reserve(u32 hash, int segs) -{ - atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ; - - return atomic_add_return(segs, id_ptr) - segs; -} - +u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct iphdr *iph, int segs); static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3162ea9..1901998 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -457,8 +457,31 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, return neigh_create(&arp_tbl, pkey, dev); } -atomic_t *ip_idents __read_mostly; -EXPORT_SYMBOL(ip_idents); +#define IP_IDENTS_SZ 2048u +struct ip_ident_bucket { + atomic_t id; + u32 stamp32; +}; + +static struct ip_ident_bucket *ip_idents __read_mostly; + +/* In order to protect privacy, we add a perturbation to identifiers + * if one generator is seldom used. This makes hard for an attacker + * to infer how many packets were sent between two points in time. + */ +u32 ip_idents_reserve(u32 hash, int segs) +{ + struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ; + u32 old = ACCESS_ONCE(bucket->stamp32); + u32 now = (u32)jiffies; + u32 delta = 0; + + if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) + delta = prandom_u32_max(now - old); + + return atomic_add_return(segs + delta, &bucket->id) - segs; +} +EXPORT_SYMBOL(ip_idents_reserve); void __ip_select_ident(struct iphdr *iph, int segs) { @@ -467,7 +490,10 @@ void __ip_select_ident(struct iphdr *iph, int segs) net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd)); - hash = jhash_1word((__force u32)iph->daddr, ip_idents_hashrnd); + hash = jhash_3words((__force u32)iph->daddr, + (__force u32)iph->saddr, + iph->protocol, + ip_idents_hashrnd); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cb9df0e..45702b8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -545,6 +545,8 @@ static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); + hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); + id = ip_idents_reserve(hash, 1); fhdr->identification = htonl(id); } -- cgit v0.10.2 From d937678ab625b0f63ce54b2b9b3e1cbd1b4a1b15 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 25 Jul 2014 04:41:25 -0700 Subject: ARM: dts: Revert enabling of twl configuration for n900 Commit 9188883fd66e9 (ARM: dts: Enable twl4030 off-idle configuration for selected omaps) allowed n900 to cut off core voltages during off-idle. This however caused a regression where twl regulator vaux1 was not getting enabled for the LCD panel as we are not requesting it for the panel. Turns out quite a few devices on n900 are using vaux1, and we need to either stop idling it, or add proper regulator_get calls for all users. But until we have a proper solution implemented and tested, let's just disable the twl off-idle configuration for now for n900. Reported-by: Aaro Koskinen Fixes: 9188883fd66e9 (ARM: dts: Enable twl4030 off-idle configuration for selected omaps) Signed-off-by: Tony Lindgren diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 1fe45d1..b15f1a7 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -353,7 +353,7 @@ }; twl_power: power { - compatible = "ti,twl4030-power-n900", "ti,twl4030-power-idle-osc-off"; + compatible = "ti,twl4030-power-n900"; ti,use_poweroff; }; }; -- cgit v0.10.2 From 1aab4d772ece470135bee47529d3f7c6d68609fa Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Jul 2014 14:15:33 -0700 Subject: mm: fix page_alloc.c kernel-doc warnings Fix kernel-doc warnings and function name in mm/page_alloc.c: Warning(..//mm/page_alloc.c:6074): No description found for parameter 'pfn' Warning(..//mm/page_alloc.c:6074): No description found for parameter 'mask' Warning(..//mm/page_alloc.c:6074): Excess function parameter 'start_bitidx' description in 'get_pfnblock_flags_mask' Warning(..//mm/page_alloc.c:6102): No description found for parameter 'pfn' Warning(..//mm/page_alloc.c:6102): No description found for parameter 'mask' Warning(..//mm/page_alloc.c:6102): Excess function parameter 'start_bitidx' description in 'set_pfnblock_flags_mask' Signed-off-by: Randy Dunlap Acked-by: Mel Gorman Signed-off-by: Linus Torvalds diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0ea758b..8bcfe3a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -6062,11 +6062,13 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) } /** - * get_pageblock_flags_group - Return the requested group of flags for the pageblock_nr_pages block of pages + * get_pfnblock_flags_mask - Return the requested group of flags for the pageblock_nr_pages block of pages * @page: The page within the block of interest - * @start_bitidx: The first bit of interest to retrieve - * @end_bitidx: The last bit of interest - * returns pageblock_bits flags + * @pfn: The target page frame number + * @end_bitidx: The last bit of interest to retrieve + * @mask: mask of bits that the caller is interested in + * + * Return: pageblock_bits flags */ unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn, unsigned long end_bitidx, @@ -6091,9 +6093,10 @@ unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn, /** * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages * @page: The page within the block of interest - * @start_bitidx: The first bit of interest - * @end_bitidx: The last bit of interest * @flags: The flags to set + * @pfn: The target page frame number + * @end_bitidx: The last bit of interest + * @mask: mask of bits that the caller is interested in */ void set_pfnblock_flags_mask(struct page *page, unsigned long flags, unsigned long pfn, -- cgit v0.10.2 From 0ef135152353d323daa1fef2db94a23e9ae31fb6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Jul 2014 17:53:23 +0100 Subject: AFS: Correctly assemble the client UUID Correctly assemble the client UUID by OR'ing in the flags rather than assigning them over the other components. Reported-by: Himangi Saraogi Signed-off-by: David Howells Signed-off-by: Linus Torvalds diff --git a/fs/afs/main.c b/fs/afs/main.c index 42dd2e4..35de0c0 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -55,13 +55,13 @@ static int __init afs_get_client_UUID(void) afs_uuid.time_low = uuidtime; afs_uuid.time_mid = uuidtime >> 32; afs_uuid.time_hi_and_version = (uuidtime >> 48) & AFS_UUID_TIMEHI_MASK; - afs_uuid.time_hi_and_version = AFS_UUID_VERSION_TIME; + afs_uuid.time_hi_and_version |= AFS_UUID_VERSION_TIME; get_random_bytes(&clockseq, 2); afs_uuid.clock_seq_low = clockseq; afs_uuid.clock_seq_hi_and_reserved = (clockseq >> 8) & AFS_UUID_CLOCKHI_MASK; - afs_uuid.clock_seq_hi_and_reserved = AFS_UUID_VARIANT_STD; + afs_uuid.clock_seq_hi_and_reserved |= AFS_UUID_VARIANT_STD; _debug("AFS UUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", afs_uuid.time_low, -- cgit v0.10.2 From 86b7987dd7a8acbaa54a446a73e2431da88b3ca1 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 26 Jul 2014 02:34:31 +0400 Subject: isdn/bas_gigaset: fix a leak on failure path in gigaset_probe() There is a lack of usb_put_dev(udev) on failure path in gigaset_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Tilman Schmidt Signed-off-by: David S. Miller diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c index c44950d..b7ae0a0 100644 --- a/drivers/isdn/gigaset/bas-gigaset.c +++ b/drivers/isdn/gigaset/bas-gigaset.c @@ -2400,6 +2400,7 @@ allocerr: error: freeurbs(cs); usb_set_intfdata(interface, NULL); + usb_put_dev(udev); gigaset_freecs(cs); return rc; } -- cgit v0.10.2 From 40eea803c6b2cfaab092f053248cbeab3f368412 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Sat, 26 Jul 2014 21:26:58 +0400 Subject: net: sendmsg: fix NULL pointer dereference Sasha's report: > While fuzzing with trinity inside a KVM tools guest running the latest -next > kernel with the KASAN patchset, I've stumbled on the following spew: > > [ 4448.949424] ================================================================== > [ 4448.951737] AddressSanitizer: user-memory-access on address 0 > [ 4448.952988] Read of size 2 by thread T19638: > [ 4448.954510] CPU: 28 PID: 19638 Comm: trinity-c76 Not tainted 3.16.0-rc4-next-20140711-sasha-00046-g07d3099-dirty #813 > [ 4448.956823] ffff88046d86ca40 0000000000000000 ffff880082f37e78 ffff880082f37a40 > [ 4448.958233] ffffffffb6e47068 ffff880082f37a68 ffff880082f37a58 ffffffffb242708d > [ 4448.959552] 0000000000000000 ffff880082f37a88 ffffffffb24255b1 0000000000000000 > [ 4448.961266] Call Trace: > [ 4448.963158] dump_stack (lib/dump_stack.c:52) > [ 4448.964244] kasan_report_user_access (mm/kasan/report.c:184) > [ 4448.965507] __asan_load2 (mm/kasan/kasan.c:352) > [ 4448.966482] ? netlink_sendmsg (net/netlink/af_netlink.c:2339) > [ 4448.967541] netlink_sendmsg (net/netlink/af_netlink.c:2339) > [ 4448.968537] ? get_parent_ip (kernel/sched/core.c:2555) > [ 4448.970103] sock_sendmsg (net/socket.c:654) > [ 4448.971584] ? might_fault (mm/memory.c:3741) > [ 4448.972526] ? might_fault (./arch/x86/include/asm/current.h:14 mm/memory.c:3740) > [ 4448.973596] ? verify_iovec (net/core/iovec.c:64) > [ 4448.974522] ___sys_sendmsg (net/socket.c:2096) > [ 4448.975797] ? put_lock_stats.isra.13 (./arch/x86/include/asm/preempt.h:98 kernel/locking/lockdep.c:254) > [ 4448.977030] ? lock_release_holdtime (kernel/locking/lockdep.c:273) > [ 4448.978197] ? lock_release_non_nested (kernel/locking/lockdep.c:3434 (discriminator 1)) > [ 4448.979346] ? check_chain_key (kernel/locking/lockdep.c:2188) > [ 4448.980535] __sys_sendmmsg (net/socket.c:2181) > [ 4448.981592] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2600) > [ 4448.982773] ? trace_hardirqs_on (kernel/locking/lockdep.c:2607) > [ 4448.984458] ? syscall_trace_enter (arch/x86/kernel/ptrace.c:1500 (discriminator 2)) > [ 4448.985621] ? trace_hardirqs_on_caller (kernel/locking/lockdep.c:2600) > [ 4448.986754] SyS_sendmmsg (net/socket.c:2201) > [ 4448.987708] tracesys (arch/x86/kernel/entry_64.S:542) > [ 4448.988929] ================================================================== This reports means that we've come to netlink_sendmsg() with msg->msg_name == NULL and msg->msg_namelen > 0. After this report there was no usual "Unable to handle kernel NULL pointer dereference" and this gave me a clue that address 0 is mapped and contains valid socket address structure in it. This bug was introduced in f3d3342602f8bcbf37d7c46641cb9bca7618eb1c (net: rework recvmsg handler msg_name and msg_namelen logic). Commit message states that: "Set msg->msg_name = NULL if user specified a NULL in msg_name but had a non-null msg_namelen in verify_iovec/verify_compat_iovec. This doesn't affect sendto as it would bail out earlier while trying to copy-in the address." But in fact this affects sendto when address 0 is mapped and contains socket address structure in it. In such case copy-in address will succeed, verify_iovec() function will successfully exit with msg->msg_namelen > 0 and msg->msg_name == NULL. This patch fixes it by setting msg_namelen to 0 if msg_name == NULL. Cc: Hannes Frederic Sowa Cc: Eric Dumazet Cc: Reported-by: Sasha Levin Signed-off-by: Andrey Ryabinin Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/compat.c b/net/compat.c index 9a76eaf..bc8aeef 100644 --- a/net/compat.c +++ b/net/compat.c @@ -85,7 +85,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, { int tot_len; - if (kern_msg->msg_namelen) { + if (kern_msg->msg_name && kern_msg->msg_namelen) { if (mode == VERIFY_READ) { int err = move_addr_to_kernel(kern_msg->msg_name, kern_msg->msg_namelen, @@ -93,10 +93,11 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - if (kern_msg->msg_name) - kern_msg->msg_name = kern_address; - } else + kern_msg->msg_name = kern_address; + } else { kern_msg->msg_name = NULL; + kern_msg->msg_namelen = 0; + } tot_len = iov_from_user_compat_to_kern(kern_iov, (struct compat_iovec __user *)kern_msg->msg_iov, diff --git a/net/core/iovec.c b/net/core/iovec.c index 827dd6b..e1ec45a 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -39,7 +39,7 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a { int size, ct, err; - if (m->msg_namelen) { + if (m->msg_name && m->msg_namelen) { if (mode == VERIFY_READ) { void __user *namep; namep = (void __user __force *) m->msg_name; @@ -48,10 +48,10 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - if (m->msg_name) - m->msg_name = address; + m->msg_name = address; } else { m->msg_name = NULL; + m->msg_namelen = 0; } size = m->msg_iovlen * sizeof(struct iovec); -- cgit v0.10.2 From 20fbe3ae990fd54fc7d1f889d61958bc8b38f254 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 28 Jul 2014 10:12:34 +0200 Subject: cdc_subset: deal with a device that needs reset for timeout This device needs to be reset to recover from a timeout. Unfortunately this can be handled only at the level of the subdrivers. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_subset.c b/drivers/net/usb/cdc_subset.c index 91f0919..3ef411e 100644 --- a/drivers/net/usb/cdc_subset.c +++ b/drivers/net/usb/cdc_subset.c @@ -85,9 +85,34 @@ static int always_connected (struct usbnet *dev) * *-------------------------------------------------------------------------*/ +static void m5632_recover(struct usbnet *dev) +{ + struct usb_device *udev = dev->udev; + struct usb_interface *intf = dev->intf; + int r; + + r = usb_lock_device_for_reset(udev, intf); + if (r < 0) + return; + + usb_reset_device(udev); + usb_unlock_device(udev); +} + +static int dummy_prereset(struct usb_interface *intf) +{ + return 0; +} + +static int dummy_postreset(struct usb_interface *intf) +{ + return 0; +} + static const struct driver_info ali_m5632_info = { .description = "ALi M5632", .flags = FLAG_POINTTOPOINT, + .recover = m5632_recover, }; #endif @@ -332,6 +357,8 @@ static struct usb_driver cdc_subset_driver = { .probe = usbnet_probe, .suspend = usbnet_suspend, .resume = usbnet_resume, + .pre_reset = dummy_prereset, + .post_reset = dummy_postreset, .disconnect = usbnet_disconnect, .id_table = products, .disable_hub_initiated_lpm = 1, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index f9e96c4..5173821 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1218,8 +1218,12 @@ void usbnet_tx_timeout (struct net_device *net) unlink_urbs (dev, &dev->txq); tasklet_schedule (&dev->bh); - - // FIXME: device recovery -- reset? + /* this needs to be handled individually because the generic layer + * doesn't know what is sufficient and could not restore private + * information if a remedy of an unconditional reset were used. + */ + if (dev->driver_info->recover) + (dev->driver_info->recover)(dev); } EXPORT_SYMBOL_GPL(usbnet_tx_timeout); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0662e98..26088fe 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -148,6 +148,9 @@ struct driver_info { struct sk_buff *(*tx_fixup)(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); + /* recover from timeout */ + void (*recover)(struct usbnet *dev); + /* early initialization code, can sleep. This is for minidrivers * having 'subminidrivers' that need to do extra initialization * right after minidriver have initialized hardware. */ -- cgit v0.10.2 From c472ab68ad67db23c9907a27649b7dc0899b61f9 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 28 Jul 2014 10:56:36 +0200 Subject: cdc-ether: clean packet filter upon probe There are devices that don't do reset all the way. So the packet filter should be set to a sane initial value. Failure to do so leads to intermittent failures of DHCP on some systems under some conditions. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 9ea4bfe..2a32d91 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -341,6 +341,22 @@ next_desc: usb_driver_release_interface(driver, info->data); return -ENODEV; } + + /* Some devices don't initialise properly. In particular + * the packet filter is not reset. There are devices that + * don't do reset all the way. So the packet filter should + * be set to a sane initial value. + */ + usb_control_msg(dev->udev, + usb_sndctrlpipe(dev->udev, 0), + USB_CDC_SET_ETHERNET_PACKET_FILTER, + USB_TYPE_CLASS | USB_RECIP_INTERFACE, + USB_CDC_PACKET_TYPE_ALL_MULTICAST | USB_CDC_PACKET_TYPE_DIRECTED | USB_CDC_PACKET_TYPE_BROADCAST, + intf->cur_altsetting->desc.bInterfaceNumber, + NULL, + 0, + USB_CTRL_SET_TIMEOUT + ); return 0; bad_desc: -- cgit v0.10.2 From d92f5dec6325079c550889883af51db1b77d5623 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 28 Jul 2014 16:28:07 -0700 Subject: net: phy: re-apply PHY fixups during phy_register_device Commit 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()") moved the call to phy_scan_fixups() in phy_init_hw() after a software reset is performed. By the time phy_init_hw() is called in phy_device_register(), no driver has been bound to this PHY yet, so all the checks in phy_init_hw() against the PHY driver and the PHY driver's config_init function will return 0. We will therefore never call phy_scan_fixups() as we should. Fix this by calling phy_scan_fixups() and check for its return value to restore the intended functionality. This broke PHY drivers which do register an early PHY fixup callback to intercept the PHY probing and do things like changing the 32-bits unique PHY identifier when a pseudo-PHY address has been used, as well as board-specific PHY fixups that need to be applied during driver probe time. Reported-by: Hauke Merthens Reported-by: Jonas Gorski Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 74a8232..22c57be 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -355,7 +355,7 @@ int phy_device_register(struct phy_device *phydev) phydev->bus->phy_map[phydev->addr] = phydev; /* Run all of the fixups for this PHY */ - err = phy_init_hw(phydev); + err = phy_scan_fixups(phydev); if (err) { pr_err("PHY %d failed to initialize\n", phydev->addr); goto out; -- cgit v0.10.2 From 4972a74b888c6b52ca41fae6076786dbbeb746d5 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Jul 2014 10:03:34 -0700 Subject: of: Split early_init_dt_scan into two parts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, early_init_dt_scan validates the header, sets the boot params, and scans for chosen/memory all in one function. Split this up into two separate functions (validation/setting boot params in one, scanning in another) to allow for additional setup between boot params and scanning the memory. Signed-off-by: Laura Abbott Tested-by: Andreas Färber [glikely: s/early_init_dt_scan_all/early_init_dt_scan_nodes/] Signed-off-by: Grant Likely diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index b777d8f..ecc7a02 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -937,7 +937,7 @@ int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, } #endif -bool __init early_init_dt_scan(void *params) +bool __init early_init_dt_verify(void *params) { if (!params) return false; @@ -951,6 +951,12 @@ bool __init early_init_dt_scan(void *params) return false; } + return true; +} + + +void __init early_init_dt_scan_nodes(void) +{ /* Retrieve various information from the /chosen node */ of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); @@ -959,7 +965,17 @@ bool __init early_init_dt_scan(void *params) /* Setup memory, calling early_init_dt_add_memory_arch */ of_scan_flat_dt(early_init_dt_scan_memory, NULL); +} + +bool __init early_init_dt_scan(void *params) +{ + bool status; + + status = early_init_dt_verify(params); + if (!status) + return false; + early_init_dt_scan_nodes(); return true; } diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 0511789..ebb2449 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -73,6 +73,8 @@ extern int early_init_dt_scan_root(unsigned long node, const char *uname, int depth, void *data); extern bool early_init_dt_scan(void *params); +extern bool early_init_dt_verify(void *params); +extern void early_init_dt_scan_nodes(void); extern const char *of_flat_dt_get_machine_name(void); extern const void *of_flat_dt_match_machine(const void *default_match, -- cgit v0.10.2 From 704033cee2e5b3c1c6eaf5bb398e465a9c3667b5 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Jul 2014 10:03:35 -0700 Subject: of: Add memory limiting function for flattened devicetrees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Buggy bootloaders may pass bogus memory entries in the devicetree. Add of_fdt_limit_memory to add an upper bound on the number of entries that can be present in the devicetree. Signed-off-by: Laura Abbott Tested-by: Andreas Färber Signed-off-by: Grant Likely diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index ecc7a02..9aa012e 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -26,6 +26,54 @@ #include /* for COMMAND_LINE_SIZE */ #include +/* + * of_fdt_limit_memory - limit the number of regions in the /memory node + * @limit: maximum entries + * + * Adjust the flattened device tree to have at most 'limit' number of + * memory entries in the /memory node. This function may be called + * any time after initial_boot_param is set. + */ +void of_fdt_limit_memory(int limit) +{ + int memory; + int len; + const void *val; + int nr_address_cells = OF_ROOT_NODE_ADDR_CELLS_DEFAULT; + int nr_size_cells = OF_ROOT_NODE_SIZE_CELLS_DEFAULT; + const uint32_t *addr_prop; + const uint32_t *size_prop; + int root_offset; + int cell_size; + + root_offset = fdt_path_offset(initial_boot_params, "/"); + if (root_offset < 0) + return; + + addr_prop = fdt_getprop(initial_boot_params, root_offset, + "#address-cells", NULL); + if (addr_prop) + nr_address_cells = fdt32_to_cpu(*addr_prop); + + size_prop = fdt_getprop(initial_boot_params, root_offset, + "#size-cells", NULL); + if (size_prop) + nr_size_cells = fdt32_to_cpu(*size_prop); + + cell_size = sizeof(uint32_t)*(nr_address_cells + nr_size_cells); + + memory = fdt_path_offset(initial_boot_params, "/memory"); + if (memory > 0) { + val = fdt_getprop(initial_boot_params, memory, "reg", &len); + if (len > limit*cell_size) { + len = limit*cell_size; + pr_debug("Limiting number of entries to %d\n", limit); + fdt_setprop(initial_boot_params, memory, "reg", val, + len); + } + } +} + /** * of_fdt_is_compatible - Return true if given node from the given blob has * compat in its compatible list diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index ebb2449..0ff360d 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -86,6 +86,7 @@ extern void unflatten_and_copy_device_tree(void); extern void early_init_devtree(void *); extern void early_get_first_memblock_info(void *, phys_addr_t *); extern u64 fdt_translate_address(const void *blob, int node_offset); +extern void of_fdt_limit_memory(int limit); #else /* CONFIG_OF_FLATTREE */ static inline void early_init_fdt_scan_reserved_mem(void) {} static inline const char *of_flat_dt_get_machine_name(void) { return NULL; } -- cgit v0.10.2 From 5a12a597a8627b91fd9d94365853f9f69a4f399c Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 15 Jul 2014 10:03:36 -0700 Subject: arm: Add devicetree fixup machine function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1c2f87c22566cd057bc8cde10c37ae9da1a1bb76 (ARM: 8025/1: Get rid of meminfo) dropped the upper bound on the number of memory banks that can be added as there was no technical need in the kernel. It turns out though, some bootloaders (specifically the arndale-octa exynos boards) may pass invalid memory information and rely on the kernel to not parse this data. This is a bug in the bootloader but we still need to work around this. Work around this by introducing a dt_fixup function. This function gets called before the flattened devicetree is scanned for memory and the like. In this fixup function for exynos, limit the maximum number of memory regions in the devicetree. Signed-off-by: Laura Abbott Tested-by: Andreas Färber [glikely: Added a comment and fixed up function name] Signed-off-by: Grant Likely diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h index 060a75e..0406cb3 100644 --- a/arch/arm/include/asm/mach/arch.h +++ b/arch/arm/include/asm/mach/arch.h @@ -50,6 +50,7 @@ struct machine_desc { struct smp_operations *smp; /* SMP operations */ bool (*smp_init)(void); void (*fixup)(struct tag *, char **); + void (*dt_fixup)(void); void (*init_meminfo)(void); void (*reserve)(void);/* reserve mem blocks */ void (*map_io)(void);/* IO mapping function */ diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index e94a157..11c54de 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -212,7 +212,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) mdesc_best = &__mach_desc_GENERIC_DT; #endif - if (!dt_phys || !early_init_dt_scan(phys_to_virt(dt_phys))) + if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys))) return NULL; mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach); @@ -237,6 +237,12 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) dump_machine_table(); /* does not return */ } + /* We really don't want to do this, but sometimes firmware provides buggy data */ + if (mdesc->dt_fixup) + mdesc->dt_fixup(); + + early_init_dt_scan_nodes(); + /* Change machine number to match the mdesc we're using */ __machine_arch_type = mdesc->nr; diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c index 46d893f..66c9b96 100644 --- a/arch/arm/mach-exynos/exynos.c +++ b/arch/arm/mach-exynos/exynos.c @@ -335,6 +335,15 @@ static void __init exynos_reserve(void) #endif } +static void __init exynos_dt_fixup(void) +{ + /* + * Some versions of uboot pass garbage entries in the memory node, + * use the old CONFIG_ARM_NR_BANKS + */ + of_fdt_limit_memory(8); +} + DT_MACHINE_START(EXYNOS_DT, "SAMSUNG EXYNOS (Flattened Device Tree)") /* Maintainer: Thomas Abraham */ /* Maintainer: Kukjin Kim */ @@ -348,4 +357,5 @@ DT_MACHINE_START(EXYNOS_DT, "SAMSUNG EXYNOS (Flattened Device Tree)") .dt_compat = exynos_dt_compat, .restart = exynos_restart, .reserve = exynos_reserve, + .dt_fixup = exynos_dt_fixup, MACHINE_END -- cgit v0.10.2 From 63afbe7a0ac184ef8485dac4914e87b211b5bfaa Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Jul 2014 16:29:12 +0100 Subject: kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x80010000, then guest physical addresses 0x80010000 - 0x8001efff will map host physical region 0x2c020000 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c020000 in the above scenario. This patch fixes the problem by failing the vgic probe if the physical base address or the size of GICV aren't page-aligned. Note that this generated a warning in dmesg about freeing enabled IRQs, so I had to move the IRQ enabling later in the probe. Cc: Christoffer Dall Cc: Marc Zyngier Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Joel Schopp Cc: Don Dutile Acked-by: Peter Maydell Acked-by: Joel Schopp Acked-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 56ff9be..476d3bf 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1526,17 +1526,33 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } - kvm_info("%s@%llx IRQ%d\n", vgic_node->name, - vctrl_res.start, vgic_maint_irq); - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { kvm_err("Cannot obtain VCPU resource\n"); ret = -ENXIO; goto out_unmap; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out_unmap; + } + + if (!PAGE_ALIGNED(resource_size(&vcpu_res))) { + kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", + (unsigned long long)resource_size(&vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, + vctrl_res.start, vgic_maint_irq); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + goto out; out_unmap: -- cgit v0.10.2 From b7dd0e350e0bd4c0fddcc9b8958342700b00b168 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 11 Jul 2014 16:42:34 +0100 Subject: x86/xen: safely map and unmap grant frames when in atomic context arch_gnttab_map_frames() and arch_gnttab_unmap_frames() are called in atomic context but were calling alloc_vm_area() which might sleep. Also, if a driver attempts to allocate a grant ref from an interrupt and the table needs expanding, then the CPU may already by in lazy MMU mode and apply_to_page_range() will BUG when it tries to re-enable lazy MMU mode. These two functions are only used in PV guests. Introduce arch_gnttab_init() to allocates the virtual address space in advance. Avoid the use of apply_to_page_range() by using saving and using the array of PTE addresses from the alloc_vm_area() call (which ensures that the required page tables are pre-allocated). Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/arm/xen/grant-table.c b/arch/arm/xen/grant-table.c index 859a9bb..91cf08b 100644 --- a/arch/arm/xen/grant-table.c +++ b/arch/arm/xen/grant-table.c @@ -51,3 +51,8 @@ int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, { return -ENOSYS; } + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) +{ + return 0; +} diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index c985835..ebfa9b2 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -36,99 +36,133 @@ #include #include +#include #include #include #include #include +#include #include -static int map_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +static struct gnttab_vm_area { + struct vm_struct *area; + pte_t **ptes; +} gnttab_shared_vm_area, gnttab_status_vm_area; + +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + void **__shared) { - unsigned long **frames = (unsigned long **)data; + void *shared = *__shared; + unsigned long addr; + unsigned long i; - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; - return 0; -} + if (shared == NULL) + *__shared = shared = gnttab_shared_vm_area.area->addr; -/* - * This function is used to map shared frames to store grant status. It is - * different from map_pte_fn above, the frames type here is uint64_t. - */ -static int map_pte_fn_status(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) -{ - uint64_t **frames = (uint64_t **)data; + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_shared_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } - set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL)); - (*frames)++; return 0; } -static int unmap_pte_fn(pte_t *pte, struct page *pmd_page, - unsigned long addr, void *data) +int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + grant_status_t **__shared) { + grant_status_t *shared = *__shared; + unsigned long addr; + unsigned long i; + + if (shared == NULL) + *__shared = shared = gnttab_status_vm_area.area->addr; + + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, gnttab_status_vm_area.ptes[i], + mfn_pte(frames[i], PAGE_KERNEL)); + addr += PAGE_SIZE; + } - set_pte_at(&init_mm, addr, pte, __pte(0)); return 0; } -int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - void **__shared) +void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) { - int rc; - void *shared = *__shared; + pte_t **ptes; + unsigned long addr; + unsigned long i; - if (shared == NULL) { - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; - } + if (shared == gnttab_status_vm_area.area->addr) + ptes = gnttab_status_vm_area.ptes; + else + ptes = gnttab_shared_vm_area.ptes; - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn, &frames); - return rc; + addr = (unsigned long)shared; + + for (i = 0; i < nr_gframes; i++) { + set_pte_at(&init_mm, addr, ptes[i], __pte(0)); + addr += PAGE_SIZE; + } } -int arch_gnttab_map_status(uint64_t *frames, unsigned long nr_gframes, - unsigned long max_nr_gframes, - grant_status_t **__shared) +static int arch_gnttab_valloc(struct gnttab_vm_area *area, unsigned nr_frames) { - int rc; - grant_status_t *shared = *__shared; + area->ptes = kmalloc(sizeof(pte_t *) * nr_frames, GFP_KERNEL); + if (area->ptes == NULL) + return -ENOMEM; - if (shared == NULL) { - /* No need to pass in PTE as we are going to do it - * in apply_to_page_range anyhow. */ - struct vm_struct *area = - alloc_vm_area(PAGE_SIZE * max_nr_gframes, NULL); - BUG_ON(area == NULL); - shared = area->addr; - *__shared = shared; + area->area = alloc_vm_area(PAGE_SIZE * nr_frames, area->ptes); + if (area->area == NULL) { + kfree(area->ptes); + return -ENOMEM; } - rc = apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, - map_pte_fn_status, &frames); - return rc; + return 0; } -void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) +static void arch_gnttab_vfree(struct gnttab_vm_area *area) +{ + free_vm_area(area->area); + kfree(area->ptes); +} + +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status) { - apply_to_page_range(&init_mm, (unsigned long)shared, - PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); + int ret; + + if (!xen_pv_domain()) + return 0; + + ret = arch_gnttab_valloc(&gnttab_shared_vm_area, nr_shared); + if (ret < 0) + return ret; + + /* + * Always allocate the space for the status frames in case + * we're migrated to a host with V2 support. + */ + ret = arch_gnttab_valloc(&gnttab_status_vm_area, nr_status); + if (ret < 0) + goto err; + + return 0; + err: + arch_gnttab_vfree(&gnttab_shared_vm_area); + return -ENOMEM; } + #ifdef CONFIG_XEN_PVH #include #include -#include #include static int __init xlated_setup_gnttab_pages(void) { diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 5d4de88..eeba754 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -1195,18 +1195,20 @@ static int gnttab_expand(unsigned int req_entries) int gnttab_init(void) { int i; + unsigned long max_nr_grant_frames; unsigned int max_nr_glist_frames, nr_glist_frames; unsigned int nr_init_grefs; int ret; gnttab_request_version(); + max_nr_grant_frames = gnttab_max_grant_frames(); nr_grant_frames = 1; /* Determine the maximum number of frames required for the * grant reference free list on the current hypervisor. */ BUG_ON(grefs_per_grant_frame == 0); - max_nr_glist_frames = (gnttab_max_grant_frames() * + max_nr_glist_frames = (max_nr_grant_frames * grefs_per_grant_frame / RPP); gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *), @@ -1223,6 +1225,11 @@ int gnttab_init(void) } } + ret = arch_gnttab_init(max_nr_grant_frames, + nr_status_frames(max_nr_grant_frames)); + if (ret < 0) + goto ini_nomem; + if (gnttab_setup() < 0) { ret = -ENODEV; goto ini_nomem; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index a5af2a2..5c1aba1 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -170,6 +170,7 @@ gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, phys_addr_t addr, unmap->dev_bus_addr = 0; } +int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status); int arch_gnttab_map_shared(xen_pfn_t *frames, unsigned long nr_gframes, unsigned long max_nr_gframes, void **__shared); -- cgit v0.10.2 From 1d8fcba1de632d7a43349788ad534c5a32c5a44c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 30 Jul 2014 08:56:23 -0700 Subject: Revert "cdc_subset: deal with a device that needs reset for timeout" This reverts commit 20fbe3ae990fd54fc7d1f889d61958bc8b38f254. As reported by Stephen Rothwell, it causes compile failures in certain configurations: drivers/net/usb/cdc_subset.c:360:15: error: 'dummy_prereset' undeclared here (not in a function) .pre_reset = dummy_prereset, ^ drivers/net/usb/cdc_subset.c:361:16: error: 'dummy_postreset' undeclared here (not in a function) .post_reset = dummy_postreset, ^ Reported-by: Stephen Rothwell Acked-by: David Miller Cc: Oliver Neukum Signed-off-by: Linus Torvalds diff --git a/drivers/net/usb/cdc_subset.c b/drivers/net/usb/cdc_subset.c index 3ef411e..91f0919 100644 --- a/drivers/net/usb/cdc_subset.c +++ b/drivers/net/usb/cdc_subset.c @@ -85,34 +85,9 @@ static int always_connected (struct usbnet *dev) * *-------------------------------------------------------------------------*/ -static void m5632_recover(struct usbnet *dev) -{ - struct usb_device *udev = dev->udev; - struct usb_interface *intf = dev->intf; - int r; - - r = usb_lock_device_for_reset(udev, intf); - if (r < 0) - return; - - usb_reset_device(udev); - usb_unlock_device(udev); -} - -static int dummy_prereset(struct usb_interface *intf) -{ - return 0; -} - -static int dummy_postreset(struct usb_interface *intf) -{ - return 0; -} - static const struct driver_info ali_m5632_info = { .description = "ALi M5632", .flags = FLAG_POINTTOPOINT, - .recover = m5632_recover, }; #endif @@ -357,8 +332,6 @@ static struct usb_driver cdc_subset_driver = { .probe = usbnet_probe, .suspend = usbnet_suspend, .resume = usbnet_resume, - .pre_reset = dummy_prereset, - .post_reset = dummy_postreset, .disconnect = usbnet_disconnect, .id_table = products, .disable_hub_initiated_lpm = 1, diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 5173821..f9e96c4 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1218,12 +1218,8 @@ void usbnet_tx_timeout (struct net_device *net) unlink_urbs (dev, &dev->txq); tasklet_schedule (&dev->bh); - /* this needs to be handled individually because the generic layer - * doesn't know what is sufficient and could not restore private - * information if a remedy of an unconditional reset were used. - */ - if (dev->driver_info->recover) - (dev->driver_info->recover)(dev); + + // FIXME: device recovery -- reset? } EXPORT_SYMBOL_GPL(usbnet_tx_timeout); diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 26088fe..0662e98 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -148,9 +148,6 @@ struct driver_info { struct sk_buff *(*tx_fixup)(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); - /* recover from timeout */ - void (*recover)(struct usbnet *dev); - /* early initialization code, can sleep. This is for minidrivers * having 'subminidrivers' that need to do extra initialization * right after minidriver have initialized hardware. */ -- cgit v0.10.2