From 2f41898704d3cff796ea0adaea272808707d758e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Sat, 1 Feb 2014 16:46:16 +0100 Subject: ARM: sun7i: dt: Fix interrupt trigger types The Allwinner A20 uses the ARM GIC as its internal interrupts controller. The GIC can work on several interrupt triggers, and the A20 was actually setting it up to use a rising edge as a trigger, while it was actually a level high trigger, leading to some interrupts that would be completely ignored if the edge was missed. Fix this for the remaining DT nodes that slipped through. Signed-off-by: Maxime Ripard Cc: stable@vger.kernel.org diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 119f066..2374f5a 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -454,7 +454,7 @@ rtc: rtc@01c20d00 { compatible = "allwinner,sun7i-a20-rtc"; reg = <0x01c20d00 0x20>; - interrupts = <0 24 1>; + interrupts = <0 24 4>; }; sid: eeprom@01c23800 { @@ -596,10 +596,10 @@ hstimer@01c60000 { compatible = "allwinner,sun7i-a20-hstimer"; reg = <0x01c60000 0x1000>; - interrupts = <0 81 1>, - <0 82 1>, - <0 83 1>, - <0 84 1>; + interrupts = <0 81 4>, + <0 82 4>, + <0 83 4>, + <0 84 4>; clocks = <&ahb_gates 28>; }; -- cgit v0.10.2 From 40dd8f3b900cac1d925605a9d3199368c4af0a40 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Sun, 2 Feb 2014 14:52:40 +0100 Subject: ARM: sunxi: dt: Change the touchscreen compatibles Switch the device tree touchscreen compatibles to have a common pattern accross all Allwinner SoCs. Since the touchscreen driver has not been merged yet, it has no side effect. Signed-off-by: Maxime Ripard diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 040bb0e..e3ff64c 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -426,7 +426,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-ts"; + compatible = "allwinner,sun4i-a10-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; }; diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index ea16054..8e57a28 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -383,7 +383,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-ts"; + compatible = "allwinner,sun4i-a10-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; }; diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi index 320335a..c463fd7 100644 --- a/arch/arm/boot/dts/sun5i-a13.dtsi +++ b/arch/arm/boot/dts/sun5i-a13.dtsi @@ -346,7 +346,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-ts"; + compatible = "allwinner,sun4i-a10-ts"; reg = <0x01c25000 0x100>; interrupts = <29>; }; diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 2374f5a..b79a626 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -463,7 +463,7 @@ }; rtp: rtp@01c25000 { - compatible = "allwinner,sun4i-ts"; + compatible = "allwinner,sun4i-a10-ts"; reg = <0x01c25000 0x100>; interrupts = <0 29 4>; }; -- cgit v0.10.2 From cd91b2fecfa66967e6ad732a9af860eb96c31ba4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 14 Feb 2014 20:19:03 +0100 Subject: ARM: 7963/1: mm: report both sections from PMD On 2-level page table systems, the PMD has 2 section entries. Report these, otherwise ARM_PTDUMP will miss reporting permission changes on odd section boundaries. Signed-off-by: Kees Cook Acked-by: Catalin Marinas Tested-by: Steve Capper Signed-off-by: Russell King diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 2b3a564..ef69152 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -264,6 +264,9 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) note_page(st, addr, 3, pmd_val(*pmd)); else walk_pte(st, pmd, addr); + + if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) + note_page(st, addr + SECTION_SIZE, 3, pmd_val(pmd[1])); } } -- cgit v0.10.2 From c9698e5cd6ad1ff2844bc44fabddc0f2e0562047 Mon Sep 17 00:00:00 2001 From: "David A. Long" Date: Fri, 14 Feb 2014 22:41:18 +0100 Subject: ARM: 7964/1: Detect section mismatches in thumb relocations Add processing for normally encountered thumb relocation types so that section mismatches will be detected. Comment from Rusty Russell follows: Happiest for this to go through an ARM tree, so: Signed-off-by: David A. Long Acked-by: Rusty Russell Signed-off-by: Russell King diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 4061098..99a45fd 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1502,6 +1502,16 @@ static int addend_386_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) #define R_ARM_JUMP24 29 #endif +#ifndef R_ARM_THM_CALL +#define R_ARM_THM_CALL 10 +#endif +#ifndef R_ARM_THM_JUMP24 +#define R_ARM_THM_JUMP24 30 +#endif +#ifndef R_ARM_THM_JUMP19 +#define R_ARM_THM_JUMP19 51 +#endif + static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) { unsigned int r_typ = ELF_R_TYPE(r->r_info); @@ -1515,6 +1525,9 @@ static int addend_arm_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r) case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + case R_ARM_THM_JUMP19: /* From ARM ABI: ((S + A) | T) - P */ r->r_addend = (int)(long)(elf->hdr + sechdr->sh_offset + -- cgit v0.10.2 From 5a98268e0f657e8f1289ad9b83fe010f0208565d Mon Sep 17 00:00:00 2001 From: Asai Thambi S P Date: Tue, 18 Feb 2014 14:49:17 -0800 Subject: mtip32xx: Reduce the number of unaligned writes to 2 After several experiments, deduced the the optimal number of unaligned writes to be 2. Changing the value accordingly. Signed-off-by: Asai Thambi S P Signed-off-by: Sam Bradshaw Signed-off-by: Jens Axboe diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b52e9a6..54174cb 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -53,7 +53,7 @@ #define MTIP_FTL_REBUILD_TIMEOUT_MS 2400000 /* unaligned IO handling */ -#define MTIP_MAX_UNALIGNED_SLOTS 8 +#define MTIP_MAX_UNALIGNED_SLOTS 2 /* Macro to extract the tag bit number from a tag value. */ #define MTIP_TAG_BIT(tag) (tag & 0x1F) -- cgit v0.10.2 From 7611c7a5613f20ec2fb536c1d868d15045730bc0 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 19 Feb 2014 10:36:08 +0100 Subject: spi/topcliff-pch: Fix DMA channel bus_num might be asigned dynamically to e.g. 32766. In this case the calculated DMA channel based on SPI bus number is bogus. Use SPI channel number instead for calculation. Signed-off-by: Alexander Stein Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 2e7f38c..3383008c 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -915,7 +915,7 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw) /* Set Tx DMA */ param = &dma->param_tx; param->dma_dev = &dma_dev->dev; - param->chan_id = data->master->bus_num * 2; /* Tx = 0, 2 */ + param->chan_id = data->ch * 2; /* Tx = 0, 2 */; param->tx_reg = data->io_base_addr + PCH_SPDWR; param->width = width; chan = dma_request_channel(mask, pch_spi_filter, param); @@ -930,7 +930,7 @@ static void pch_spi_request_dma(struct pch_spi_data *data, int bpw) /* Set Rx DMA */ param = &dma->param_rx; param->dma_dev = &dma_dev->dev; - param->chan_id = data->master->bus_num * 2 + 1; /* Rx = Tx + 1 */ + param->chan_id = data->ch * 2 + 1; /* Rx = Tx + 1 */; param->rx_reg = data->io_base_addr + PCH_SPDRR; param->width = width; chan = dma_request_channel(mask, pch_spi_filter, param); -- cgit v0.10.2 From 18ba7b9d5fd6eb2c2c510dfcfc6e8fee56c42571 Mon Sep 17 00:00:00 2001 From: Vaibhav Bedia Date: Sun, 16 Feb 2014 18:15:44 -0500 Subject: ARM: OMAP5: PRM: Fix reboot handling Use the correct register offset for issuing the reset command in OMAP5. Since dev_inst is set dynamically OMAP4 should not be affected by this change. Signed-off-by: Vaibhav Bedia Tested-by: Lokesh Vutla Acked-by: Rajendra Nayak Signed-off-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/prminst44xx.c b/arch/arm/mach-omap2/prminst44xx.c index 6334b96..280f3c5 100644 --- a/arch/arm/mach-omap2/prminst44xx.c +++ b/arch/arm/mach-omap2/prminst44xx.c @@ -183,11 +183,11 @@ void omap4_prminst_global_warm_sw_reset(void) OMAP4_PRM_RSTCTRL_OFFSET); v |= OMAP4430_RST_GLOBAL_WARM_SW_MASK; omap4_prminst_write_inst_reg(v, OMAP4430_PRM_PARTITION, - OMAP4430_PRM_DEVICE_INST, + dev_inst, OMAP4_PRM_RSTCTRL_OFFSET); /* OCP barrier */ v = omap4_prminst_read_inst_reg(OMAP4430_PRM_PARTITION, - OMAP4430_PRM_DEVICE_INST, + dev_inst, OMAP4_PRM_RSTCTRL_OFFSET); } -- cgit v0.10.2 From c317d0f241fa0bbb098aa35f3d4b3067be2b5f3d Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 10 Jan 2014 17:43:08 -0600 Subject: ARM: DRA7: hwmod data: correct the sysc data for spinlock The spinlock module's SYSCONFIG register on DRA7xx does not support smart wakeup, and also does not have the CLKACTIVITY field. The sysc data for spinlock module has been appropriately fixed up to reflect the same. Cc: Ambresh K Signed-off-by: Suman Anna Signed-off-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c index 18f333c..810c205 100644 --- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c @@ -1365,11 +1365,10 @@ static struct omap_hwmod_class_sysconfig dra7xx_spinlock_sysc = { .rev_offs = 0x0000, .sysc_offs = 0x0010, .syss_offs = 0x0014, - .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY | - SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE | - SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS), - .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | - SIDLE_SMART_WKUP), + .sysc_flags = (SYSC_HAS_AUTOIDLE | SYSC_HAS_ENAWAKEUP | + SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET | + SYSS_HAS_RESET_STATUS), + .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART), .sysc_fields = &omap_hwmod_sysc_type1, }; -- cgit v0.10.2 From 01142519ffc0734436f26b01aeed37a915dece05 Mon Sep 17 00:00:00 2001 From: Illia Smyrnov Date: Wed, 5 Feb 2014 17:06:09 +0200 Subject: ARM: OMAP4: hwmod: Fix SOFTRESET logic for OMAP4 Commit 313a76e (ARM: OMAP2+: hwmod: Fix SOFTRESET logic) introduced softreset bit cleaning right after set one. It is caused L3 error for OMAP4 ISS because ISS register write occurs when ISS reset process is in progress. Avoid this situation by cleaning softreset bit later, when reset process is successfully finished. Signed-off-by: Illia Smyrnov Reviewed-by: Grygorii Strashko Acked-by: Roger Quadros Signed-off-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 42d8188..1f33f5d 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -1947,29 +1947,31 @@ static int _ocp_softreset(struct omap_hwmod *oh) goto dis_opt_clks; _write_sysconfig(v, oh); - ret = _clear_softreset(oh, &v); - if (ret) - goto dis_opt_clks; - - _write_sysconfig(v, oh); if (oh->class->sysc->srst_udelay) udelay(oh->class->sysc->srst_udelay); c = _wait_softreset_complete(oh); - if (c == MAX_MODULE_SOFTRESET_WAIT) + if (c == MAX_MODULE_SOFTRESET_WAIT) { pr_warning("omap_hwmod: %s: softreset failed (waited %d usec)\n", oh->name, MAX_MODULE_SOFTRESET_WAIT); - else + ret = -ETIMEDOUT; + goto dis_opt_clks; + } else { pr_debug("omap_hwmod: %s: softreset in %d usec\n", oh->name, c); + } + + ret = _clear_softreset(oh, &v); + if (ret) + goto dis_opt_clks; + + _write_sysconfig(v, oh); /* * XXX add _HWMOD_STATE_WEDGED for modules that don't come back from * _wait_target_ready() or _reset() */ - ret = (c == MAX_MODULE_SOFTRESET_WAIT) ? -ETIMEDOUT : 0; - dis_opt_clks: if (oh->flags & HWMOD_CONTROL_OPT_CLKS_IN_RESET) _disable_optional_clocks(oh); -- cgit v0.10.2 From 994c41ee0ac875797b4dfef509ac7753e2649b4d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 30 Jan 2014 13:17:20 +0200 Subject: ARM: OMAP2+: clock: fix clkoutx2 with CLK_SET_RATE_PARENT If CLK_SET_RATE_PARENT is set for a clkoutx2 clock, calling clk_set_rate() on the clock "skips" the x2 multiplier as there are no set_rate and round_rate functions defined for the clkoutx2. This results in getting double the requested clock rates, breaking the display on omap3430 based devices. This got broken when d0f58bd3bba3877fb1af4664c4e33273d36f00e4 and related patches were merged for v3.14, as omapdss driver now relies more on the clk-framework and CLK_SET_RATE_PARENT. This patch implements set_rate and round_rate for clkoutx2. Tested on OMAP3430, OMAP3630, OMAP4460. Signed-off-by: Tomi Valkeinen Acked-by: Tero Kristo Signed-off-by: Paul Walmsley diff --git a/arch/arm/mach-omap2/cclock3xxx_data.c b/arch/arm/mach-omap2/cclock3xxx_data.c index 3b05aea..11ed915 100644 --- a/arch/arm/mach-omap2/cclock3xxx_data.c +++ b/arch/arm/mach-omap2/cclock3xxx_data.c @@ -433,7 +433,9 @@ static const struct clk_ops dpll4_m5x2_ck_ops = { .enable = &omap2_dflt_clk_enable, .disable = &omap2_dflt_clk_disable, .is_enabled = &omap2_dflt_clk_is_enabled, + .set_rate = &omap3_clkoutx2_set_rate, .recalc_rate = &omap3_clkoutx2_recalc, + .round_rate = &omap3_clkoutx2_round_rate, }; static const struct clk_ops dpll4_m5x2_ck_3630_ops = { diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index 3185ced..3c418ea 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -623,6 +623,32 @@ void omap3_dpll_deny_idle(struct clk_hw_omap *clk) /* Clock control for DPLL outputs */ +/* Find the parent DPLL for the given clkoutx2 clock */ +static struct clk_hw_omap *omap3_find_clkoutx2_dpll(struct clk_hw *hw) +{ + struct clk_hw_omap *pclk = NULL; + struct clk *parent; + + /* Walk up the parents of clk, looking for a DPLL */ + do { + do { + parent = __clk_get_parent(hw->clk); + hw = __clk_get_hw(parent); + } while (hw && (__clk_get_flags(hw->clk) & CLK_IS_BASIC)); + if (!hw) + break; + pclk = to_clk_hw_omap(hw); + } while (pclk && !pclk->dpll_data); + + /* clk does not have a DPLL as a parent? error in the clock data */ + if (!pclk) { + WARN_ON(1); + return NULL; + } + + return pclk; +} + /** * omap3_clkoutx2_recalc - recalculate DPLL X2 output virtual clock rate * @clk: DPLL output struct clk @@ -637,27 +663,14 @@ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long rate; u32 v; struct clk_hw_omap *pclk = NULL; - struct clk *parent; if (!parent_rate) return 0; - /* Walk up the parents of clk, looking for a DPLL */ - do { - do { - parent = __clk_get_parent(hw->clk); - hw = __clk_get_hw(parent); - } while (hw && (__clk_get_flags(hw->clk) & CLK_IS_BASIC)); - if (!hw) - break; - pclk = to_clk_hw_omap(hw); - } while (pclk && !pclk->dpll_data); + pclk = omap3_find_clkoutx2_dpll(hw); - /* clk does not have a DPLL as a parent? error in the clock data */ - if (!pclk) { - WARN_ON(1); + if (!pclk) return 0; - } dd = pclk->dpll_data; @@ -672,6 +685,55 @@ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, return rate; } +int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + return 0; +} + +long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + const struct dpll_data *dd; + u32 v; + struct clk_hw_omap *pclk = NULL; + + if (!*prate) + return 0; + + pclk = omap3_find_clkoutx2_dpll(hw); + + if (!pclk) + return 0; + + dd = pclk->dpll_data; + + /* TYPE J does not have a clkoutx2 */ + if (dd->flags & DPLL_J_TYPE) { + *prate = __clk_round_rate(__clk_get_parent(pclk->hw.clk), rate); + return *prate; + } + + WARN_ON(!dd->enable_mask); + + v = omap2_clk_readl(pclk, dd->control_reg) & dd->enable_mask; + v >>= __ffs(dd->enable_mask); + + /* If in bypass, the rate is fixed to the bypass rate*/ + if (v != OMAP3XXX_EN_DPLL_LOCKED) + return *prate; + + if (__clk_get_flags(hw->clk) & CLK_SET_RATE_PARENT) { + unsigned long best_parent; + + best_parent = (rate / 2); + *prate = __clk_round_rate(__clk_get_parent(hw->clk), + best_parent); + } + + return *prate * 2; +} + /* OMAP3/4 non-CORE DPLL clkops */ const struct clk_hw_omap_ops clkhwops_omap3_dpll = { .allow_idle = omap3_dpll_allow_idle, diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 092b641..4a21a87 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -245,6 +245,10 @@ long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, void omap2_init_clk_clkdm(struct clk_hw *clk); unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, unsigned long parent_rate); +int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); +long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate); int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); int omap2_clk_disable_autoidle_all(void); -- cgit v0.10.2 From b3634575930857724d5c3987a2739b0637999b4e Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 18 Feb 2014 17:02:54 +0100 Subject: ARM: 7980/1: kernel: improve error message when LPAE config doesn't match CPU Currently, when the kernel is configured with LPAE support, but the CPU doesn't support it, the error message is fairly cryptic: Error: unrecognized/unsupported processor variant (0x561f5811). This messages is normally shown when there is an issue when comparing the processor ID (CP15 0, c0, c0) with the values/masks described in proc-v7.S. However, the same message is displayed when LPAE support is enabled in the kernel configuration, but not available in the CPU, after looking at ID_MMFR0 (CP15 0, c0, c1, 4). Having the same error message is highly misleading. This commit improves this by showing a different error message when this situation occurs: Error: Kernel with LPAE support, but CPU does not support LPAE. Signed-off-by: Thomas Petazzoni Signed-off-by: Russell King diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 47cd974..c96ecac 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -177,6 +177,18 @@ __lookup_processor_type_data: .long __proc_info_end .size __lookup_processor_type_data, . - __lookup_processor_type_data +__error_lpae: +#ifdef CONFIG_DEBUG_LL + adr r0, str_lpae + bl printascii + b __error +str_lpae: .asciz "\nError: Kernel with LPAE support, but CPU does not support LPAE.\n" +#else + b __error +#endif + .align +ENDPROC(__error_lpae) + __error_p: #ifdef CONFIG_DEBUG_LL adr r0, str_p1 diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 914616e..f5f381d 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -102,7 +102,7 @@ ENTRY(stext) and r3, r3, #0xf @ extract VMSA support cmp r3, #5 @ long-descriptor translation table format? THUMB( it lo ) @ force fixup-able long branch encoding - blo __error_p @ only classic page table format + blo __error_lpae @ only classic page table format #endif #ifndef CONFIG_XIP_KERNEL -- cgit v0.10.2 From fd694131bb033e7b1fd748498cd9a70e9c16420b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:36 -0800 Subject: blk-mq: remove blk_mq_alloc_rq There's only one caller, which is a straight wrapper and fits the naming scheme of the related functions a lot better. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index 1fa9dd1..1e585e3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -73,8 +73,8 @@ static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, set_bit(ctx->index_hw, hctx->ctx_map); } -static struct request *blk_mq_alloc_rq(struct blk_mq_hw_ctx *hctx, gfp_t gfp, - bool reserved) +static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, + gfp_t gfp, bool reserved) { struct request *rq; unsigned int tag; @@ -193,12 +193,6 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, ctx->rq_dispatched[rw_is_sync(rw_flags)]++; } -static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, - gfp_t gfp, bool reserved) -{ - return blk_mq_alloc_rq(hctx, gfp, reserved); -} - static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, int rw, gfp_t gfp, bool reserved) -- cgit v0.10.2 From feb71dae1f9e0aeb056f7f639a21e620d327fc66 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:37 -0800 Subject: blk-mq: merge blk_mq_insert_request and blk_mq_run_request It's almost identical to blk_mq_insert_request, so fold the two into one slightly more generic function by making the flush special case a bit smarted. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/block/blk-exec.c b/block/blk-exec.c index c68613b..dbf4502 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, * be resued after dying flag is set */ if (q->mq_ops) { - blk_mq_insert_request(q, rq, at_head, true); + blk_mq_insert_request(rq, at_head, true, false); return; } diff --git a/block/blk-flush.c b/block/blk-flush.c index 66e2b69..f598f79 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -137,7 +137,7 @@ static void mq_flush_run(struct work_struct *work) rq = container_of(work, struct request, mq_flush_work); memset(&rq->csd, 0, sizeof(rq->csd)); - blk_mq_run_request(rq, true, false); + blk_mq_insert_request(rq, false, true, false); } static bool blk_flush_queue_rq(struct request *rq) @@ -411,7 +411,7 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { if (q->mq_ops) { - blk_mq_run_request(rq, false, true); + blk_mq_insert_request(rq, false, false, true); } else list_add_tail(&rq->queuelist, &q->queue_head); return; diff --git a/block/blk-mq.c b/block/blk-mq.c index 1e585e3..2af8405 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -724,61 +724,28 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, blk_mq_add_timer(rq); } -void blk_mq_insert_request(struct request_queue *q, struct request *rq, - bool at_head, bool run_queue) +void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, + bool async) { + struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx, *current_ctx; + struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx; + + current_ctx = blk_mq_get_ctx(q); + if (!cpu_online(ctx->cpu)) + rq->mq_ctx = ctx = current_ctx; - ctx = rq->mq_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); - if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) { + if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) && + !(rq->cmd_flags & (REQ_FLUSH_SEQ))) { blk_insert_flush(rq); } else { - current_ctx = blk_mq_get_ctx(q); - - if (!cpu_online(ctx->cpu)) { - ctx = current_ctx; - hctx = q->mq_ops->map_queue(q, ctx->cpu); - rq->mq_ctx = ctx; - } spin_lock(&ctx->lock); __blk_mq_insert_request(hctx, rq, at_head); spin_unlock(&ctx->lock); - - blk_mq_put_ctx(current_ctx); } - if (run_queue) - __blk_mq_run_hw_queue(hctx); -} -EXPORT_SYMBOL(blk_mq_insert_request); - -/* - * This is a special version of blk_mq_insert_request to bypass FLUSH request - * check. Should only be used internally. - */ -void blk_mq_run_request(struct request *rq, bool run_queue, bool async) -{ - struct request_queue *q = rq->q; - struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx, *current_ctx; - - current_ctx = blk_mq_get_ctx(q); - - ctx = rq->mq_ctx; - if (!cpu_online(ctx->cpu)) { - ctx = current_ctx; - rq->mq_ctx = ctx; - } - hctx = q->mq_ops->map_queue(q, ctx->cpu); - - /* ctx->cpu might be offline */ - spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, false); - spin_unlock(&ctx->lock); - blk_mq_put_ctx(current_ctx); if (run_queue) diff --git a/block/blk-mq.h b/block/blk-mq.h index ed0035c..72beba1 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -23,7 +23,6 @@ struct blk_mq_ctx { }; void __blk_mq_complete_request(struct request *rq); -void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 18ba8a6..ff28fe3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -121,8 +121,7 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request_queue *, struct request *, - bool, bool); +void blk_mq_insert_request(struct request *, bool, bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -- cgit v0.10.2 From d6a25b31315327eef7785b895c354cc45c3f3742 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Feb 2014 15:32:38 -0800 Subject: blk-mq: support partial I/O completions Add a new blk_mq_end_io_partial function to partially complete requests as needed by the SCSI layer. We do this by reusing blk_update_request to advance the bio instead of having a simplified version of it in the blk-mq code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index 2af8405..1b8b50d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -283,38 +283,10 @@ void blk_mq_free_request(struct request *rq) __blk_mq_free_request(hctx, ctx, rq); } -static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error) +bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes) { - if (error) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; - - if (unlikely(rq->cmd_flags & REQ_QUIET)) - set_bit(BIO_QUIET, &bio->bi_flags); - - /* don't actually finish bio if it's part of flush sequence */ - if (!(rq->cmd_flags & REQ_FLUSH_SEQ)) - bio_endio(bio, error); -} - -void blk_mq_end_io(struct request *rq, int error) -{ - struct bio *bio = rq->bio; - unsigned int bytes = 0; - - trace_block_rq_complete(rq->q, rq); - - while (bio) { - struct bio *next = bio->bi_next; - - bio->bi_next = NULL; - bytes += bio->bi_iter.bi_size; - blk_mq_bio_endio(rq, bio, error); - bio = next; - } - - blk_account_io_completion(rq, bytes); + if (blk_update_request(rq, error, blk_rq_bytes(rq))) + return true; blk_account_io_done(rq); @@ -322,8 +294,9 @@ void blk_mq_end_io(struct request *rq, int error) rq->end_io(rq, error); else blk_mq_free_request(rq); + return false; } -EXPORT_SYMBOL(blk_mq_end_io); +EXPORT_SYMBOL(blk_mq_end_io_partial); static void __blk_mq_complete_request_remote(void *data) { diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ff28fe3..2ff2e8d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -133,7 +133,13 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_ind struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); -void blk_mq_end_io(struct request *rq, int error); +bool blk_mq_end_io_partial(struct request *rq, int error, + unsigned int nr_bytes); +static inline void blk_mq_end_io(struct request *rq, int error) +{ + bool done = !blk_mq_end_io_partial(rq, error, blk_rq_bytes(rq)); + BUG_ON(!done); +} void blk_mq_complete_request(struct request *rq); -- cgit v0.10.2 From deff82e688a278eb4b822fd616c05fc62907bb71 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 17 Feb 2014 22:30:58 +0100 Subject: ARM: OMAP2+: Add support for thumb mode on DT booted N900 Without enabling the workaround for ARM errata 430973 thumb compiled userland crashes randomly on the Nokia N900. Signed-off-by: Sebastian Reichel Reviewed-by: Pavel Machek Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 3d5b24d..0cc710d 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -22,6 +22,8 @@ #include "common-board-devices.h" #include "dss-common.h" #include "control.h" +#include "omap-secure.h" +#include "soc.h" struct pdata_init { const char *compatible; @@ -169,6 +171,22 @@ static void __init am3517_evm_legacy_init(void) omap_ctrl_writel(v, AM35XX_CONTROL_IP_SW_RESET); omap_ctrl_readl(AM35XX_CONTROL_IP_SW_RESET); /* OCP barrier */ } + +static void __init nokia_n900_legacy_init(void) +{ + hsmmc2_internal_input_clk(); + + if (omap_type() == OMAP2_DEVICE_TYPE_SEC) { + if (IS_ENABLED(CONFIG_ARM_ERRATA_430973)) { + pr_info("RX-51: Enabling ARM errata 430973 workaround\n"); + /* set IBE to 1 */ + rx51_secure_update_aux_cr(BIT(6), 0); + } else { + pr_warning("RX-51: Not enabling ARM errata 430973 workaround\n"); + pr_warning("Thumb binaries may crash randomly without this workaround\n"); + } + } +} #endif /* CONFIG_ARCH_OMAP3 */ #ifdef CONFIG_ARCH_OMAP4 @@ -259,7 +277,7 @@ struct of_dev_auxdata omap_auxdata_lookup[] __initdata = { static struct pdata_init pdata_quirks[] __initdata = { #ifdef CONFIG_ARCH_OMAP3 { "compulab,omap3-sbc-t3730", omap3_sbc_t3730_legacy_init, }, - { "nokia,omap3-n900", hsmmc2_internal_input_clk, }, + { "nokia,omap3-n900", nokia_n900_legacy_init, }, { "nokia,omap3-n9", hsmmc2_internal_input_clk, }, { "nokia,omap3-n950", hsmmc2_internal_input_clk, }, { "isee,omap3-igep0020", omap3_igep0020_legacy_init, }, -- cgit v0.10.2 From 865da01cd98d67518befe854a71e432d894db279 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 17 Feb 2014 13:22:55 +0530 Subject: ARM: OMAP: Kill warning in CPUIDLE code with !CONFIG_SMP for non SMP build, NR_CPUS is 1 and hence the code complains with below warnings. arch/arm/mach-omap2/cpuidle44xx.c:207:8: warning: array subscript is above array bounds [-Warray-bounds] arch/arm/mach-omap2/cpuidle44xx.c:212:11: warning: array subscript is above array bounds [-Warray-bounds] Kill it by making array size fixed. Acked-by: Nishanth Menon Signed-off-by: Santosh Shilimkar Signed-off-by: Mugunthan V N Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index 4c158c8..01fc710 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -23,6 +23,8 @@ #include "prm.h" #include "clockdomain.h" +#define MAX_CPUS 2 + /* Machine specific information */ struct idle_statedata { u32 cpu_state; @@ -48,11 +50,11 @@ static struct idle_statedata omap4_idle_data[] = { }, }; -static struct powerdomain *mpu_pd, *cpu_pd[NR_CPUS]; -static struct clockdomain *cpu_clkdm[NR_CPUS]; +static struct powerdomain *mpu_pd, *cpu_pd[MAX_CPUS]; +static struct clockdomain *cpu_clkdm[MAX_CPUS]; static atomic_t abort_barrier; -static bool cpu_done[NR_CPUS]; +static bool cpu_done[MAX_CPUS]; static struct idle_statedata *state_ptr = &omap4_idle_data[0]; /* Private functions */ -- cgit v0.10.2 From eec70897d81bb2913e22c6792ea2739faf59c3e1 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 20 Feb 2014 00:52:01 +0000 Subject: bio-integrity: Drop bio_integrity_verify BUG_ON in post bip->bip_iter world Given that bip->bip_iter.bi_size is decremented after bio_advance() -> bio_integrity_advance() is called, the BUG_ON() in bio_integrity_verify() ends up tripping in v3.14-rc1 code with the advent of immutable biovecs in: commit d57a5f7c6605f15f3b5134837e68b448a7cea88e Author: Kent Overstreet Date: Sat Nov 23 17:20:16 2013 -0800 bio-integrity: Convert to bvec_iter Given that there is no easy way to ascertain the original bi_size value, go ahead and drop this BUG_ON(). Reported-by: Sagi Grimberg Reported-by: Akinobu Mita Acked-by: Martin K. Petersen Cc: Kent Overstreet Signed-off-by: Nicholas Bellinger Signed-off-by: Jens Axboe diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 0129b78..4f70f38 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -458,11 +458,10 @@ static int bio_integrity_verify(struct bio *bio) struct blk_integrity_exchg bix; struct bio_vec *bv; sector_t sector = bio->bi_integrity->bip_iter.bi_sector; - unsigned int sectors, total, ret; + unsigned int sectors, ret = 0; void *prot_buf = bio->bi_integrity->bip_buf; int i; - ret = total = 0; bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; @@ -484,8 +483,6 @@ static int bio_integrity_verify(struct bio *bio) sectors = bv->bv_len / bi->sector_size; sector += sectors; prot_buf += sectors * bi->tuple_size; - total += sectors * bi->tuple_size; - BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); kunmap_atomic(kaddr); } -- cgit v0.10.2 From 6b1168e1617d9d4db73ef5276490627abf5adec4 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Fri, 14 Feb 2014 13:31:03 +0400 Subject: CIFS: Fix wrong pos argument of cifs_find_lock_conflict and use generic_file_aio_write rather than __generic_file_aio_write in cifs_writev. Signed-off-by: Pavel Shilovsky Reported-by: Al Viro Signed-off-by: Steve French diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 53c1507..834fce7 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2579,31 +2579,19 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; ssize_t rc = -EACCES; + loff_t lock_pos = pos; - BUG_ON(iocb->ki_pos != pos); - + if (file->f_flags & O_APPEND) + lock_pos = i_size_read(inode); /* * We need to hold the sem to be sure nobody modifies lock list * with a brlock that prevents writing. */ down_read(&cinode->lock_sem); - if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) { - mutex_lock(&inode->i_mutex); - rc = __generic_file_aio_write(iocb, iov, nr_segs, - &iocb->ki_pos); - mutex_unlock(&inode->i_mutex); - } - - if (rc > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - rc, rc); - if (err < 0) - rc = err; - } - + CIFS_WRITE_OP)) + rc = generic_file_aio_write(iocb, iov, nr_segs, pos); up_read(&cinode->lock_sem); return rc; } -- cgit v0.10.2 From a26054d184763969a411e3939fe243516715ff59 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Feb 2014 07:21:00 -0500 Subject: cifs: sanity check length of data to send before sending We had a bug discovered recently where an upper layer function (cifs_iovec_write) could pass down a smb_rqst with an invalid amount of data in it. The length of the SMB frame would be correct, but the rqst struct would cause smb_send_rqst to send nearly 4GB of data. This should never be the case. Add some sanity checking to the beginning of smb_send_rqst that ensures that the amount of data we're going to send agrees with the length in the RFC1002 header. If it doesn't, WARN() and return -EIO to the upper layers. Signed-off-by: Jeff Layton Acked-by: Sachin Prabhu Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b375709..18cd565 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -270,6 +270,26 @@ cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, iov->iov_len = rqst->rq_pagesz; } +static unsigned long +rqst_len(struct smb_rqst *rqst) +{ + unsigned int i; + struct kvec *iov = rqst->rq_iov; + unsigned long buflen = 0; + + /* total up iov array first */ + for (i = 0; i < rqst->rq_nvec; i++) + buflen += iov[i].iov_len; + + /* add in the page array if there is one */ + if (rqst->rq_npages) { + buflen += rqst->rq_pagesz * (rqst->rq_npages - 1); + buflen += rqst->rq_tailsz; + } + + return buflen; +} + static int smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) { @@ -277,6 +297,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); + unsigned long send_length; unsigned int i; size_t total_len = 0, sent; struct socket *ssocket = server->ssocket; @@ -285,6 +306,14 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) if (ssocket == NULL) return -ENOTSOCK; + /* sanity check send length */ + send_length = rqst_len(rqst); + if (send_length != smb_buf_length + 4) { + WARN(1, "Send length mismatch(send_length=%lu smb_buf_length=%u)\n", + send_length, smb_buf_length); + return -EIO; + } + cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length); dump_smb(iov[0].iov_base, iov[0].iov_len); -- cgit v0.10.2 From a2530060c1157a7863abbe1fea3cfdd5da55bd48 Mon Sep 17 00:00:00 2001 From: Sherman Yin Date: Thu, 23 Jan 2014 12:44:47 -0800 Subject: Update dtsi with new pinctrl compatible string This commit updates bcm11351.dtsi with the new compatible string for the same driver. Signed-off-by: Sherman Yin Reviewed-by: Matt Porter Acked-by: Linus Walleij Signed-off-by: Christian Daudt diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi index e491b82..792fde1 100644 --- a/arch/arm/boot/dts/bcm11351.dtsi +++ b/arch/arm/boot/dts/bcm11351.dtsi @@ -147,7 +147,7 @@ }; pinctrl@35004800 { - compatible = "brcm,capri-pinctrl"; + compatible = "brcm,bcm11351-pinctrl"; reg = <0x35004800 0x430>; }; -- cgit v0.10.2 From d0deca0276807740f3e688d97196122511b8f765 Mon Sep 17 00:00:00 2001 From: Christian Daudt Date: Mon, 24 Feb 2014 09:15:35 -0800 Subject: pinctrl: refer to updated dt binding string. Bring the driver in line with the bcm-based dt name for pinctrl. This is being done to keep consistency with other Broadcom mobile SoC drivers. Signed-off-by: Christian Daudt Reviewed-by: Matt Porter diff --git a/drivers/pinctrl/pinctrl-capri.c b/drivers/pinctrl/pinctrl-capri.c index 4669c53..eb25002 100644 --- a/drivers/pinctrl/pinctrl-capri.c +++ b/drivers/pinctrl/pinctrl-capri.c @@ -1435,7 +1435,7 @@ int __init capri_pinctrl_probe(struct platform_device *pdev) } static struct of_device_id capri_pinctrl_of_match[] = { - { .compatible = "brcm,capri-pinctrl", }, + { .compatible = "brcm,bcm11351-pinctrl", }, { }, }; -- cgit v0.10.2 From 735ea23c4868bf3123a4c79184e9206e0cc60211 Mon Sep 17 00:00:00 2001 From: Sherman Yin Date: Thu, 23 Jan 2014 12:44:44 -0800 Subject: pinctrl: Rename Broadcom Capri pinctrl binding The compatible string of the Broadcom Capri pinctrl driver is renamed to "brcm,bcm11351-pinctrl" to match the machine binding here: Documentation/devicetree/bindings/arm/bcm/bcm11351.txt Signed-off-by: Sherman Yin Reviewed-by: Matt Porter Acked-by: Linus Walleij Signed-off-by: Christian Daudt diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt new file mode 100644 index 0000000..c119deb --- /dev/null +++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt @@ -0,0 +1,461 @@ +Broadcom BCM281xx Pin Controller + +This is a pin controller for the Broadcom BCM281xx SoC family, which includes +BCM11130, BCM11140, BCM11351, BCM28145, and BCM28155 SoCs. + +=== Pin Controller Node === + +Required Properties: + +- compatible: Must be "brcm,bcm11351-pinctrl" +- reg: Base address of the PAD Controller register block and the size + of the block. + +For example, the following is the bare minimum node: + + pinctrl@35004800 { + compatible = "brcm,bcm11351-pinctrl"; + reg = <0x35004800 0x430>; + }; + +As a pin controller device, in addition to the required properties, this node +should also contain the pin configuration nodes that client devices reference, +if any. + +=== Pin Configuration Node === + +Each pin configuration node is a sub-node of the pin controller node and is a +container of an arbitrary number of subnodes, called pin group nodes in this +document. + +Please refer to the pinctrl-bindings.txt in this directory for details of the +common pinctrl bindings used by client devices, including the definition of a +"pin configuration node". + +=== Pin Group Node === + +A pin group node specifies the desired pin mux and/or pin configuration for an +arbitrary number of pins. The name of the pin group node is optional and not +used. + +A pin group node only affects the properties specified in the node, and has no +effect on any properties that are omitted. + +The pin group node accepts a subset of the generic pin config properties. For +details generic pin config properties, please refer to pinctrl-bindings.txt +and . + +Each pin controlled by this pin controller belong to one of three types: +Standard, I2C, and HDMI. Each type accepts a different set of pin config +properties. A list of pins and their types is provided below. + +Required Properties (applicable to all pins): + +- pins: Multiple strings. Specifies the name(s) of one or more pins to + be configured by this node. + +Optional Properties (for standard pins): + +- function: String. Specifies the pin mux selection. Values + must be one of: "alt1", "alt2", "alt3", "alt4" +- input-schmitt-enable: No arguments. Enable schmitt-trigger mode. +- input-schmitt-disable: No arguments. Disable schmitt-trigger mode. +- bias-pull-up: No arguments. Pull up on pin. +- bias-pull-down: No arguments. Pull down on pin. +- bias-disable: No arguments. Disable pin bias. +- slew-rate: Integer. Meaning depends on configured pin mux: + *_SCL or *_SDA: + 0: Standard(100kbps)& Fast(400kbps) mode + 1: Highspeed (3.4Mbps) mode + IC_DM or IC_DP: + 0: normal slew rate + 1: fast slew rate + Otherwise: + 0: fast slew rate + 1: normal slew rate +- input-enable: No arguements. Enable input (does not affect + output.) +- input-disable: No arguements. Disable input (does not affect + output.) +- drive-strength: Integer. Drive strength in mA. Valid values are + 2, 4, 6, 8, 10, 12, 14, 16 mA. + +Optional Properties (for I2C pins): + +- function: String. Specifies the pin mux selection. Values + must be one of: "alt1", "alt2", "alt3", "alt4" +- bias-pull-up: Integer. Pull up strength in Ohm. There are 3 + pull-up resisitors (1.2k, 1.8k, 2.7k) available + in parallel for I2C pins, so the valid values + are: 568, 720, 831, 1080, 1200, 1800, 2700 Ohm. +- bias-disable: No arguments. Disable pin bias. +- slew-rate: Integer. Meaning depends on configured pin mux: + *_SCL or *_SDA: + 0: Standard(100kbps)& Fast(400kbps) mode + 1: Highspeed (3.4Mbps) mode + IC_DM or IC_DP: + 0: normal slew rate + 1: fast slew rate + Otherwise: + 0: fast slew rate + 1: normal slew rate +- input-enable: No arguements. Enable input (does not affect + output.) +- input-disable: No arguements. Disable input (does not affect + output.) + +Optional Properties (for HDMI pins): + +- function: String. Specifies the pin mux selection. Values + must be one of: "alt1", "alt2", "alt3", "alt4" +- slew-rate: Integer. Controls slew rate. + 0: Standard(100kbps)& Fast(400kbps) mode + 1: Highspeed (3.4Mbps) mode +- input-enable: No arguements. Enable input (does not affect + output.) +- input-disable: No arguements. Disable input (does not affect + output.) + +Example: +// pin controller node +pinctrl@35004800 { + compatible = "brcmbcm11351-pinctrl"; + reg = <0x35004800 0x430>; + + // pin configuration node + dev_a_default: dev_a_active { + //group node defining 1 standard pin + grp_1 { + pins = "std_pin1"; + function = "alt1"; + input-schmitt-enable; + bias-disable; + slew-rate = <1>; + drive-strength = <4>; + }; + + // group node defining 2 I2C pins + grp_2 { + pins = "i2c_pin1", "i2c_pin2"; + function = "alt2"; + bias-pull-up = <720>; + input-enable; + }; + + // group node defining 2 HDMI pins + grp_3 { + pins = "hdmi_pin1", "hdmi_pin2"; + function = "alt3"; + slew-rate = <1>; + }; + + // other pin group nodes + ... + }; + + // other pin configuration nodes + ... +}; + +In the example above, "dev_a_active" is a pin configuration node with a number +of sub-nodes. In the pin group node "grp_1", one pin, "std_pin1", is defined in +the "pins" property. Thus, the remaining properties in the "grp_1" node applies +only to this pin, including the following settings: + - setting pinmux to "alt1" + - enabling schmitt-trigger (hystersis) mode + - disabling pin bias + - setting the slew-rate to 1 + - setting the drive strength to 4 mA +Note that neither "input-enable" nor "input-disable" was specified - the pinctrl +subsystem will therefore leave this property unchanged from whatever state it +was in before applying these changes. + +The "pins" property in the pin group node "grp_2" specifies two pins - +"i2c_pin1" and "i2c_pin2"; the remaining properties in this pin group node, +therefore, applies to both of these pins. The properties include: + - setting pinmux to "alt2" + - setting pull-up resistance to 720 Ohm (ie. enabling 1.2k and 1.8k resistors + in parallel) + - enabling both pins' input +"slew-rate" is not specified in this pin group node, so the slew-rate for these +pins are left as-is. + +Finally, "grp_3" defines two HDMI pins. The following properties are applied to +both pins: + - setting pinmux to "alt3" + - setting slew-rate to 1; for HDMI pins, this corresponds to the 3.4 Mbps + Highspeed mode +The input is neither enabled or disabled, and is left untouched. + +=== Pin Names and Type === + +The following are valid pin names and their pin types: + + "adcsync", Standard + "bat_rm", Standard + "bsc1_scl", I2C + "bsc1_sda", I2C + "bsc2_scl", I2C + "bsc2_sda", I2C + "classgpwr", Standard + "clk_cx8", Standard + "clkout_0", Standard + "clkout_1", Standard + "clkout_2", Standard + "clkout_3", Standard + "clkreq_in_0", Standard + "clkreq_in_1", Standard + "cws_sys_req1", Standard + "cws_sys_req2", Standard + "cws_sys_req3", Standard + "digmic1_clk", Standard + "digmic1_dq", Standard + "digmic2_clk", Standard + "digmic2_dq", Standard + "gpen13", Standard + "gpen14", Standard + "gpen15", Standard + "gpio00", Standard + "gpio01", Standard + "gpio02", Standard + "gpio03", Standard + "gpio04", Standard + "gpio05", Standard + "gpio06", Standard + "gpio07", Standard + "gpio08", Standard + "gpio09", Standard + "gpio10", Standard + "gpio11", Standard + "gpio12", Standard + "gpio13", Standard + "gpio14", Standard + "gps_pablank", Standard + "gps_tmark", Standard + "hdmi_scl", HDMI + "hdmi_sda", HDMI + "ic_dm", Standard + "ic_dp", Standard + "kp_col_ip_0", Standard + "kp_col_ip_1", Standard + "kp_col_ip_2", Standard + "kp_col_ip_3", Standard + "kp_row_op_0", Standard + "kp_row_op_1", Standard + "kp_row_op_2", Standard + "kp_row_op_3", Standard + "lcd_b_0", Standard + "lcd_b_1", Standard + "lcd_b_2", Standard + "lcd_b_3", Standard + "lcd_b_4", Standard + "lcd_b_5", Standard + "lcd_b_6", Standard + "lcd_b_7", Standard + "lcd_g_0", Standard + "lcd_g_1", Standard + "lcd_g_2", Standard + "lcd_g_3", Standard + "lcd_g_4", Standard + "lcd_g_5", Standard + "lcd_g_6", Standard + "lcd_g_7", Standard + "lcd_hsync", Standard + "lcd_oe", Standard + "lcd_pclk", Standard + "lcd_r_0", Standard + "lcd_r_1", Standard + "lcd_r_2", Standard + "lcd_r_3", Standard + "lcd_r_4", Standard + "lcd_r_5", Standard + "lcd_r_6", Standard + "lcd_r_7", Standard + "lcd_vsync", Standard + "mdmgpio0", Standard + "mdmgpio1", Standard + "mdmgpio2", Standard + "mdmgpio3", Standard + "mdmgpio4", Standard + "mdmgpio5", Standard + "mdmgpio6", Standard + "mdmgpio7", Standard + "mdmgpio8", Standard + "mphi_data_0", Standard + "mphi_data_1", Standard + "mphi_data_2", Standard + "mphi_data_3", Standard + "mphi_data_4", Standard + "mphi_data_5", Standard + "mphi_data_6", Standard + "mphi_data_7", Standard + "mphi_data_8", Standard + "mphi_data_9", Standard + "mphi_data_10", Standard + "mphi_data_11", Standard + "mphi_data_12", Standard + "mphi_data_13", Standard + "mphi_data_14", Standard + "mphi_data_15", Standard + "mphi_ha0", Standard + "mphi_hat0", Standard + "mphi_hat1", Standard + "mphi_hce0_n", Standard + "mphi_hce1_n", Standard + "mphi_hrd_n", Standard + "mphi_hwr_n", Standard + "mphi_run0", Standard + "mphi_run1", Standard + "mtx_scan_clk", Standard + "mtx_scan_data", Standard + "nand_ad_0", Standard + "nand_ad_1", Standard + "nand_ad_2", Standard + "nand_ad_3", Standard + "nand_ad_4", Standard + "nand_ad_5", Standard + "nand_ad_6", Standard + "nand_ad_7", Standard + "nand_ale", Standard + "nand_cen_0", Standard + "nand_cen_1", Standard + "nand_cle", Standard + "nand_oen", Standard + "nand_rdy_0", Standard + "nand_rdy_1", Standard + "nand_wen", Standard + "nand_wp", Standard + "pc1", Standard + "pc2", Standard + "pmu_int", Standard + "pmu_scl", I2C + "pmu_sda", I2C + "rfst2g_mtsloten3g", Standard + "rgmii_0_rx_ctl", Standard + "rgmii_0_rxc", Standard + "rgmii_0_rxd_0", Standard + "rgmii_0_rxd_1", Standard + "rgmii_0_rxd_2", Standard + "rgmii_0_rxd_3", Standard + "rgmii_0_tx_ctl", Standard + "rgmii_0_txc", Standard + "rgmii_0_txd_0", Standard + "rgmii_0_txd_1", Standard + "rgmii_0_txd_2", Standard + "rgmii_0_txd_3", Standard + "rgmii_1_rx_ctl", Standard + "rgmii_1_rxc", Standard + "rgmii_1_rxd_0", Standard + "rgmii_1_rxd_1", Standard + "rgmii_1_rxd_2", Standard + "rgmii_1_rxd_3", Standard + "rgmii_1_tx_ctl", Standard + "rgmii_1_txc", Standard + "rgmii_1_txd_0", Standard + "rgmii_1_txd_1", Standard + "rgmii_1_txd_2", Standard + "rgmii_1_txd_3", Standard + "rgmii_gpio_0", Standard + "rgmii_gpio_1", Standard + "rgmii_gpio_2", Standard + "rgmii_gpio_3", Standard + "rtxdata2g_txdata3g1", Standard + "rtxen2g_txdata3g2", Standard + "rxdata3g0", Standard + "rxdata3g1", Standard + "rxdata3g2", Standard + "sdio1_clk", Standard + "sdio1_cmd", Standard + "sdio1_data_0", Standard + "sdio1_data_1", Standard + "sdio1_data_2", Standard + "sdio1_data_3", Standard + "sdio4_clk", Standard + "sdio4_cmd", Standard + "sdio4_data_0", Standard + "sdio4_data_1", Standard + "sdio4_data_2", Standard + "sdio4_data_3", Standard + "sim_clk", Standard + "sim_data", Standard + "sim_det", Standard + "sim_resetn", Standard + "sim2_clk", Standard + "sim2_data", Standard + "sim2_det", Standard + "sim2_resetn", Standard + "sri_c", Standard + "sri_d", Standard + "sri_e", Standard + "ssp_extclk", Standard + "ssp0_clk", Standard + "ssp0_fs", Standard + "ssp0_rxd", Standard + "ssp0_txd", Standard + "ssp2_clk", Standard + "ssp2_fs_0", Standard + "ssp2_fs_1", Standard + "ssp2_fs_2", Standard + "ssp2_fs_3", Standard + "ssp2_rxd_0", Standard + "ssp2_rxd_1", Standard + "ssp2_txd_0", Standard + "ssp2_txd_1", Standard + "ssp3_clk", Standard + "ssp3_fs", Standard + "ssp3_rxd", Standard + "ssp3_txd", Standard + "ssp4_clk", Standard + "ssp4_fs", Standard + "ssp4_rxd", Standard + "ssp4_txd", Standard + "ssp5_clk", Standard + "ssp5_fs", Standard + "ssp5_rxd", Standard + "ssp5_txd", Standard + "ssp6_clk", Standard + "ssp6_fs", Standard + "ssp6_rxd", Standard + "ssp6_txd", Standard + "stat_1", Standard + "stat_2", Standard + "sysclken", Standard + "traceclk", Standard + "tracedt00", Standard + "tracedt01", Standard + "tracedt02", Standard + "tracedt03", Standard + "tracedt04", Standard + "tracedt05", Standard + "tracedt06", Standard + "tracedt07", Standard + "tracedt08", Standard + "tracedt09", Standard + "tracedt10", Standard + "tracedt11", Standard + "tracedt12", Standard + "tracedt13", Standard + "tracedt14", Standard + "tracedt15", Standard + "txdata3g0", Standard + "txpwrind", Standard + "uartb1_ucts", Standard + "uartb1_urts", Standard + "uartb1_urxd", Standard + "uartb1_utxd", Standard + "uartb2_urxd", Standard + "uartb2_utxd", Standard + "uartb3_ucts", Standard + "uartb3_urts", Standard + "uartb3_urxd", Standard + "uartb3_utxd", Standard + "uartb4_ucts", Standard + "uartb4_urts", Standard + "uartb4_urxd", Standard + "uartb4_utxd", Standard + "vc_cam1_scl", I2C + "vc_cam1_sda", I2C + "vc_cam2_scl", I2C + "vc_cam2_sda", I2C + "vc_cam3_scl", I2C + "vc_cam3_sda", I2C diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,capri-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/brcm,capri-pinctrl.txt deleted file mode 100644 index 9e9e9ef..0000000 --- a/Documentation/devicetree/bindings/pinctrl/brcm,capri-pinctrl.txt +++ /dev/null @@ -1,461 +0,0 @@ -Broadcom Capri Pin Controller - -This is a pin controller for the Broadcom BCM281xx SoC family, which includes -BCM11130, BCM11140, BCM11351, BCM28145, and BCM28155 SoCs. - -=== Pin Controller Node === - -Required Properties: - -- compatible: Must be "brcm,capri-pinctrl". -- reg: Base address of the PAD Controller register block and the size - of the block. - -For example, the following is the bare minimum node: - - pinctrl@35004800 { - compatible = "brcm,capri-pinctrl"; - reg = <0x35004800 0x430>; - }; - -As a pin controller device, in addition to the required properties, this node -should also contain the pin configuration nodes that client devices reference, -if any. - -=== Pin Configuration Node === - -Each pin configuration node is a sub-node of the pin controller node and is a -container of an arbitrary number of subnodes, called pin group nodes in this -document. - -Please refer to the pinctrl-bindings.txt in this directory for details of the -common pinctrl bindings used by client devices, including the definition of a -"pin configuration node". - -=== Pin Group Node === - -A pin group node specifies the desired pin mux and/or pin configuration for an -arbitrary number of pins. The name of the pin group node is optional and not -used. - -A pin group node only affects the properties specified in the node, and has no -effect on any properties that are omitted. - -The pin group node accepts a subset of the generic pin config properties. For -details generic pin config properties, please refer to pinctrl-bindings.txt -and . - -Each pin controlled by this pin controller belong to one of three types: -Standard, I2C, and HDMI. Each type accepts a different set of pin config -properties. A list of pins and their types is provided below. - -Required Properties (applicable to all pins): - -- pins: Multiple strings. Specifies the name(s) of one or more pins to - be configured by this node. - -Optional Properties (for standard pins): - -- function: String. Specifies the pin mux selection. Values - must be one of: "alt1", "alt2", "alt3", "alt4" -- input-schmitt-enable: No arguments. Enable schmitt-trigger mode. -- input-schmitt-disable: No arguments. Disable schmitt-trigger mode. -- bias-pull-up: No arguments. Pull up on pin. -- bias-pull-down: No arguments. Pull down on pin. -- bias-disable: No arguments. Disable pin bias. -- slew-rate: Integer. Meaning depends on configured pin mux: - *_SCL or *_SDA: - 0: Standard(100kbps)& Fast(400kbps) mode - 1: Highspeed (3.4Mbps) mode - IC_DM or IC_DP: - 0: normal slew rate - 1: fast slew rate - Otherwise: - 0: fast slew rate - 1: normal slew rate -- input-enable: No arguements. Enable input (does not affect - output.) -- input-disable: No arguements. Disable input (does not affect - output.) -- drive-strength: Integer. Drive strength in mA. Valid values are - 2, 4, 6, 8, 10, 12, 14, 16 mA. - -Optional Properties (for I2C pins): - -- function: String. Specifies the pin mux selection. Values - must be one of: "alt1", "alt2", "alt3", "alt4" -- bias-pull-up: Integer. Pull up strength in Ohm. There are 3 - pull-up resisitors (1.2k, 1.8k, 2.7k) available - in parallel for I2C pins, so the valid values - are: 568, 720, 831, 1080, 1200, 1800, 2700 Ohm. -- bias-disable: No arguments. Disable pin bias. -- slew-rate: Integer. Meaning depends on configured pin mux: - *_SCL or *_SDA: - 0: Standard(100kbps)& Fast(400kbps) mode - 1: Highspeed (3.4Mbps) mode - IC_DM or IC_DP: - 0: normal slew rate - 1: fast slew rate - Otherwise: - 0: fast slew rate - 1: normal slew rate -- input-enable: No arguements. Enable input (does not affect - output.) -- input-disable: No arguements. Disable input (does not affect - output.) - -Optional Properties (for HDMI pins): - -- function: String. Specifies the pin mux selection. Values - must be one of: "alt1", "alt2", "alt3", "alt4" -- slew-rate: Integer. Controls slew rate. - 0: Standard(100kbps)& Fast(400kbps) mode - 1: Highspeed (3.4Mbps) mode -- input-enable: No arguements. Enable input (does not affect - output.) -- input-disable: No arguements. Disable input (does not affect - output.) - -Example: -// pin controller node -pinctrl@35004800 { - compatible = "brcm,capri-pinctrl"; - reg = <0x35004800 0x430>; - - // pin configuration node - dev_a_default: dev_a_active { - //group node defining 1 standard pin - grp_1 { - pins = "std_pin1"; - function = "alt1"; - input-schmitt-enable; - bias-disable; - slew-rate = <1>; - drive-strength = <4>; - }; - - // group node defining 2 I2C pins - grp_2 { - pins = "i2c_pin1", "i2c_pin2"; - function = "alt2"; - bias-pull-up = <720>; - input-enable; - }; - - // group node defining 2 HDMI pins - grp_3 { - pins = "hdmi_pin1", "hdmi_pin2"; - function = "alt3"; - slew-rate = <1>; - }; - - // other pin group nodes - ... - }; - - // other pin configuration nodes - ... -}; - -In the example above, "dev_a_active" is a pin configuration node with a number -of sub-nodes. In the pin group node "grp_1", one pin, "std_pin1", is defined in -the "pins" property. Thus, the remaining properties in the "grp_1" node applies -only to this pin, including the following settings: - - setting pinmux to "alt1" - - enabling schmitt-trigger (hystersis) mode - - disabling pin bias - - setting the slew-rate to 1 - - setting the drive strength to 4 mA -Note that neither "input-enable" nor "input-disable" was specified - the pinctrl -subsystem will therefore leave this property unchanged from whatever state it -was in before applying these changes. - -The "pins" property in the pin group node "grp_2" specifies two pins - -"i2c_pin1" and "i2c_pin2"; the remaining properties in this pin group node, -therefore, applies to both of these pins. The properties include: - - setting pinmux to "alt2" - - setting pull-up resistance to 720 Ohm (ie. enabling 1.2k and 1.8k resistors - in parallel) - - enabling both pins' input -"slew-rate" is not specified in this pin group node, so the slew-rate for these -pins are left as-is. - -Finally, "grp_3" defines two HDMI pins. The following properties are applied to -both pins: - - setting pinmux to "alt3" - - setting slew-rate to 1; for HDMI pins, this corresponds to the 3.4 Mbps - Highspeed mode -The input is neither enabled or disabled, and is left untouched. - -=== Pin Names and Type === - -The following are valid pin names and their pin types: - - "adcsync", Standard - "bat_rm", Standard - "bsc1_scl", I2C - "bsc1_sda", I2C - "bsc2_scl", I2C - "bsc2_sda", I2C - "classgpwr", Standard - "clk_cx8", Standard - "clkout_0", Standard - "clkout_1", Standard - "clkout_2", Standard - "clkout_3", Standard - "clkreq_in_0", Standard - "clkreq_in_1", Standard - "cws_sys_req1", Standard - "cws_sys_req2", Standard - "cws_sys_req3", Standard - "digmic1_clk", Standard - "digmic1_dq", Standard - "digmic2_clk", Standard - "digmic2_dq", Standard - "gpen13", Standard - "gpen14", Standard - "gpen15", Standard - "gpio00", Standard - "gpio01", Standard - "gpio02", Standard - "gpio03", Standard - "gpio04", Standard - "gpio05", Standard - "gpio06", Standard - "gpio07", Standard - "gpio08", Standard - "gpio09", Standard - "gpio10", Standard - "gpio11", Standard - "gpio12", Standard - "gpio13", Standard - "gpio14", Standard - "gps_pablank", Standard - "gps_tmark", Standard - "hdmi_scl", HDMI - "hdmi_sda", HDMI - "ic_dm", Standard - "ic_dp", Standard - "kp_col_ip_0", Standard - "kp_col_ip_1", Standard - "kp_col_ip_2", Standard - "kp_col_ip_3", Standard - "kp_row_op_0", Standard - "kp_row_op_1", Standard - "kp_row_op_2", Standard - "kp_row_op_3", Standard - "lcd_b_0", Standard - "lcd_b_1", Standard - "lcd_b_2", Standard - "lcd_b_3", Standard - "lcd_b_4", Standard - "lcd_b_5", Standard - "lcd_b_6", Standard - "lcd_b_7", Standard - "lcd_g_0", Standard - "lcd_g_1", Standard - "lcd_g_2", Standard - "lcd_g_3", Standard - "lcd_g_4", Standard - "lcd_g_5", Standard - "lcd_g_6", Standard - "lcd_g_7", Standard - "lcd_hsync", Standard - "lcd_oe", Standard - "lcd_pclk", Standard - "lcd_r_0", Standard - "lcd_r_1", Standard - "lcd_r_2", Standard - "lcd_r_3", Standard - "lcd_r_4", Standard - "lcd_r_5", Standard - "lcd_r_6", Standard - "lcd_r_7", Standard - "lcd_vsync", Standard - "mdmgpio0", Standard - "mdmgpio1", Standard - "mdmgpio2", Standard - "mdmgpio3", Standard - "mdmgpio4", Standard - "mdmgpio5", Standard - "mdmgpio6", Standard - "mdmgpio7", Standard - "mdmgpio8", Standard - "mphi_data_0", Standard - "mphi_data_1", Standard - "mphi_data_2", Standard - "mphi_data_3", Standard - "mphi_data_4", Standard - "mphi_data_5", Standard - "mphi_data_6", Standard - "mphi_data_7", Standard - "mphi_data_8", Standard - "mphi_data_9", Standard - "mphi_data_10", Standard - "mphi_data_11", Standard - "mphi_data_12", Standard - "mphi_data_13", Standard - "mphi_data_14", Standard - "mphi_data_15", Standard - "mphi_ha0", Standard - "mphi_hat0", Standard - "mphi_hat1", Standard - "mphi_hce0_n", Standard - "mphi_hce1_n", Standard - "mphi_hrd_n", Standard - "mphi_hwr_n", Standard - "mphi_run0", Standard - "mphi_run1", Standard - "mtx_scan_clk", Standard - "mtx_scan_data", Standard - "nand_ad_0", Standard - "nand_ad_1", Standard - "nand_ad_2", Standard - "nand_ad_3", Standard - "nand_ad_4", Standard - "nand_ad_5", Standard - "nand_ad_6", Standard - "nand_ad_7", Standard - "nand_ale", Standard - "nand_cen_0", Standard - "nand_cen_1", Standard - "nand_cle", Standard - "nand_oen", Standard - "nand_rdy_0", Standard - "nand_rdy_1", Standard - "nand_wen", Standard - "nand_wp", Standard - "pc1", Standard - "pc2", Standard - "pmu_int", Standard - "pmu_scl", I2C - "pmu_sda", I2C - "rfst2g_mtsloten3g", Standard - "rgmii_0_rx_ctl", Standard - "rgmii_0_rxc", Standard - "rgmii_0_rxd_0", Standard - "rgmii_0_rxd_1", Standard - "rgmii_0_rxd_2", Standard - "rgmii_0_rxd_3", Standard - "rgmii_0_tx_ctl", Standard - "rgmii_0_txc", Standard - "rgmii_0_txd_0", Standard - "rgmii_0_txd_1", Standard - "rgmii_0_txd_2", Standard - "rgmii_0_txd_3", Standard - "rgmii_1_rx_ctl", Standard - "rgmii_1_rxc", Standard - "rgmii_1_rxd_0", Standard - "rgmii_1_rxd_1", Standard - "rgmii_1_rxd_2", Standard - "rgmii_1_rxd_3", Standard - "rgmii_1_tx_ctl", Standard - "rgmii_1_txc", Standard - "rgmii_1_txd_0", Standard - "rgmii_1_txd_1", Standard - "rgmii_1_txd_2", Standard - "rgmii_1_txd_3", Standard - "rgmii_gpio_0", Standard - "rgmii_gpio_1", Standard - "rgmii_gpio_2", Standard - "rgmii_gpio_3", Standard - "rtxdata2g_txdata3g1", Standard - "rtxen2g_txdata3g2", Standard - "rxdata3g0", Standard - "rxdata3g1", Standard - "rxdata3g2", Standard - "sdio1_clk", Standard - "sdio1_cmd", Standard - "sdio1_data_0", Standard - "sdio1_data_1", Standard - "sdio1_data_2", Standard - "sdio1_data_3", Standard - "sdio4_clk", Standard - "sdio4_cmd", Standard - "sdio4_data_0", Standard - "sdio4_data_1", Standard - "sdio4_data_2", Standard - "sdio4_data_3", Standard - "sim_clk", Standard - "sim_data", Standard - "sim_det", Standard - "sim_resetn", Standard - "sim2_clk", Standard - "sim2_data", Standard - "sim2_det", Standard - "sim2_resetn", Standard - "sri_c", Standard - "sri_d", Standard - "sri_e", Standard - "ssp_extclk", Standard - "ssp0_clk", Standard - "ssp0_fs", Standard - "ssp0_rxd", Standard - "ssp0_txd", Standard - "ssp2_clk", Standard - "ssp2_fs_0", Standard - "ssp2_fs_1", Standard - "ssp2_fs_2", Standard - "ssp2_fs_3", Standard - "ssp2_rxd_0", Standard - "ssp2_rxd_1", Standard - "ssp2_txd_0", Standard - "ssp2_txd_1", Standard - "ssp3_clk", Standard - "ssp3_fs", Standard - "ssp3_rxd", Standard - "ssp3_txd", Standard - "ssp4_clk", Standard - "ssp4_fs", Standard - "ssp4_rxd", Standard - "ssp4_txd", Standard - "ssp5_clk", Standard - "ssp5_fs", Standard - "ssp5_rxd", Standard - "ssp5_txd", Standard - "ssp6_clk", Standard - "ssp6_fs", Standard - "ssp6_rxd", Standard - "ssp6_txd", Standard - "stat_1", Standard - "stat_2", Standard - "sysclken", Standard - "traceclk", Standard - "tracedt00", Standard - "tracedt01", Standard - "tracedt02", Standard - "tracedt03", Standard - "tracedt04", Standard - "tracedt05", Standard - "tracedt06", Standard - "tracedt07", Standard - "tracedt08", Standard - "tracedt09", Standard - "tracedt10", Standard - "tracedt11", Standard - "tracedt12", Standard - "tracedt13", Standard - "tracedt14", Standard - "tracedt15", Standard - "txdata3g0", Standard - "txpwrind", Standard - "uartb1_ucts", Standard - "uartb1_urts", Standard - "uartb1_urxd", Standard - "uartb1_utxd", Standard - "uartb2_urxd", Standard - "uartb2_utxd", Standard - "uartb3_ucts", Standard - "uartb3_urts", Standard - "uartb3_urxd", Standard - "uartb3_utxd", Standard - "uartb4_ucts", Standard - "uartb4_urts", Standard - "uartb4_urxd", Standard - "uartb4_utxd", Standard - "vc_cam1_scl", I2C - "vc_cam1_sda", I2C - "vc_cam2_scl", I2C - "vc_cam2_sda", I2C - "vc_cam3_scl", I2C - "vc_cam3_sda", I2C -- cgit v0.10.2 From 7995d74ab297e4f5526c8df70007e1e39f0d1f5c Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Wed, 26 Feb 2014 16:31:19 +0100 Subject: spi-topcliff-pch: Fix probing when DMA mode is used If during registering SPI master due to SPI device probing a SPI transfer is issued the DMA buffers are not allocated yet. This fixes the following oops: pch_spi 0000:02:0c.1: enabling device (0000 -> 0002) pch_spi 0000:02:0c.1: master is unqueued, this is deprecated BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] pch_spi_handle_dma+0x15c/0x6f4 [...] Signed-off-by: Alexander Stein Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 3383008c..88eb57e 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -1452,6 +1452,11 @@ static int pch_spi_pd_probe(struct platform_device *plat_dev) pch_spi_set_master_mode(master); + if (use_dma) { + dev_info(&plat_dev->dev, "Use DMA for data transfers\n"); + pch_alloc_dma_buf(board_dat, data); + } + ret = spi_register_master(master); if (ret != 0) { dev_err(&plat_dev->dev, @@ -1459,14 +1464,10 @@ static int pch_spi_pd_probe(struct platform_device *plat_dev) goto err_spi_register_master; } - if (use_dma) { - dev_info(&plat_dev->dev, "Use DMA for data transfers\n"); - pch_alloc_dma_buf(board_dat, data); - } - return 0; err_spi_register_master: + pch_free_dma_buf(board_dat, data); free_irq(board_dat->pdev->irq, data); err_request_irq: pch_spi_free_resources(board_dat, data); -- cgit v0.10.2 From c685689fd24d310343ac33942e9a54a974ae9c43 Mon Sep 17 00:00:00 2001 From: Chuansheng Liu Date: Mon, 24 Feb 2014 11:29:50 +0800 Subject: genirq: Remove racy waitqueue_active check We hit one rare case below: T1 calling disable_irq(), but hanging at synchronize_irq() always; The corresponding irq thread is in sleeping state; And all CPUs are in idle state; After analysis, we found there is one possible scenerio which causes T1 is waiting there forever: CPU0 CPU1 synchronize_irq() wait_event() spin_lock() atomic_dec_and_test(&threads_active) insert the __wait into queue spin_unlock() if(waitqueue_active) atomic_read(&threads_active) wake_up() Here after inserted the __wait into queue on CPU0, and before test if queue is empty on CPU1, there is no barrier, it maybe cause it is not visible for CPU1 immediately, although CPU0 has updated the queue list. It is similar for CPU0 atomic_read() threads_active also. So we'd need one smp_mb() before waitqueue_active.that, but removing the waitqueue_active() check solves it as wel l and it makes things simple and clear. Signed-off-by: Chuansheng Liu Cc: Xiaoming Wang Link: http://lkml.kernel.org/r/1393212590-32543-1-git-send-email-chuansheng.liu@intel.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 481a13c..d3bf660 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -802,8 +802,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc, static void wake_threads_waitq(struct irq_desc *desc) { - if (atomic_dec_and_test(&desc->threads_active) && - waitqueue_active(&desc->wait_for_threads)) + if (atomic_dec_and_test(&desc->threads_active)) wake_up(&desc->wait_for_threads); } -- cgit v0.10.2 From 64be38ab03e9b238a1299857fef8b3707c0ed045 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Thu, 27 Feb 2014 17:10:12 +0530 Subject: genirq: Include missing header file in irqdomain.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include appropriate header file include/linux/of_irq.h in kernel/irq/irqdomain.c because it contains prototype definition of function define in kernel/irq/irqdomain.c. This eliminates the following warning in kernel/irq/irqdomain.c: kernel/irq/irqdomain.c:468:14: warning: no previous prototype for ‘irq_create_of_mapping’ [-Wmissing-prototypes] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Cc: Benjamin Herrenschmidt Link: http://lkml.kernel.org/r/eb89aebea7ff1a46122918ac389ebecf8248be9a.1393493276.git.rashika.kheria@gmail.com Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cf68bb3..f140337 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 4729583006772b9530404bc1bb7c3aa4a10ffd4d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 27 Feb 2014 18:19:03 +0800 Subject: cpuset: fix a locking issue in cpuset_migrate_mm() I can trigger a lockdep warning: # mount -t cgroup -o cpuset xxx /cgroup # mkdir /cgroup/cpuset # mkdir /cgroup/tmp # echo 0 > /cgroup/tmp/cpuset.cpus # echo 0 > /cgroup/tmp/cpuset.mems # echo 1 > /cgroup/tmp/cpuset.memory_migrate # echo $$ > /cgroup/tmp/tasks # echo 1 > /cgruop/tmp/cpuset.mems =============================== [ INFO: suspicious RCU usage. ] 3.14.0-rc1-0.1-default+ #32 Not tainted ------------------------------- include/linux/cgroup.h:682 suspicious rcu_dereference_check() usage! ... [] dump_stack+0x72/0x86 [] lockdep_rcu_suspicious+0x101/0x140 [] cpuset_migrate_mm+0xb1/0xe0 ... We used to hold cgroup_mutex when calling cpuset_migrate_mm(), but now we hold cpuset_mutex, which causes task_css() to complain. This is not a false-positive but a real issue. Holding cpuset_mutex won't prevent a task from migrating to another cpuset, and it won't prevent the original task->cgroup from destroying during this change. Fixes: 5d21cc2db040 (cpuset: replace cgroup_mutex locking with cpuset internal locking) Cc: # 3.9+ Signed-off-by: Li Zefan Sigend-off-by: Tejun Heo diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4410ac6..dba9e4a 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -974,12 +974,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Temporarilly set tasks mems_allowed to target nodes of migration, * so that the migration code can allocate pages on these nodes. * - * Call holding cpuset_mutex, so current's cpuset won't change - * during this call, as manage_mutex holds off any cpuset_attach() - * calls. Therefore we don't need to take task_lock around the - * call to guarantee_online_mems(), as we know no one is changing - * our task's cpuset. - * * While the mm_struct we are migrating is typically from some * other task, the task_struct mems_allowed that we are hacking * is for our current task, which must allocate new pages for that @@ -996,8 +990,10 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); + rcu_read_lock(); mems_cs = effective_nodemask_cpuset(task_cs(tsk)); guarantee_online_mems(mems_cs, &tsk->mems_allowed); + rcu_read_unlock(); } /* -- cgit v0.10.2 From 99afb0fd5f05aac467ffa85c36778fec4396209b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 27 Feb 2014 18:19:36 +0800 Subject: cpuset: fix a race condition in __cpuset_node_allowed_softwall() It's not safe to access task's cpuset after releasing task_lock(). Holding callback_mutex won't help. Cc: Signed-off-by: Li Zefan Signed-off-by: Tejun Heo diff --git a/kernel/cpuset.c b/kernel/cpuset.c index dba9e4a..e6b1b66 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2482,9 +2482,9 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) task_lock(current); cs = nearest_hardwall_ancestor(task_cs(current)); + allowed = node_isset(node, cs->mems_allowed); task_unlock(current); - allowed = node_isset(node, cs->mems_allowed); mutex_unlock(&callback_mutex); return allowed; } -- cgit v0.10.2 From 4b41636878ee32d4c45a49de7749abca9721bd6a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 27 Feb 2014 15:35:48 -0800 Subject: ARM: OMAP3: Fix pinctrl interrupts for core2 After splitting padconf core into two parts to avoid exposing unaccessable registers, the new padconf core2 domain was left without a wake-up interrupt. Fix the issue by passing the shared wake-up interrupt in platform data like we do for padconf core and wkup domains already. Fixes: 3d49538364 (ARM: dts: Split omap3 pinmux core device) Signed-off-by: Tony Lindgren diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 0cc710d..c33e07e 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -257,6 +257,7 @@ struct of_dev_auxdata omap_auxdata_lookup[] __initdata = { #endif #ifdef CONFIG_ARCH_OMAP3 OF_DEV_AUXDATA("ti,omap3-padconf", 0x48002030, "48002030.pinmux", &pcs_pdata), + OF_DEV_AUXDATA("ti,omap3-padconf", 0x480025a0, "480025a0.pinmux", &pcs_pdata), OF_DEV_AUXDATA("ti,omap3-padconf", 0x48002a00, "48002a00.pinmux", &pcs_pdata), /* Only on am3517 */ OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL), -- cgit v0.10.2 From fd40dccb1a170ba689664481a3de83617b7194d2 Mon Sep 17 00:00:00 2001 From: Philippe De Muyter Date: Thu, 27 Feb 2014 10:16:15 +0100 Subject: spi: spi-imx: spi_imx_remove: do not disable disabled clocks Currently, at module removal, one gets the following warnings: ------------[ cut here ]------------ WARNING: at drivers/clk/clk.c:780 clk_disable+0x18/0x24() Modules linked in: spi_imx(-) [last unloaded: ev76c560] CPU: 1 PID: 16337 Comm: rmmod Tainted: G W 3.10.17-80548-g90191eb-dirty #33 [<80013b4c>] (unwind_backtrace+0x0/0xf8) from [<800115dc>] (show_stack+0x10/0x14) [<800115dc>] (show_stack+0x10/0x14) from [<800257b8>] (warn_slowpath_common+0x4c/0x68) [<800257b8>] (warn_slowpath_common+0x4c/0x68) from [<800257f0>] (warn_slowpath_null+0x1c/0x24) [<800257f0>] (warn_slowpath_null+0x1c/0x24) from [<803f60ec>] (clk_disable+0x18/0x24) [<803f60ec>] (clk_disable+0x18/0x24) from [<7f02c9cc>] (spi_imx_remove+0x54/0x9c [spi_imx]) [<7f02c9cc>] (spi_imx_remove+0x54/0x9c [spi_imx]) from [<8025868c>] (platform_drv_remove+0x18/0x1c) [<8025868c>] (platform_drv_remove+0x18/0x1c) from [<80256f60>] (__device_release_driver+0x70/0xcc) [<80256f60>] (__device_release_driver+0x70/0xcc) from [<80257770>] (driver_detach+0xcc/0xd0) [<80257770>] (driver_detach+0xcc/0xd0) from [<80256d90>] (bus_remove_driver+0x7c/0xc0) [<80256d90>] (bus_remove_driver+0x7c/0xc0) from [<80068668>] (SyS_delete_module+0x144/0x1f8) [<80068668>] (SyS_delete_module+0x144/0x1f8) from [<8000e080>] (ret_fast_syscall+0x0/0x30) ---[ end trace 1f5df9ad54996300 ]--- ------------[ cut here ]------------ WARNING: at drivers/clk/clk.c:780 clk_disable+0x18/0x24() Modules linked in: spi_imx(-) [last unloaded: ev76c560] CPU: 1 PID: 16337 Comm: rmmod Tainted: G W 3.10.17-80548-g90191eb-dirty #33 [<80013b4c>] (unwind_backtrace+0x0/0xf8) from [<800115dc>] (show_stack+0x10/0x14) [<800115dc>] (show_stack+0x10/0x14) from [<800257b8>] (warn_slowpath_common+0x4c/0x68) [<800257b8>] (warn_slowpath_common+0x4c/0x68) from [<800257f0>] (warn_slowpath_null+0x1c/0x24) [<800257f0>] (warn_slowpath_null+0x1c/0x24) from [<803f60ec>] (clk_disable+0x18/0x24) [<803f60ec>] (clk_disable+0x18/0x24) from [<7f02c9e8>] (spi_imx_remove+0x70/0x9c [spi_imx]) [<7f02c9e8>] (spi_imx_remove+0x70/0x9c [spi_imx]) from [<8025868c>] (platform_drv_remove+0x18/0x1c) [<8025868c>] (platform_drv_remove+0x18/0x1c) from [<80256f60>] (__device_release_driver+0x70/0xcc) [<80256f60>] (__device_release_driver+0x70/0xcc) from [<80257770>] (driver_detach+0xcc/0xd0) [<80257770>] (driver_detach+0xcc/0xd0) from [<80256d90>] (bus_remove_driver+0x7c/0xc0) [<80256d90>] (bus_remove_driver+0x7c/0xc0) from [<80068668>] (SyS_delete_module+0x144/0x1f8) [<80068668>] (SyS_delete_module+0x144/0x1f8) from [<8000e080>] (ret_fast_syscall+0x0/0x30) ---[ end trace 1f5df9ad54996301 ]--- Since commit 9e556dcc55774c9a1032f32baa0e5cfafede8b70, "spi: spi-imx: only enable the clocks when we start to transfer a message", clocks are always disabled except when transmitting messages. There is thus no need to disable them at module removal. Fixes: 9e556dcc55774 (spi: spi-imx: only enable the clocks when we start to transfer a message) Signed-off-by: Philippe De Muyter Acked-by: Huang Shijie Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index a5474ef..47f15d9 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -948,8 +948,8 @@ static int spi_imx_remove(struct platform_device *pdev) spi_bitbang_stop(&spi_imx->bitbang); writel(0, spi_imx->base + MXC_CSPICTRL); - clk_disable_unprepare(spi_imx->clk_ipg); - clk_disable_unprepare(spi_imx->clk_per); + clk_unprepare(spi_imx->clk_ipg); + clk_unprepare(spi_imx->clk_per); spi_master_put(master); return 0; -- cgit v0.10.2 From 8987583366ae9e03c306c2b7d73bdb952df1d08d Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 18 Feb 2014 22:25:15 +0100 Subject: firewire: net: fix use after free Commit 8408dc1c14c1 "firewire: net: use dev_printk API" introduced a use-after-free in a failure path. fwnet_transmit_packet_failed(ptask) may free ptask, then the dev_err() call dereferenced it. The fix is straightforward; simply reorder the two calls. Reported-by: Dan Carpenter Cc: stable@vger.kernel.org # v3.4+ Signed-off-by: Stefan Richter diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 6b89598..4af0a7b 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -929,8 +929,6 @@ static void fwnet_write_complete(struct fw_card *card, int rcode, if (rcode == RCODE_COMPLETE) { fwnet_transmit_packet_done(ptask); } else { - fwnet_transmit_packet_failed(ptask); - if (printk_timed_ratelimit(&j, 1000) || rcode != last_rcode) { dev_err(&ptask->dev->netdev->dev, "fwnet_write_complete failed: %x (skipped %d)\n", @@ -938,8 +936,10 @@ static void fwnet_write_complete(struct fw_card *card, int rcode, errors_skipped = 0; last_rcode = rcode; - } else + } else { errors_skipped++; + } + fwnet_transmit_packet_failed(ptask); } } -- cgit v0.10.2 From 48095d991d85687569ac025b18a6c7ae1632c9f7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 3 Feb 2014 17:25:33 -0800 Subject: audit: Use struct net not pid_t to remember the network namespce to reply in In struct audit_netlink_list and audit_reply add a reference to the network namespace of the caller and remove the userspace pid of the caller. This cleanly remembers the callers network namespace, and removes a huge class of races and nasty failure modes that can occur when attempting to relook up the callers network namespace from a pid_t (including the caller's network namespace changing, pid wraparound, and the pid simply not being present). Signed-off-by: "Eric W. Biederman" diff --git a/kernel/audit.c b/kernel/audit.c index 34c5a23..1e5756f 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -182,7 +182,7 @@ struct audit_buffer { struct audit_reply { __u32 portid; - pid_t pid; + struct net *net; struct sk_buff *skb; }; @@ -500,7 +500,7 @@ int audit_send_list(void *_dest) { struct audit_netlink_list *dest = _dest; struct sk_buff *skb; - struct net *net = get_net_ns_by_pid(dest->pid); + struct net *net = dest->net; struct audit_net *aunet = net_generic(net, audit_net_id); /* wait for parent to finish and send an ACK */ @@ -510,6 +510,7 @@ int audit_send_list(void *_dest) while ((skb = __skb_dequeue(&dest->q)) != NULL) netlink_unicast(aunet->nlsk, skb, dest->portid, 0); + put_net(net); kfree(dest); return 0; @@ -543,7 +544,7 @@ out_kfree_skb: static int audit_send_reply_thread(void *arg) { struct audit_reply *reply = (struct audit_reply *)arg; - struct net *net = get_net_ns_by_pid(reply->pid); + struct net *net = reply->net; struct audit_net *aunet = net_generic(net, audit_net_id); mutex_lock(&audit_cmd_mutex); @@ -552,6 +553,7 @@ static int audit_send_reply_thread(void *arg) /* Ignore failure. It'll only happen if the sender goes away, because our timeout is set to infinite. */ netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0); + put_net(net); kfree(reply); return 0; } @@ -583,8 +585,8 @@ static void audit_send_reply(__u32 portid, int seq, int type, int done, if (!skb) goto out; + reply->net = get_net(current->nsproxy->net_ns); reply->portid = portid; - reply->pid = task_pid_vnr(current); reply->skb = skb; tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); diff --git a/kernel/audit.h b/kernel/audit.h index 57cc64d..8df13221 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -247,7 +247,7 @@ extern void audit_panic(const char *message); struct audit_netlink_list { __u32 portid; - pid_t pid; + struct net *net; struct sk_buff_head q; }; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 14a78cc..a5e3d73d 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "audit.h" /* @@ -1083,8 +1084,8 @@ int audit_list_rules_send(__u32 portid, int seq) dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL); if (!dest) return -ENOMEM; + dest->net = get_net(current->nsproxy->net_ns); dest->portid = portid; - dest->pid = task_pid_vnr(current); skb_queue_head_init(&dest->q); mutex_lock(&audit_filter_mutex); -- cgit v0.10.2 From 14f398ca2f26a2ed6236aec54395e0fa06ec8a82 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Fri, 28 Feb 2014 12:02:56 -0500 Subject: dm cache mq: fix memory allocation failure for large cache devices The memory allocated for the multiqueue policy's hash table doesn't need to be physically contiguous. Use vzalloc() instead of kzalloc(). Fedora has been carrying this fix since 10/10/2013. Failure seen during creation of a 10TB cached device with a 2048 sector block size and 411GB cache size: dmsetup: page allocation failure: order:9, mode:0x10c0d0 CPU: 11 PID: 29235 Comm: dmsetup Not tainted 3.10.4 #3 Hardware name: Supermicro X8DTL/X8DTL, BIOS 2.1a 12/30/2011 000000000010c0d0 ffff880090941898 ffffffff81387ab4 ffff880090941928 ffffffff810bb26f 0000000000000009 000000000010c0d0 ffff880090941928 ffffffff81385dbc ffffffff815f3840 ffffffff00000000 000002000010c0d0 Call Trace: [] dump_stack+0x19/0x1b [] warn_alloc_failed+0x110/0x124 [] ? __alloc_pages_direct_compact+0x17c/0x18e [] __alloc_pages_nodemask+0x6c7/0x75e [] __get_free_pages+0x12/0x3f [] kmalloc_order_trace+0x29/0x88 [] __kmalloc+0x36/0x11b [] ? mq_create+0x1dc/0x2cf [dm_cache_mq] [] mq_create+0x2af/0x2cf [dm_cache_mq] [] dm_cache_policy_create+0xa7/0xd2 [dm_cache] [] ? cache_ctr+0x245/0xa13 [dm_cache] [] cache_ctr+0x353/0xa13 [dm_cache] [] dm_table_add_target+0x227/0x2ce [dm_mod] [] table_load+0x286/0x2ac [dm_mod] [] ? dev_wait+0x8a/0x8a [dm_mod] [] ctl_ioctl+0x39a/0x3c2 [dm_mod] [] dm_ctl_ioctl+0xe/0x12 [dm_mod] [] vfs_ioctl+0x21/0x34 [] do_vfs_ioctl+0x3b1/0x3f4 [] ? ____fput+0x9/0xb [] ? task_work_run+0x7e/0x92 [] SyS_ioctl+0x52/0x82 [] system_call_fastpath+0x16/0x1b Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c index 1e018e9..0e385e4 100644 --- a/drivers/md/dm-cache-policy-mq.c +++ b/drivers/md/dm-cache-policy-mq.c @@ -872,7 +872,7 @@ static void mq_destroy(struct dm_cache_policy *p) { struct mq_policy *mq = to_mq_policy(p); - kfree(mq->table); + vfree(mq->table); epool_exit(&mq->cache_pool); epool_exit(&mq->pre_cache_pool); kfree(mq); @@ -1245,7 +1245,7 @@ static struct dm_cache_policy *mq_create(dm_cblock_t cache_size, mq->nr_buckets = next_power(from_cblock(cache_size) / 2, 16); mq->hash_bits = ffs(mq->nr_buckets) - 1; - mq->table = kzalloc(sizeof(*mq->table) * mq->nr_buckets, GFP_KERNEL); + mq->table = vzalloc(sizeof(*mq->table) * mq->nr_buckets); if (!mq->table) goto bad_alloc_table; -- cgit v0.10.2 From fb0cfecf67cc1cec4487efb6267317de432add2f Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sat, 22 Feb 2014 10:59:36 -0300 Subject: ARM: dts: omap3-igep: fix boot fail due wrong compatible match This patch is based on commit: 016c12d2 ("ARM: OMAP3: Fix hardware detection for omap3630 when booted with device tree") and fixes a boot hang due the IGEP board being wrongly initialized as an OMAP3430 platform instead of an OMAP3630. Signed-off-by: Javier Martinez Canillas Signed-off-by: Tony Lindgren diff --git a/arch/arm/boot/dts/omap3-igep0020.dts b/arch/arm/boot/dts/omap3-igep0020.dts index 25a2b5f..f2779ac 100644 --- a/arch/arm/boot/dts/omap3-igep0020.dts +++ b/arch/arm/boot/dts/omap3-igep0020.dts @@ -14,7 +14,7 @@ / { model = "IGEPv2 (TI OMAP AM/DM37x)"; - compatible = "isee,omap3-igep0020", "ti,omap3"; + compatible = "isee,omap3-igep0020", "ti,omap36xx", "ti,omap3"; leds { pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/omap3-igep0030.dts b/arch/arm/boot/dts/omap3-igep0030.dts index 145c58c..2793749 100644 --- a/arch/arm/boot/dts/omap3-igep0030.dts +++ b/arch/arm/boot/dts/omap3-igep0030.dts @@ -13,7 +13,7 @@ / { model = "IGEP COM MODULE (TI OMAP AM/DM37x)"; - compatible = "isee,omap3-igep0030", "ti,omap3"; + compatible = "isee,omap3-igep0030", "ti,omap36xx", "ti,omap3"; leds { pinctrl-names = "default"; -- cgit v0.10.2 From dca1c8d17a2feae056f9e334ea75a462ae4cb52a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 22 Feb 2014 19:35:38 -0500 Subject: cifs: mask off top byte in get_rfc1002_length() The rfc1002 length actually includes a type byte, which we aren't masking off. In most cases, it's not a problem since the RFC1002_SESSION_MESSAGE type is 0, but when doing a RFC1002 session establishment, the type is non-zero and that throws off the returned length. Signed-off-by: Jeff Layton Tested-by: Sachin Prabhu Signed-off-by: Steve French diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cf32f03..c0f3718 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -513,7 +513,7 @@ struct cifs_mnt_data { static inline unsigned int get_rfc1002_length(void *buf) { - return be32_to_cpu(*((__be32 *)buf)); + return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } static inline void -- cgit v0.10.2 From 014325e933b43844f5d01fcef39bbc3301320e59 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 20 Feb 2014 14:37:20 -0700 Subject: ARM: tegra: add LED options back into tegra_defconfig The last time tegra_defconfig was rebuilt, various LEDs options were somehow selected by other options, and hence their entries in tegra_defconfig were removed by "make savedefconfig". However, for some reason this is no longer happening, so we need to add the entries back into tegra_defconfig so the they are enabled in .config. Fixes: db079b1811d1 ("ARM: tegra: rebuild tegra_defconfig to add DEBUG_FS)" Reported-by: Marc Dietrich Signed-off-by: Stephen Warren Signed-off-by: Arnd Bergmann diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index 00fe9e9..27d69b5 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -204,7 +204,10 @@ CONFIG_MMC_BLOCK_MINORS=16 CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_TEGRA=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_ONESHOT=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y -- cgit v0.10.2 From 6f285b19d09f72e801525f5eea1bdad22e559bf0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2014 19:44:55 -0800 Subject: audit: Send replies in the proper network namespace. In perverse cases of file descriptor passing the current network namespace of a process and the network namespace of a socket used by that socket may differ. Therefore use the network namespace of the appropiate socket to ensure replies always go to the appropiate socket. Signed-off-by: "Eric W. Biederman" diff --git a/include/linux/audit.h b/include/linux/audit.h index aa865a9..ec1464d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -43,6 +43,7 @@ struct mq_attr; struct mqstat; struct audit_watch; struct audit_tree; +struct sk_buff; struct audit_krule { int vers_ops; @@ -463,7 +464,7 @@ extern int audit_filter_user(int type); extern int audit_filter_type(int type); extern int audit_rule_change(int type, __u32 portid, int seq, void *data, size_t datasz); -extern int audit_list_rules_send(__u32 portid, int seq); +extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern u32 audit_enabled; #else /* CONFIG_AUDIT */ diff --git a/kernel/audit.c b/kernel/audit.c index 1e5756f..32086bf 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -570,9 +570,11 @@ static int audit_send_reply_thread(void *arg) * Allocates an skb, builds the netlink message, and sends it to the port id. * No failure notifications. */ -static void audit_send_reply(__u32 portid, int seq, int type, int done, +static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done, int multi, const void *payload, int size) { + u32 portid = NETLINK_CB(request_skb).portid; + struct net *net = sock_net(NETLINK_CB(request_skb).sk); struct sk_buff *skb; struct task_struct *tsk; struct audit_reply *reply = kmalloc(sizeof(struct audit_reply), @@ -585,7 +587,7 @@ static void audit_send_reply(__u32 portid, int seq, int type, int done, if (!skb) goto out; - reply->net = get_net(current->nsproxy->net_ns); + reply->net = get_net(net); reply->portid = portid; reply->skb = skb; @@ -675,8 +677,7 @@ static int audit_get_feature(struct sk_buff *skb) seq = nlmsg_hdr(skb)->nlmsg_seq; - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, - &af, sizeof(af)); + audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &af, sizeof(af)); return 0; } @@ -796,8 +797,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.backlog = skb_queue_len(&audit_skb_queue); s.version = AUDIT_VERSION_LATEST; s.backlog_wait_time = audit_backlog_wait_time; - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, - &s, sizeof(s)); + audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_SET: { @@ -907,7 +907,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq, data, nlmsg_len(nlh)); break; case AUDIT_LIST_RULES: - err = audit_list_rules_send(NETLINK_CB(skb).portid, seq); + err = audit_list_rules_send(skb, seq); break; case AUDIT_TRIM: audit_trim_trees(); @@ -972,8 +972,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memcpy(sig_data->ctx, ctx, len); security_release_secctx(ctx, len); } - audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO, - 0, 0, sig_data, sizeof(*sig_data) + len); + audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0, + sig_data, sizeof(*sig_data) + len); kfree(sig_data); break; case AUDIT_TTY_GET: { @@ -985,8 +985,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) s.log_passwd = tsk->signal->audit_tty_log_passwd; spin_unlock(&tsk->sighand->siglock); - audit_send_reply(NETLINK_CB(skb).portid, seq, - AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); + audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } case AUDIT_TTY_SET: { diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index a5e3d73d..e8d1c7c 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "audit.h" /* @@ -1069,8 +1070,10 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data, * @portid: target portid for netlink audit messages * @seq: netlink audit message sequence (serial) number */ -int audit_list_rules_send(__u32 portid, int seq) +int audit_list_rules_send(struct sk_buff *request_skb, int seq) { + u32 portid = NETLINK_CB(request_skb).portid; + struct net *net = sock_net(NETLINK_CB(request_skb).sk); struct task_struct *tsk; struct audit_netlink_list *dest; int err = 0; @@ -1084,7 +1087,7 @@ int audit_list_rules_send(__u32 portid, int seq) dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL); if (!dest) return -ENOMEM; - dest->net = get_net(current->nsproxy->net_ns); + dest->net = get_net(net); dest->portid = portid; skb_queue_head_init(&dest->q); -- cgit v0.10.2 From b7e63a1079b266866a732cf699d8c4d61391bbda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Feb 2014 11:19:14 -0800 Subject: NFSv4: Fix another nfs4_sequence corruptor nfs4_release_lockowner needs to set the rpc_message reply to point to the nfs4_sequence_res in order to avoid another Oopsable situation in nfs41_assign_slot. Fixes: fbd4bfd1d9d21 (NFS: Add nfs4_sequence calls for RELEASE_LOCKOWNER) Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2da6a69..44e088d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5828,8 +5828,7 @@ struct nfs_release_lockowner_data { struct nfs4_lock_state *lsp; struct nfs_server *server; struct nfs_release_lockowner_args args; - struct nfs4_sequence_args seq_args; - struct nfs4_sequence_res seq_res; + struct nfs_release_lockowner_res res; unsigned long timestamp; }; @@ -5837,7 +5836,7 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata { struct nfs_release_lockowner_data *data = calldata; nfs40_setup_sequence(data->server, - &data->seq_args, &data->seq_res, task); + &data->args.seq_args, &data->res.seq_res, task); data->timestamp = jiffies; } @@ -5846,7 +5845,7 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) struct nfs_release_lockowner_data *data = calldata; struct nfs_server *server = data->server; - nfs40_sequence_done(task, &data->seq_res); + nfs40_sequence_done(task, &data->res.seq_res); switch (task->tk_status) { case 0: @@ -5887,7 +5886,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) return -ENOMEM; - nfs4_init_sequence(&data->seq_args, &data->seq_res, 0); data->lsp = lsp; data->server = server; data->args.lock_owner.clientid = server->nfs_client->cl_clientid; @@ -5895,6 +5893,8 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st data->args.lock_owner.s_dev = server->s_dev; msg.rpc_argp = &data->args; + msg.rpc_resp = &data->res; + nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); return 0; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b2fb167..5624e4e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -467,9 +467,14 @@ struct nfs_lockt_res { }; struct nfs_release_lockowner_args { + struct nfs4_sequence_args seq_args; struct nfs_lowner lock_owner; }; +struct nfs_release_lockowner_res { + struct nfs4_sequence_res seq_res; +}; + struct nfs4_delegreturnargs { struct nfs4_sequence_args seq_args; const struct nfs_fh *fhandle; -- cgit v0.10.2 From b355cee88e3b1a193f0e9a81db810f6f83ad728b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 27 Feb 2014 11:37:15 +0800 Subject: ACPI / resources: ignore invalid ACPI device resources ACPI table may export resource entry with 0 length. But the current code interprets this kind of resource in a wrong way. It will create a resource structure with res->end = acpi_resource->start + acpi_resource->len - 1; This patch fixes a problem on my machine that a platform device fails to be created because one of its ACPI IO resource entry (start = 0, end = 0, length = 0) is translated into a generic resource with start = 0, end = 0xffffffff. Signed-off-by: Zhang Rui Cc: All applicable Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index b7201fc..0bdacc5 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -77,18 +77,24 @@ bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res) switch (ares->type) { case ACPI_RESOURCE_TYPE_MEMORY24: memory24 = &ares->data.memory24; + if (!memory24->address_length) + return false; acpi_dev_get_memresource(res, memory24->minimum, memory24->address_length, memory24->write_protect); break; case ACPI_RESOURCE_TYPE_MEMORY32: memory32 = &ares->data.memory32; + if (!memory32->address_length) + return false; acpi_dev_get_memresource(res, memory32->minimum, memory32->address_length, memory32->write_protect); break; case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: fixed_memory32 = &ares->data.fixed_memory32; + if (!fixed_memory32->address_length) + return false; acpi_dev_get_memresource(res, fixed_memory32->address, fixed_memory32->address_length, fixed_memory32->write_protect); @@ -144,12 +150,16 @@ bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res) switch (ares->type) { case ACPI_RESOURCE_TYPE_IO: io = &ares->data.io; + if (!io->address_length) + return false; acpi_dev_get_ioresource(res, io->minimum, io->address_length, io->io_decode); break; case ACPI_RESOURCE_TYPE_FIXED_IO: fixed_io = &ares->data.fixed_io; + if (!fixed_io->address_length) + return false; acpi_dev_get_ioresource(res, fixed_io->address, fixed_io->address_length, ACPI_DECODE_10); -- cgit v0.10.2 From ae41a3030ce8901853b3cfd5e118a4b0288aa068 Mon Sep 17 00:00:00 2001 From: Marek Belisko Date: Sat, 1 Mar 2014 14:58:48 +0100 Subject: ARM: dts: omap3-gta04: Add ti,omap36xx to compatible property to avoid problems with booting Without that change booting leads to crash with more warnings like below: [ 0.284454] omap_hwmod: uart4: cannot clk_get main_clk uart4_fck [ 0.284484] omap_hwmod: uart4: cannot _init_clocks [ 0.284484] ------------[ cut here ]------------ [ 0.284545] WARNING: CPU: 0 PID: 1 at arch/arm/mach-omap2/omap_hwmod.c:2543 _init+0x300/0x3e4() [ 0.284545] omap_hwmod: uart4: couldn't init clocks [ 0.284576] Modules linked in: [ 0.284606] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.13.0-next-20140124-00020-gd2aefec-dirty #26 [ 0.284637] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 0.284667] [] (show_stack) from [] (dump_stack+0x7c/0x94) [ 0.284729] [] (dump_stack) from [] (warn_slowpath_common+0x6c/0x90) [ 0.284729] [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x30/0x40) [ 0.284759] [] (warn_slowpath_fmt) from [] (_init+0x300/0x3e4) [ 0.284790] [] (_init) from [] (__omap_hwmod_setup_all+0x40/0x8c) [ 0.284820] [] (__omap_hwmod_setup_all) from [] (do_one_initcall+0xe8/0x14c) [ 0.284851] [] (do_one_initcall) from [] (kernel_init_freeable+0x104/0x1c8) [ 0.284881] [] (kernel_init_freeable) from [] (kernel_init+0x8/0x118) [ 0.284912] [] (kernel_init) from [] (ret_from_fork+0x14/0x2c) [ 0.285064] ---[ end trace 63de210ad43b627d ]--- Reference: https://lkml.org/lkml/2013/10/8/553 Signed-off-by: Marek Belisko Signed-off-by: Tony Lindgren diff --git a/arch/arm/boot/dts/omap3-gta04.dts b/arch/arm/boot/dts/omap3-gta04.dts index c551e4a..d3b253b 100644 --- a/arch/arm/boot/dts/omap3-gta04.dts +++ b/arch/arm/boot/dts/omap3-gta04.dts @@ -13,7 +13,7 @@ / { model = "OMAP3 GTA04"; - compatible = "ti,omap3-gta04", "ti,omap3"; + compatible = "ti,omap3-gta04", "ti,omap36xx", "ti,omap3"; cpus { cpu@0 { -- cgit v0.10.2 From 3130497f5babd42b24a7febda46b46116e0adc83 Mon Sep 17 00:00:00 2001 From: "Li, Aubrey" Date: Sun, 2 Mar 2014 08:53:57 +0800 Subject: ACPI / sleep: pm_power_off needs more sanity checks to be installed Sleep control and status registers need santity checks as well before ACPI installs acpi_power_off to pm_power_off hook. The checking code in acpi_enter_sleep_state() is too late, we should not allow a not-working pm_power_off function to be hooked up. Signed-off-by: Aubrey Li Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index b718806..b0f6c4a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -807,7 +807,12 @@ int __init acpi_sleep_init(void) acpi_sleep_hibernate_setup(); status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { + /* + * Check both ACPI S5 object and ACPI sleep registers to + * install pm_power_off_prepare/pm_power_off hook + */ + if (ACPI_SUCCESS(status) && acpi_gbl_FADT.sleep_control.address + && acpi_gbl_FADT.sleep_status.address) { sleep_states[ACPI_STATE_S5] = 1; pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; -- cgit v0.10.2 From 755a48a7a4eb05b9c8424e3017d947b2961a60e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Mar 2014 22:03:12 -0500 Subject: NFS: Fix a delegation callback race The clean-up in commit 36281caa839f ended up removing a NULL pointer check that is needed in order to prevent an Oops in nfs_async_inode_return_delegation(). Reported-by: "Yan, Zheng" Link: http://lkml.kernel.org/r/5313E9F6.2020405@intel.com Fixes: 36281caa839f (NFSv4: Further clean-ups of delegation stateid validation) Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ef792f2..5d8ccec 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -659,16 +659,19 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation == NULL) + goto out_enoent; - if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { - rcu_read_unlock(); - return -ENOENT; - } + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) + goto out_enoent; nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); return 0; +out_enoent: + rcu_read_unlock(); + return -ENOENT; } static struct inode * -- cgit v0.10.2 From 878eaf61be20a78dd4a0a14ac5b1a3a78298cba7 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Feb 2014 14:51:55 +1000 Subject: MAINTAINERS: update AGP tree to point at drm tree Signed-off-by: Dave Airlie diff --git a/MAINTAINERS b/MAINTAINERS index c6d0e93..e9d85f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -473,7 +473,7 @@ F: net/rxrpc/af_rxrpc.c AGPGART DRIVER M: David Airlie -T: git git://git.kernel.org/pub/scm/linux/kernel/git/airlied/drm-2.6.git +T: git git://people.freedesktop.org/~airlied/linux (part of drm maint) S: Maintained F: drivers/char/agp/ F: include/linux/agp* -- cgit v0.10.2 From 61d1cf163c8653934cc8cd5d0b2a562d0990c265 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Sun, 2 Mar 2014 20:54:42 +0100 Subject: spi: spi-ath79: fix initial GPIO CS line setup The 'ath79_spi_setup_cs' function initializes the chip select line of a given SPI device in order to make sure that the device is inactive. If the SPI_CS_HIGH bit is set for a given device, it means that the CS line of that device is active HIGH so it must be set to LOW initially. In case of GPIO CS lines, the 'ath79_spi_setup_cs' function does the opposite of that due to the wrong GPIO flags. Fix the code to use the correct GPIO flags. Reported-by: Ronald Wahl Signed-off-by: Gabor Juhos Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c index 31534b5..c3b2fb9 100644 --- a/drivers/spi/spi-ath79.c +++ b/drivers/spi/spi-ath79.c @@ -132,9 +132,9 @@ static int ath79_spi_setup_cs(struct spi_device *spi) flags = GPIOF_DIR_OUT; if (spi->mode & SPI_CS_HIGH) - flags |= GPIOF_INIT_HIGH; - else flags |= GPIOF_INIT_LOW; + else + flags |= GPIOF_INIT_HIGH; status = gpio_request_one(cdata->gpio, flags, dev_name(&spi->dev)); -- cgit v0.10.2 From a6b92b6650d010d58b6e6fe42c6271266e0b1134 Mon Sep 17 00:00:00 2001 From: Marius Knaust Date: Mon, 3 Mar 2014 01:48:58 +0100 Subject: ALSA: hda - Added inverted digital-mic handling for Acer TravelMate 8371 Signed-off-by: Marius Knaust Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ec304f3..7ba7a11f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4253,6 +4253,7 @@ static const struct hda_fixup alc269_fixups[] = { }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { + SND_PCI_QUIRK(0x1025, 0x0283, "Acer TravelMate 8371", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x029b, "Acer 1810TZ", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x0349, "Acer AOD260", ALC269_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x047c, "Acer AC700", ALC269_FIXUP_ACER_AC700), -- cgit v0.10.2 From b6ab66aa5d376583a17137cbb2d3a728f29acae2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 25 Feb 2014 13:11:47 +0200 Subject: drm/i915: use backlight legacy combination mode also for i915gm/i945gm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915gm and i945gm also seem to use and need the legacy combination mode bit in BLC_PWM_CTL. v2: Also do this for i915gm (Ville). Reported-and-tested-by: Luis Ortega Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75001 Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 350de35..079ea38 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -698,7 +698,7 @@ static void i9xx_enable_backlight(struct intel_connector *connector) freq /= 0xff; ctl = freq << 17; - if (IS_GEN2(dev) && panel->backlight.combination_mode) + if (panel->backlight.combination_mode) ctl |= BLM_LEGACY_MODE; if (IS_PINEVIEW(dev) && panel->backlight.active_low_pwm) ctl |= BLM_POLARITY_PNV; @@ -979,7 +979,7 @@ static int i9xx_setup_backlight(struct intel_connector *connector) ctl = I915_READ(BLC_PWM_CTL); - if (IS_GEN2(dev)) + if (IS_GEN2(dev) || IS_I915GM(dev) || IS_I945GM(dev)) panel->backlight.combination_mode = ctl & BLM_LEGACY_MODE; if (IS_PINEVIEW(dev)) -- cgit v0.10.2 From 3617dc9675f0184b7bb210cfa34f3cac928d8055 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Mon, 13 Jan 2014 16:25:21 +0530 Subject: drm/i915: Resolving the memory region conflict for Stolen area There is a conflict seen when requesting the kernel to reserve the physical space used for the stolen area. This is because some BIOS are wrapping the stolen area in the root PCI bus, but have an off-by-one error. As a workaround we retry the reservation with an offset of 1 instead of 0. v2: updated commit message & the comment in source file (Daniel) Signed-off-by: Akash Goel Reviewed-by: Jesse Barnes Tested-by: Arjan van de Ven Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 1a24e84..d58b4e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -82,9 +82,22 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) r = devm_request_mem_region(dev->dev, base, dev_priv->gtt.stolen_size, "Graphics Stolen Memory"); if (r == NULL) { - DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", - base, base + (uint32_t)dev_priv->gtt.stolen_size); - base = 0; + /* + * One more attempt but this time requesting region from + * base + 1, as we have seen that this resolves the region + * conflict with the PCI Bus. + * This is a BIOS w/a: Some BIOS wrap stolen in the root + * PCI bus, but have an off-by-one error. Hence retry the + * reservation starting from 1 instead of 0. + */ + r = devm_request_mem_region(dev->dev, base + 1, + dev_priv->gtt.stolen_size - 1, + "Graphics Stolen Memory"); + if (r == NULL) { + DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", + base, base + (uint32_t)dev_priv->gtt.stolen_size); + base = 0; + } } return base; -- cgit v0.10.2 From bcdb72ac7c00d2b56359fc82bcc8fe50454717d5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 14 Feb 2014 20:23:54 +0200 Subject: drm/i915: fix pch pci device enumeration pci_get_class(class, from) drops the refcount for 'from', so the extra pci_dev_put we do on it will result in a use after free bug starting with the WARN below. Regression introduced in commit 6a9c4b35e6696a63805b6da5e4889c6986e9ee1b Author: Rui Guo Date: Wed Jun 19 21:10:23 2013 +0800 drm/i915: Fix PCH detect with multiple ISA bridges in VM [ 164.338460] WARNING: CPU: 1 PID: 2094 at include/linux/kref.h:47 klist_next+0xae/0x110() [ 164.347731] CPU: 1 PID: 2094 Comm: modprobe Tainted: G O 3.13.0-imre+ #354 [ 164.356468] Hardware name: Intel Corp. VALLEYVIEW B0 PLATFORM/NOTEBOOK, BIOS BYTICRB1.X64.0062.R70.1310112051 10/11/2013 [ 164.368796] Call Trace: [ 164.371609] [] dump_stack+0x4e/0x7a [ 164.377447] [] warn_slowpath_common+0x7d/0xa0 [ 164.384238] [] warn_slowpath_null+0x1a/0x20 [ 164.390851] [] klist_next+0xae/0x110 [ 164.396777] [] ? pci_do_find_bus+0x70/0x70 [ 164.403286] [] bus_find_device+0x89/0xc0 [ 164.409719] [] pci_get_dev_by_id+0x63/0xa0 [ 164.416238] [] pci_get_class+0x44/0x50 [ 164.422433] [] intel_dsm_detect+0x16f/0x1f0 [i915] [ 164.429801] [] intel_register_dsm_handler+0xe/0x10 [i915] [ 164.437831] [] i915_driver_load+0xafe/0xf30 [i915] [ 164.445126] [] ? intel_alloc_coherent+0x110/0x110 [ 164.452340] [] drm_dev_register+0xc7/0x150 [drm] [ 164.459462] [] drm_get_pci_dev+0x11f/0x1f0 [drm] [ 164.466554] [] ? _raw_spin_unlock_irqrestore+0x51/0x70 [ 164.474287] [] i915_pci_probe+0x56/0x60 [i915] [ 164.481185] [] pci_device_probe+0x78/0xf0 [ 164.487603] [] driver_probe_device+0x155/0x350 [ 164.494505] [] __driver_attach+0x6e/0xa0 [ 164.500826] [] ? __device_attach+0x50/0x50 [ 164.507333] [] bus_for_each_dev+0x6e/0xc0 [ 164.513752] [] driver_attach+0x1e/0x20 [ 164.519870] [] bus_add_driver+0x138/0x260 [ 164.526289] [] ? 0xffffffffa0187fff [ 164.532116] [] driver_register+0x98/0xe0 [ 164.538558] [] ? 0xffffffffa0187fff [ 164.544389] [] __pci_register_driver+0x60/0x70 [ 164.551336] [] drm_pci_init+0x6d/0x120 [drm] [ 164.558040] [] ? 0xffffffffa0187fff [ 164.563928] [] i915_init+0x6a/0x6c [i915] [ 164.570363] [] do_one_initcall+0xaa/0x160 [ 164.576783] [] ? set_memory_nx+0x40/0x50 [ 164.583100] [] load_module+0x1fb5/0x2550 [ 164.589410] [] ? store_uevent+0x40/0x40 [ 164.595628] [] SyS_init_module+0xed/0x100 [ 164.602048] [] system_call_fastpath+0x16/0x1b v2: simplify the loop further (Chris) Signed-off-by: Imre Deak Cc: Jesse Barnes Reported-by: Jesse Barnes Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=65652 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74161 Reviewed-by: Chris Wilson Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 04f1f02..ec7bb0f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -403,7 +403,7 @@ MODULE_DEVICE_TABLE(pci, pciidlist); void intel_detect_pch(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - struct pci_dev *pch; + struct pci_dev *pch = NULL; /* In all current cases, num_pipes is equivalent to the PCH_NOP setting * (which really amounts to a PCH but no South Display). @@ -424,12 +424,9 @@ void intel_detect_pch(struct drm_device *dev) * all the ISA bridge devices and check for the first match, instead * of only checking the first one. */ - pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); - while (pch) { - struct pci_dev *curr = pch; + while ((pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, pch))) { if (pch->vendor == PCI_VENDOR_ID_INTEL) { - unsigned short id; - id = pch->device & INTEL_PCH_DEVICE_ID_MASK; + unsigned short id = pch->device & INTEL_PCH_DEVICE_ID_MASK; dev_priv->pch_id = id; if (id == INTEL_PCH_IBX_DEVICE_ID_TYPE) { @@ -461,18 +458,16 @@ void intel_detect_pch(struct drm_device *dev) DRM_DEBUG_KMS("Found LynxPoint LP PCH\n"); WARN_ON(!IS_HASWELL(dev)); WARN_ON(!IS_ULT(dev)); - } else { - goto check_next; - } - pci_dev_put(pch); + } else + continue; + break; } -check_next: - pch = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, curr); - pci_dev_put(curr); } if (!pch) - DRM_DEBUG_KMS("No PCH found?\n"); + DRM_DEBUG_KMS("No PCH found.\n"); + + pci_dev_put(pch); } bool i915_semaphore_is_enabled(struct drm_device *dev) -- cgit v0.10.2 From 1c37a72c1bd0b83be8b95cff7f1bc9b1f32bd433 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 3 Mar 2014 13:37:14 +0200 Subject: mac80211: consider virtual mon when calculating min_def When calculating the current max bw required for a channel context, we didn't consider the virtual monitor interface, resulting in its channel context being narrower than configured. This broke monitor mode with iwlmvm, which uses the minimal width. Reported-by: Ido Yariv Signed-off-by: Eliad Peller Signed-off-by: Johannes Berg diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f43613a..0c1ecfd 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -100,6 +100,12 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, } max_bw = max(max_bw, width); } + + /* use the configured bandwidth in case of monitor interface */ + sdata = rcu_dereference(local->monitor_sdata); + if (sdata && rcu_access_pointer(sdata->vif.chanctx_conf) == conf) + max_bw = max(max_bw, conf->def.width); + rcu_read_unlock(); return max_bw; -- cgit v0.10.2 From bc00a91d627026f08abf69bf5d0015499dd30c2a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 3 Mar 2014 13:55:54 +0100 Subject: cfg80211: remove racy beacon_interval assignment In case of AP mode, the beacon interval is already reset to zero inside cfg80211_stop_ap(), and in the other modes it isn't relevant. Remove the assignment to remove a potential race since the assignment isn't properly locked. Reported-by: Michal Kazior Signed-off-by: Johannes Berg diff --git a/net/wireless/core.c b/net/wireless/core.c index 010892b..a3bf18d 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -788,8 +788,6 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, default: break; } - - wdev->beacon_interval = 0; } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, -- cgit v0.10.2 From 7693decce869a94821bed1fa6c237c7224096b5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 3 Mar 2014 09:25:52 -0500 Subject: ARM: XEN depends on having a MMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit arch/arm/xen/enlighten.c (and maybe others) use MMU-specific functions like pte_mkspecial which are only available on MMU builds. So let XEN depend on MMU. Suggested-by: Arnd Bergmann Signed-off-by: Uwe Kleine-König Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 24307dc..8b78465 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1883,6 +1883,7 @@ config XEN depends on ARM && AEABI && OF depends on CPU_V7 && !CPU_V6 depends on !GENERIC_ATOMIC64 + depends on MMU select ARM_PSCI select SWIOTLB_XEN select ARCH_DMA_ADDR_T_64BIT -- cgit v0.10.2 From 37d6a82bd99d3354e59a9a629fa48fc75ca51c66 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 3 Mar 2014 23:05:15 +0800 Subject: Thermal: update INT3404 thermal driver help text Signed-off-by: Zhang Rui diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 35c0664..7932294 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -210,8 +210,16 @@ config ACPI_INT3403_THERMAL tristate "ACPI INT3403 thermal driver" depends on X86 && ACPI help - This driver uses ACPI INT3403 device objects. If present, it will - register each INT3403 thermal sensor as a thermal zone. + Newer laptops and tablets that use ACPI may have thermal sensors + outside the core CPU/SOC for thermal safety reasons. These + temperature sensors are also exposed for the OS to use via the so + called INT3403 ACPI object. This driver will, on devices that have + such sensors, expose the temperature information from these sensors + to userspace via the normal thermal framework. This means that a wide + range of applications and GUI widgets can show this information to + the user or use this information for making decisions. For example, + the Intel Thermal Daemon can use this information to allow the user + to select his laptop to run without turning on the fans. menu "Texas Instruments thermal drivers" source "drivers/thermal/ti-soc-thermal/Kconfig" -- cgit v0.10.2 From 79786880a47a8c5b4c8146c03432b3387a07a169 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 2 Mar 2014 15:33:35 +0100 Subject: x86_pkg_temp_thermal: Do not expose as a hwmon device The temperature value reported by x86_pkg_temp_thermal is already reported by the coretemp driver. So, do not expose this thermal zone as a hwmon device, because it would be redundant. Signed-off-by: Jean Delvare Cc: Zhang Rui Cc: Eduardo Valentin Acked-by: Guenter Roeck Signed-off-by: Zhang Rui diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 972e1c7..3d8d972 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -68,6 +68,10 @@ struct phy_dev_entry { struct thermal_zone_device *tzone; }; +static const struct thermal_zone_params pkg_temp_tz_params = { + .no_hwmon = true, +}; + /* List maintaining number of package instances */ static LIST_HEAD(phy_dev_list); static DEFINE_MUTEX(phy_dev_list_mutex); @@ -446,7 +450,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) thres_count, (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01, - phy_dev_entry, &tzone_ops, NULL, 0, 0); + phy_dev_entry, &tzone_ops, &pkg_temp_tz_params, 0, 0); if (IS_ERR(phy_dev_entry->tzone)) { err = PTR_ERR(phy_dev_entry->tzone); goto err_ret_free; -- cgit v0.10.2 From 3e4216531c442ea99cb32a69e5ef4af723e7b5a9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 2 Mar 2014 15:05:49 +0100 Subject: x86_pkg_temp_thermal: Fix the thermal zone type The thermal zone type should not include an instance number. Otherwise each zone is considered a different type and the thermal-to-hwmon bridge fails to group them all in a single hwmon device. I also changed the type to "x86_pkg_temp", because "pkg" was too generic, and other thermal drivers use an underscore, not a dash, as a separator. Or maybe "cpu_pkg_temp" would be better? Signed-off-by: Jean Delvare Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Zhang Rui diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 3d8d972..081fd7e 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -398,7 +398,6 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) int err; u32 tj_max; struct phy_dev_entry *phy_dev_entry; - char buffer[30]; int thres_count; u32 eax, ebx, ecx, edx; u8 *temp; @@ -444,9 +443,7 @@ static int pkg_temp_thermal_device_add(unsigned int cpu) phy_dev_entry->first_cpu = cpu; phy_dev_entry->tj_max = tj_max; phy_dev_entry->ref_cnt = 1; - snprintf(buffer, sizeof(buffer), "pkg-temp-%d\n", - phy_dev_entry->phys_proc_id); - phy_dev_entry->tzone = thermal_zone_device_register(buffer, + phy_dev_entry->tzone = thermal_zone_device_register("x86_pkg_temp", thres_count, (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01, -- cgit v0.10.2 From d1c8b0410b77f1e43f97cd22bc7e0a71a5305840 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Mon, 27 Jan 2014 09:40:58 +0100 Subject: thermal,rcar_thermal: Add dependency on HAS_IOMEM Commit beeb5a1e (thermal: rcar-thermal: Enable driver compilation with COMPILE_TEST) broke build on archs wihout io memory. On archs like S390 or um this driver cannot build nor work. Make it depend on HAS_IOMEM to bypass build failures. drivers/thermal/rcar_thermal.c:404: undefined reference to `devm_ioremap_resource' drivers/thermal/rcar_thermal.c:426: undefined reference to `devm_ioremap_resource' Signed-off-by: Richard Weinberger Acked-by: Laurent Pinchart Acked-by: Kuninori Morimoto Signed-off-by: Zhang Rui diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 7932294..5f88d76 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -136,6 +136,7 @@ config SPEAR_THERMAL config RCAR_THERMAL tristate "Renesas R-Car thermal driver" depends on ARCH_SHMOBILE || COMPILE_TEST + depends on HAS_IOMEM help Enable this to plug the R-Car thermal sensor driver into the Linux thermal framework. -- cgit v0.10.2 From 5ca0cce5622bf476e3e6bf627fe8e9381d6ae174 Mon Sep 17 00:00:00 2001 From: Ni Wade Date: Mon, 17 Feb 2014 11:02:55 +0800 Subject: Thermal: Allow first update of cooling device state In initialization, if the cooling device is initialized at max cooling state, and the thermal zone temperature is below the first trip point, then the cooling state can't be updated to the right state, untill the first trip point be triggered. To fix this issue, allow first update of cooling device state during registration, initialized "updated" device field as "false" (instead of "true"). Signed-off-by: Wei Ni Signed-off-by: Zhang Rui diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 338a88b..02f57af 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1107,7 +1107,7 @@ __thermal_cooling_device_register(struct device_node *np, INIT_LIST_HEAD(&cdev->thermal_instances); cdev->np = np; cdev->ops = ops; - cdev->updated = true; + cdev->updated = false; cdev->device.class = &thermal_class; cdev->devdata = devdata; dev_set_name(&cdev->device, "cooling_device%d", cdev->id); -- cgit v0.10.2 From f2234bcd03ad031225d7dc37dd18852a2f2ff2bf Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 24 Jan 2014 10:23:19 +0800 Subject: Thermal: thermal zone governor fix This patch does a cleanup about the thermal zone govenor, setting and make the following rule. 1. For thermal zone devices that are registered w/o tz->tzp, they can use the default thermal governor only. 2. For thermal zone devices w/ governor name specified in tz->tzp->governor_name, we will use the default govenor if the governor specified is not available at the moment, and update tz->governor when the matched governor is registered. This also fixes a problem that OF registered thermal zones are running with no governor. Signed-off-by: Zhang Rui Acked-by: Javi Merino diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 02f57af..71b0ec0 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -56,10 +56,15 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); +static struct thermal_governor *def_governor; + static struct thermal_governor *__find_governor(const char *name) { struct thermal_governor *pos; + if (!name || !name[0]) + return def_governor; + list_for_each_entry(pos, &thermal_governor_list, governor_list) if (!strnicmp(name, pos->name, THERMAL_NAME_LENGTH)) return pos; @@ -82,17 +87,23 @@ int thermal_register_governor(struct thermal_governor *governor) if (__find_governor(governor->name) == NULL) { err = 0; list_add(&governor->governor_list, &thermal_governor_list); + if (!def_governor && !strncmp(governor->name, + DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH)) + def_governor = governor; } mutex_lock(&thermal_list_lock); list_for_each_entry(pos, &thermal_tz_list, node) { + /* + * only thermal zones with specified tz->tzp->governor_name + * may run with tz->govenor unset + */ if (pos->governor) continue; - if (pos->tzp) - name = pos->tzp->governor_name; - else - name = DEFAULT_THERMAL_GOVERNOR; + + name = pos->tzp->governor_name; + if (!strnicmp(name, governor->name, THERMAL_NAME_LENGTH)) pos->governor = governor; } @@ -342,8 +353,8 @@ static void monitor_thermal_zone(struct thermal_zone_device *tz) static void handle_non_critical_trips(struct thermal_zone_device *tz, int trip, enum thermal_trip_type trip_type) { - if (tz->governor) - tz->governor->throttle(tz, trip); + tz->governor ? tz->governor->throttle(tz, trip) : + def_governor->throttle(tz, trip); } static void handle_critical_trips(struct thermal_zone_device *tz, @@ -1533,7 +1544,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (tz->tzp) tz->governor = __find_governor(tz->tzp->governor_name); else - tz->governor = __find_governor(DEFAULT_THERMAL_GOVERNOR); + tz->governor = def_governor; mutex_unlock(&thermal_governor_lock); -- cgit v0.10.2 From 2a26ebef841bac72a4c5776e920496e07b405628 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 3 Mar 2014 05:57:26 +0100 Subject: rt,blk,mq: Make blk_mq_cpu_notify_lock a raw spinlock [ 365.164040] BUG: sleeping function called from invalid context at kernel/rtmutex.c:674 [ 365.164041] in_atomic(): 1, irqs_disabled(): 1, pid: 26, name: migration/1 [ 365.164043] no locks held by migration/1/26. [ 365.164044] irq event stamp: 6648 [ 365.164056] hardirqs last enabled at (6647): [] restore_args+0x0/0x30 [ 365.164062] hardirqs last disabled at (6648): [] multi_cpu_stop+0x9d/0x120 [ 365.164070] softirqs last enabled at (0): [] copy_process.part.28+0x6fc/0x1920 [ 365.164072] softirqs last disabled at (0): [< (null)>] (null) [ 365.164076] CPU: 1 PID: 26 Comm: migration/1 Tainted: GF N 3.12.12-rt19-0.gcb6c4a2-rt #3 [ 365.164078] Hardware name: QCI QSSC-S4R/QSSC-S4R, BIOS QSSC-S4R.QCI.01.00.S013.032920111005 03/29/2011 [ 365.164091] 0000000000000001 ffff880a42ea7c30 ffffffff815367e6 ffffffff81a086c0 [ 365.164099] ffff880a42ea7c40 ffffffff8108919c ffff880a42ea7c60 ffffffff8153c24f [ 365.164107] ffff880a42ea91f0 00000000ffffffe1 ffff880a42ea7c88 ffffffff81297ec0 [ 365.164108] Call Trace: [ 365.164119] [] try_stack_unwind+0x191/0x1a0 [ 365.164127] [] dump_trace+0x92/0x360 [ 365.164133] [] show_trace_log_lvl+0x48/0x60 [ 365.164138] [] show_stack_log_lvl+0xd8/0x1d0 [ 365.164143] [] show_stack+0x20/0x50 [ 365.164153] [] dump_stack+0x54/0x9a [ 365.164163] [] __might_sleep+0xfc/0x140 [ 365.164173] [] rt_spin_lock+0x1f/0x70 [ 365.164182] [] blk_mq_main_cpu_notify+0x20/0x70 [ 365.164191] [] notifier_call_chain+0x4c/0x70 [ 365.164201] [] __raw_notifier_call_chain+0x9/0x10 [ 365.164207] [] cpu_notify+0x1e/0x40 [ 365.164217] [] take_cpu_down+0x22/0x40 [ 365.164223] [] multi_cpu_stop+0xd6/0x120 [ 365.164229] [] cpu_stopper_thread+0xd7/0x1e0 [ 365.164235] [] smpboot_thread_fn+0x203/0x380 [ 365.164241] [] kthread+0xc8/0xd0 [ 365.164250] [] ret_from_fork+0x7c/0xb0 [ 365.164429] smpboot: CPU 1 is now offline Signed-off-by: Mike Galbraith Signed-off-by: Jens Axboe diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c index 3146bef..136ef86 100644 --- a/block/blk-mq-cpu.c +++ b/block/blk-mq-cpu.c @@ -11,7 +11,7 @@ #include "blk-mq.h" static LIST_HEAD(blk_mq_cpu_notify_list); -static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock); +static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock); static int blk_mq_main_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) @@ -19,12 +19,12 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, unsigned int cpu = (unsigned long) hcpu; struct blk_mq_cpu_notifier *notify; - spin_lock(&blk_mq_cpu_notify_lock); + raw_spin_lock(&blk_mq_cpu_notify_lock); list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) notify->notify(notify->data, action, cpu); - spin_unlock(&blk_mq_cpu_notify_lock); + raw_spin_unlock(&blk_mq_cpu_notify_lock); return NOTIFY_OK; } @@ -32,16 +32,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) { BUG_ON(!notifier->notify); - spin_lock(&blk_mq_cpu_notify_lock); + raw_spin_lock(&blk_mq_cpu_notify_lock); list_add_tail(¬ifier->list, &blk_mq_cpu_notify_list); - spin_unlock(&blk_mq_cpu_notify_lock); + raw_spin_unlock(&blk_mq_cpu_notify_lock); } void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) { - spin_lock(&blk_mq_cpu_notify_lock); + raw_spin_lock(&blk_mq_cpu_notify_lock); list_del(¬ifier->list); - spin_unlock(&blk_mq_cpu_notify_lock); + raw_spin_unlock(&blk_mq_cpu_notify_lock); } void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, -- cgit v0.10.2 From 17b0c1f7865d8bf4f5e5aa94e2aeafb35d23e4e9 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 11 Feb 2014 21:39:06 +0200 Subject: drm/i915: vlv: reserve GT power context early We reserve the space for the power context in stolen memory at a fixed address from a delayed work. This races with the subsequent driver init/resume code which could allocate something at that address, so the reservation for the power context fails. Reserve the space up-front, so this can't happen. This also adds a missing struct_mutex lock around the stolen allocation, which wasn't taken in the delayed work path. Signed-off-by: Imre Deak Reviewed-by: Jesse Barnes Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d77cc81..e1fc35a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3493,6 +3493,8 @@ static void valleyview_setup_pctx(struct drm_device *dev) u32 pcbr; int pctx_size = 24*1024; + WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + pcbr = I915_READ(VLV_PCBR); if (pcbr) { /* BIOS set it up already, grab the pre-alloc'd space */ @@ -3542,8 +3544,6 @@ static void valleyview_enable_rps(struct drm_device *dev) I915_WRITE(GTFIFODBG, gtfifodbg); } - valleyview_setup_pctx(dev); - /* If VLV, Forcewake all wells, else re-direct to regular path */ gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); @@ -4395,6 +4395,8 @@ void intel_enable_gt_powersave(struct drm_device *dev) ironlake_enable_rc6(dev); intel_init_emon(dev); } else if (IS_GEN6(dev) || IS_GEN7(dev)) { + if (IS_VALLEYVIEW(dev)) + valleyview_setup_pctx(dev); /* * PCU communication is slow and this doesn't need to be * done at any specific time, so do this out of our fast path -- cgit v0.10.2 From d9d820810d4eeac336c515dfb62561c21b296f07 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 27 Feb 2014 16:30:56 -0300 Subject: drm/i915: fix assert_cursor on BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to read the correct register, not a register that doesn't exist and will trigger "Unclaimed register" messages when we touch it. Also rearrange the checks in an attempt to prevent this error from happening again. Signed-off-by: Paulo Zanoni Reviewed-by: Ville Syrjälä [Jani: dropped an extra empty line introduced.] Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4c16728..9b8a7c7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1092,12 +1092,12 @@ static void assert_cursor(struct drm_i915_private *dev_priv, struct drm_device *dev = dev_priv->dev; bool cur_state; - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) - cur_state = I915_READ(CURCNTR_IVB(pipe)) & CURSOR_MODE; - else if (IS_845G(dev) || IS_I865G(dev)) + if (IS_845G(dev) || IS_I865G(dev)) cur_state = I915_READ(_CURACNTR) & CURSOR_ENABLE; - else + else if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev)) cur_state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE; + else + cur_state = I915_READ(CURCNTR_IVB(pipe)) & CURSOR_MODE; WARN(cur_state != state, "cursor on pipe %c assertion failure (expected %s, current %s)\n", -- cgit v0.10.2 From 6375b768a9850b6154478993e5fb566fa4614a9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 3 Mar 2014 11:33:36 +0200 Subject: drm/i915: Reject >165MHz modes w/ DVI monitors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Single-link DVI max dotclock is 165MHz. Filter out modes with higher dotclock when the monitor doesn't support HDMI. Modes higher than 165 MHz were allowed in commit 7d148ef51a657fd04036c3ed7803da600dd0d451 Author: Daniel Vetter Date: Mon Jul 22 18:02:39 2013 +0200 drm/i915: fix hdmi portclock limits Also don't attempt to use 12bpc mode with DVI monitors. Cc: Adam Nielsen Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75345 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70331 Tested-by: Ralf Jung Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 6db0d9d..ee3181e 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -845,7 +845,7 @@ static int hdmi_portclock_limit(struct intel_hdmi *hdmi) { struct drm_device *dev = intel_hdmi_to_dev(hdmi); - if (IS_G4X(dev)) + if (!hdmi->has_hdmi_sink || IS_G4X(dev)) return 165000; else if (IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8) return 300000; @@ -899,8 +899,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, * outputs. We also need to check that the higher clock still fits * within limits. */ - if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= portclock_limit - && HAS_PCH_SPLIT(dev)) { + if (pipe_config->pipe_bpp > 8*3 && intel_hdmi->has_hdmi_sink && + clock_12bpc <= portclock_limit && HAS_PCH_SPLIT(dev)) { DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); desired_bpp = 12*3; -- cgit v0.10.2 From d13c46c67e546bb1dc1c4dc7c43e388d0119276b Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 3 Mar 2014 14:49:51 +0000 Subject: DRM: armada: fix use of kfifo_put() The kfifo_put() API changed in 498d319bb512 (kfifo API type safety) which now results in the wrong pointer being added to the kfifo ring, which then causes an oops. Fix this. Cc: # 3.13 Signed-off-by: Russell King diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 62d0ff3..073dbf3 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -68,15 +68,7 @@ void __armada_drm_queue_unref_work(struct drm_device *dev, { struct armada_private *priv = dev->dev_private; - /* - * Yes, we really must jump through these hoops just to store a - * _pointer_ to something into the kfifo. This is utterly insane - * and idiotic, because it kfifo requires the _data_ pointed to by - * the pointer const, not the pointer itself. Not only that, but - * you have to pass a pointer _to_ the pointer you want stored. - */ - const struct drm_framebuffer *silly_api_alert = fb; - WARN_ON(!kfifo_put(&priv->fb_unref, &silly_api_alert)); + WARN_ON(!kfifo_put(&priv->fb_unref, fb)); schedule_work(&priv->fb_unref_work); } -- cgit v0.10.2 From c64d240df31513522901539a3206a41e9e6d00c8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 3 Mar 2014 10:57:56 -0500 Subject: dm: fix Kconfig indentation Since DM_DEBUG_BLOCK_STACK_TRACING is a DM_PERSISTENT_DATA config option move it from drivers/md/Kconfig to drivers/md/persistent-data/Kconfig. Doing so fixes indentation for other DM config options. Signed-off-by: Mike Snitzer diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 9a06fe8..95ad936 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -254,16 +254,6 @@ config DM_THIN_PROVISIONING ---help--- Provides thin provisioning and snapshots that share a data store. -config DM_DEBUG_BLOCK_STACK_TRACING - boolean "Keep stack trace of persistent data block lock holders" - depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA - select STACKTRACE - ---help--- - Enable this for messages that may help debug problems with the - block manager locking used by thin provisioning and caching. - - If unsure, say N. - config DM_CACHE tristate "Cache target (EXPERIMENTAL)" depends on BLK_DEV_DM diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index 19b2687..0c2dec7 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -6,3 +6,13 @@ config DM_PERSISTENT_DATA ---help--- Library providing immutable on-disk data structure support for device-mapper targets such as the thin provisioning target. + +config DM_DEBUG_BLOCK_STACK_TRACING + boolean "Keep stack trace of persistent data block lock holders" + depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA + select STACKTRACE + ---help--- + Enable this for messages that may help debug problems with the + block manager locking used by thin provisioning and caching. + + If unsure, say N. -- cgit v0.10.2 From 2564338b13e6e132ee224edb63e1e872adf431f4 Mon Sep 17 00:00:00 2001 From: Marios Andreopoulos Date: Mon, 3 Mar 2014 18:19:59 +0200 Subject: libata: disable queued TRIM for Crucial M500 mSATA SSDs Queued TRIM commands cause problems and silent file system corruption on Crucial M500 SSDs. This patch disables them for the mSATA model of the drive. Signed-off-by: Marios Andreopoulos Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # 3.12+ Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=71371 diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 1a3dbd1..191cdfb 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4225,6 +4225,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* devices that don't properly handle queued TRIM commands */ { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, { "Crucial_CT???M500SSD1", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + { "Crucial_CT???M500SSD3", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, /* * Some WD SATA-I drives spin up and down erratically when the link -- cgit v0.10.2 From 2c945820cab96ebf265598d998e63ef22393d0d4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 3 Mar 2014 17:19:22 -0500 Subject: dm snapshot: fix metadata corruption Commit 55494bf2947dccdf2 ("dm snapshot: use dm-bufio") broke snapshots. Before that 3.14-rc1 commit, loading a snapshot's list of exceptions involved reading exception areas one by one into ps->area and inserting those exceptions into the hash table. Commit 55494bf2947dccdf2 changed it so that dm-bufio with prefetch is used to load exceptions in batchs. Exceptions are loaded correctly, but ps->area is left uninitialized. When a new exception is allocated, it is stored in this uninitialized ps->area which will be written to the disk. This causes metadata corruption. Fix this corruption by copying the last area that was read via dm-bufio into ps->area. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index afc3d01..d6e8817 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -546,6 +546,9 @@ static int read_exceptions(struct pstore *ps, r = insert_exceptions(ps, area, callback, callback_context, &full); + if (!full) + memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT); + dm_bufio_release(bp); dm_bufio_forget(client, chunk); -- cgit v0.10.2 From 45ab2813d40d88fc575e753c38478de242d03f88 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 26 Feb 2014 13:37:38 -0500 Subject: tracing: Do not add event files for modules that fail tracepoints If a module fails to add its tracepoints due to module tainting, do not create the module event infrastructure in the debugfs directory. As the events will not work and worse yet, they will silently fail, making the user wonder why the events they enable do not display anything. Having a warning on module load and the events not visible to the users will make the cause of the problem much clearer. Link: http://lkml.kernel.org/r/20140227154923.265882695@goodmis.org Fixes: 6d723736e472 "tracing/events: add support for modules to TRACE_EVENT" Acked-by: Mathieu Desnoyers Cc: stable@vger.kernel.org # 2.6.31+ Cc: Rusty Russell Signed-off-by: Steven Rostedt diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index accc497..7159a0a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,6 +60,12 @@ struct tp_module { unsigned int num_tracepoints; struct tracepoint * const *tracepoints_ptrs; }; +bool trace_module_has_bad_taint(struct module *mod); +#else +static inline bool trace_module_has_bad_taint(struct module *mod) +{ + return false; +} #endif /* CONFIG_MODULES */ struct tracepoint_iter { diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e71ffd4..f3989ce 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1777,6 +1777,16 @@ static void trace_module_add_events(struct module *mod) { struct ftrace_event_call **call, **start, **end; + if (!mod->num_trace_events) + return; + + /* Don't add infrastructure for mods without tracepoints */ + if (trace_module_has_bad_taint(mod)) { + pr_err("%s: module has bad taint, not creating trace events\n", + mod->name); + return; + } + start = mod->trace_events; end = mod->trace_events + mod->num_trace_events; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 29f2654..031cc56 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -631,6 +631,11 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter) EXPORT_SYMBOL_GPL(tracepoint_iter_reset); #ifdef CONFIG_MODULES +bool trace_module_has_bad_taint(struct module *mod) +{ + return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP)); +} + static int tracepoint_module_coming(struct module *mod) { struct tp_module *tp_mod, *iter; @@ -641,7 +646,7 @@ static int tracepoint_module_coming(struct module *mod) * module headers (for forced load), to make sure we don't cause a crash. * Staging and out-of-tree GPL modules are fine. */ - if (mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP))) + if (trace_module_has_bad_taint(mod)) return 0; mutex_lock(&tracepoints_mutex); tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); -- cgit v0.10.2 From ea2787f350ea5f649fda6bb708c4182cf4ec06ca Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 4 Mar 2014 00:45:18 +0200 Subject: ASoC: n810: fix init with DT boot Since 3.14-rc1 only DT boot has been supported on N810, so this file fails to init. Make a minimal fix to retain functionality. This file should be properly converted to DT in longer term. There seems to be still other unresolved issues with N810 audio support, but this patch is needed at minimum as otherwise the machine driver probing would completely fail. Signed-off-by: Aaro Koskinen Acked-by: Jarkko Nikula Signed-off-by: Mark Brown diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index 3fde9e4..d163e18 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -305,7 +305,9 @@ static int __init n810_soc_init(void) int err; struct device *dev; - if (!(machine_is_nokia_n810() || machine_is_nokia_n810_wimax())) + if (!of_have_populated_dt() || + (!of_machine_is_compatible("nokia,n810") && + !of_machine_is_compatible("nokia,n810-wimax"))) return -ENODEV; n810_snd_device = platform_device_alloc("soc-audio", -1); -- cgit v0.10.2 From 3b4467522630b7ea0d65a691007ef0a93d471f8f Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 4 Mar 2014 16:20:18 +0800 Subject: ALSA: hda - add automute fix for another dell AIO model When plugging a headphone or headset, lots of noise is heard from internal speaker, after changing the automute via amp instead of pinctl, the noise disappears. BugLink: https://bugs.launchpad.net/bugs/1268468 Cc: David Henningsson Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7ba7a11f..850296a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5164,7 +5164,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0625, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0626, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0628, "Dell", ALC668_FIXUP_AUTO_MUTE), - SND_PCI_QUIRK(0x1028, 0x064e, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x064e, "Dell", ALC668_FIXUP_AUTO_MUTE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A_CHMAP), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_CHMAP), -- cgit v0.10.2 From cdc2b4158405f1975f9d5205096f08430eda1c0e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 14 Feb 2014 18:10:55 -0500 Subject: dm thin: synchronize the pool mode during suspend Commit b5330655 ("dm thin: handle metadata failures more consistently") increased potential for the pool's mode to be changed in response to metadata operation failures. When the pool mode is changed it isn't synchronized with the mode in pool_features stored in the target's context (ti->private) that is used as the basis for (re)establishing the pool mode during resume via bind_control_target. It is important that we synchronize the pool mode when it is changed otherwise the pool may experience and unexpected mode transition on the next resume (especially if there was no new table load). Signed-off-by: Mike Snitzer Acked-by: Joe Thornber diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 7e84bac..dfa48ec 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1402,6 +1402,7 @@ static enum pool_mode get_pool_mode(struct pool *pool) static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) { int r; + struct pool_c *pt = pool->ti->private; enum pool_mode old_mode = pool->pf.mode; switch (new_mode) { @@ -1448,6 +1449,11 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) } pool->pf.mode = new_mode; + /* + * The pool mode may have changed, sync it so bind_control_target() + * doesn't cause an unexpected mode transition on resume. + */ + pt->adjusted_pf.mode = new_mode; } /* -- cgit v0.10.2 From 1e9291996c4eedf79883f47ec635235e39d3d6cd Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 4 Mar 2014 10:28:23 +0200 Subject: iwlwifi: mvm: don't WARN when statistics are handled late Since the statistics handler is asynchrous, it can very well be that we will handle the statistics (hence the RSSI fluctuation) when we already disassociated. Don't WARN on this case. This solves: https://bugzilla.redhat.com/show_bug.cgi?id=1071998 Cc: [3.10+] Fixes: 2b76ef13086f ("iwlwifi: mvm: implement reduced Tx power") Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/mvm/bt-coex.c b/drivers/net/wireless/iwlwifi/mvm/bt-coex.c index 76cde6c..18a895a 100644 --- a/drivers/net/wireless/iwlwifi/mvm/bt-coex.c +++ b/drivers/net/wireless/iwlwifi/mvm/bt-coex.c @@ -872,8 +872,11 @@ void iwl_mvm_bt_rssi_event(struct iwl_mvm *mvm, struct ieee80211_vif *vif, lockdep_assert_held(&mvm->mutex); - /* Rssi update while not associated ?! */ - if (WARN_ON_ONCE(mvmvif->ap_sta_id == IWL_MVM_STATION_COUNT)) + /* + * Rssi update while not associated - can happen since the statistics + * are handled asynchronously + */ + if (mvmvif->ap_sta_id == IWL_MVM_STATION_COUNT) return; /* No BT - reports should be disabled */ -- cgit v0.10.2 From acfcd9ed588465e48efe5ceb67177096d51c95a9 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Tue, 4 Mar 2014 14:47:39 +0200 Subject: iwlwifi: fix and add 7265 series HW IDs Update of the HW IDs for the 7265 series. Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index f47bcbe..3872ead 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -359,13 +359,12 @@ static DEFINE_PCI_DEVICE_TABLE(iwl_hw_card_ids) = { /* 7265 Series */ {IWL_PCI_DEVICE(0x095A, 0x5010, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5110, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x5112, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5100, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x510A, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5310, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_n_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5210, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5012, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5412, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5410, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5400, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x1010, iwl7265_2ac_cfg)}, -- cgit v0.10.2 From b6251243ac0a3cb63afe748133f6600f77526078 Mon Sep 17 00:00:00 2001 From: Ivaylo Dimitrov Date: Sat, 1 Mar 2014 14:52:06 +0200 Subject: wl1251: use skb_trim to make skb shorter the current code is directly setting skb->len, which is not correct and brings problems with HAVE_EFFICIENT_UNALIGNED_ACCESS enabled in config Signed-off-by: Ivaylo Dimitrov Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/ti/wl1251/rx.c b/drivers/net/wireless/ti/wl1251/rx.c index 123c4bb..cde0eaf 100644 --- a/drivers/net/wireless/ti/wl1251/rx.c +++ b/drivers/net/wireless/ti/wl1251/rx.c @@ -180,7 +180,7 @@ static void wl1251_rx_body(struct wl1251 *wl, wl1251_mem_read(wl, rx_packet_ring_addr, rx_buffer, length); /* The actual length doesn't include the target's alignment */ - skb->len = desc->length - PLCP_HEADER_LENGTH; + skb_trim(skb, desc->length - PLCP_HEADER_LENGTH); fc = (u16 *)skb->data; -- cgit v0.10.2 From ae72758f1dd93bd367dc7719702f24a9bfb3bad9 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Mon, 11 Nov 2013 08:28:33 -0500 Subject: c6x: fix build failure caused by cache.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A patch to linux/irqflags.h uncovered a problem with c6x asm/cache.h which causes a build failure: /arch/c6x/include/asm/cache.h:63:20: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘c6x_cache_init’ extern void __init c6x_cache_init(void); The asm/cache.h was relying on linux/irqflags.h to pull in linux/init.h but the recent patch changed that. The c6x header should have included linux/init.h all along. Signed-off-by: Mark Salter diff --git a/arch/c6x/include/asm/cache.h b/arch/c6x/include/asm/cache.h index 09c5a0f..86648c0 100644 --- a/arch/c6x/include/asm/cache.h +++ b/arch/c6x/include/asm/cache.h @@ -12,6 +12,7 @@ #define _ASM_C6X_CACHE_H #include +#include /* * Cache line size -- cgit v0.10.2 From a5d90c923bcfb9632d998ed06e9569216ad695f3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 4 Mar 2014 17:02:17 +0100 Subject: x86/efi: Quirk out SGI UV Alex reported hitting the following BUG after the EFI 1:1 virtual mapping work was merged, kernel BUG at arch/x86/mm/init_64.c:351! invalid opcode: 0000 [#1] SMP Call Trace: [] init_extra_mapping_uc+0x13/0x15 [] uv_system_init+0x22b/0x124b [] ? clockevents_register_device+0x138/0x13d [] ? setup_APIC_timer+0xc5/0xc7 [] ? clockevent_delta2ns+0xb/0xd [] ? setup_boot_APIC_clock+0x4a8/0x4b7 [] ? printk+0x72/0x74 [] native_smp_prepare_cpus+0x389/0x3d6 [] kernel_init_freeable+0xb7/0x1fb [] ? rest_init+0x74/0x74 [] kernel_init+0x9/0xff [] ret_from_fork+0x7c/0xb0 [] ? rest_init+0x74/0x74 Getting this thing to work with the new mapping scheme would need more work, so automatically switch to the old memmap layout for SGI UV. Acked-by: Russ Anderson Cc: Alex Thorlton Signed-off-by: Matt Fleming diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3d6b9f8..acd86c8 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -134,6 +134,7 @@ extern void efi_setup_page_tables(void); extern void __init old_map_region(efi_memory_desc_t *md); extern void __init runtime_code_page_mkexec(void); extern void __init efi_runtime_mkexec(void); +extern void __init efi_apply_memmap_quirks(void); struct efi_setup_data { u64 fw_vendor; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 06853e6..ce72964 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1239,14 +1239,8 @@ void __init setup_arch(char **cmdline_p) register_refined_jiffies(CLOCK_TICK_RATE); #ifdef CONFIG_EFI - /* Once setup is done above, unmap the EFI memory map on - * mismatched firmware/kernel archtectures since there is no - * support for runtime services. - */ - if (efi_enabled(EFI_BOOT) && !efi_is_native()) { - pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); - efi_unmap_memmap(); - } + if (efi_enabled(EFI_BOOT)) + efi_apply_memmap_quirks(); #endif } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1a201ac..b97acec 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -52,6 +52,7 @@ #include #include #include +#include #define EFI_DEBUG @@ -1210,3 +1211,22 @@ static int __init parse_efi_cmdline(char *str) return 0; } early_param("efi", parse_efi_cmdline); + +void __init efi_apply_memmap_quirks(void) +{ + /* + * Once setup is done earlier, unmap the EFI memory map on mismatched + * firmware/kernel architectures since there is no support for runtime + * services. + */ + if (!efi_is_native()) { + pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); + efi_unmap_memmap(); + } + + /* + * UV doesn't support the new EFI pagetable mapping yet. + */ + if (is_uv_system()) + set_bit(EFI_OLD_MEMMAP, &x86_efi_facility); +} -- cgit v0.10.2 From 0ac09f9f8cd1fb028a48330edba6023d347d3cea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 28 Feb 2014 17:05:26 +0100 Subject: x86, trace: Fix CR2 corruption when tracing page faults The trace_do_page_fault function trigger tracepoint and then handles the actual page fault. This could lead to error if the tracepoint caused page fault. The original cr2 value gets lost and the original page fault handler kills current process with SIGSEGV. This happens if you record page faults with callchain data, the user part of it will cause tracepoint handler to page fault: # perf record -g -e exceptions:page_fault_user ls Fixing this by saving the original cr2 value and using it after tracepoint handler is done. v2: Moving the cr2 read before exception_enter, because it could trigger tracepoint as well. Reported-by: Arnaldo Carvalho de Melo Reported-by: Vince Weaver Tested-by: Vince Weaver Acked-by: Steven Rostedt Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Seiji Aguchi Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1402211701380.6395@vincent-weaver-1.um.maine.edu Link: http://lkml.kernel.org/r/20140228160526.GD1133@krava.brq.redhat.com diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6dea040..e7fa28b 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1022,11 +1022,11 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * routines. */ static void __kprobes -__do_page_fault(struct pt_regs *regs, unsigned long error_code) +__do_page_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address) { struct vm_area_struct *vma; struct task_struct *tsk; - unsigned long address; struct mm_struct *mm; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -1034,9 +1034,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) tsk = current; mm = tsk->mm; - /* Get the faulting address: */ - address = read_cr2(); - /* * Detect and handle instructions that would cause a page fault for * both a tracked kernel page and a userspace page. @@ -1252,9 +1249,11 @@ dotraplinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; + /* Get the faulting address: */ + unsigned long address = read_cr2(); prev_state = exception_enter(); - __do_page_fault(regs, error_code); + __do_page_fault(regs, error_code, address); exception_exit(prev_state); } @@ -1271,9 +1270,16 @@ dotraplinkage void __kprobes trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { enum ctx_state prev_state; + /* + * The exception_enter and tracepoint processing could + * trigger another page faults (user space callchain + * reading) and destroy the original cr2 value, so read + * the faulting address now. + */ + unsigned long address = read_cr2(); prev_state = exception_enter(); trace_page_fault_entries(regs, error_code); - __do_page_fault(regs, error_code); + __do_page_fault(regs, error_code, address); exception_exit(prev_state); } -- cgit v0.10.2 From 084b6e7765b9554699afa23a50e702a3d0ae4b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Salva=20Peir=C3=B3?= Date: Mon, 3 Mar 2014 08:44:04 +0100 Subject: staging/cxt1e1/linux.c: Correct arbitrary memory write in c4_ioctl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function c4_ioctl() writes data from user in ifr->ifr_data to the kernel struct data arg, without any iolen bounds checking. This can lead to a arbitrary write outside of the struct data arg. Corrected by adding bounds-checking of iolen before the copy_from_user(). Signed-off-by: Salva Peiró Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/cxt1e1/linux.c b/drivers/staging/cxt1e1/linux.c index 4a08e16..79206cb 100644 --- a/drivers/staging/cxt1e1/linux.c +++ b/drivers/staging/cxt1e1/linux.c @@ -866,6 +866,8 @@ c4_ioctl (struct net_device *ndev, struct ifreq *ifr, int cmd) _IOC_SIZE (iocmd)); #endif iolen = _IOC_SIZE (iocmd); + if (iolen > sizeof(arg)) + return -EFAULT; data = ifr->ifr_data + sizeof (iocmd); if (copy_from_user (&arg, data, iolen)) return -EFAULT; -- cgit v0.10.2 From a2a99cea5ec7c1e47825559f0e75a4efbcf8aee3 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 26 Feb 2014 03:09:41 -0800 Subject: iscsi-target: Fix iscsit_get_tpg_from_np tpg_state bug This patch fixes a bug in iscsit_get_tpg_from_np() where the tpg->tpg_state sanity check was looking for TPG_STATE_FREE, instead of != TPG_STATE_ACTIVE. The latter is expected during a normal TPG shutdown once the tpg_state goes into TPG_STATE_INACTIVE in order to reject any new incoming login attempts. Cc: #3.10+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 3976183..44a5471 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -137,7 +137,7 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np( list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { spin_lock(&tpg->tpg_state_lock); - if (tpg->tpg_state == TPG_STATE_FREE) { + if (tpg->tpg_state != TPG_STATE_ACTIVE) { spin_unlock(&tpg->tpg_state_lock); continue; } -- cgit v0.10.2 From 5159d763f60af693a3fcec45dce2021f66e528a4 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 3 Feb 2014 12:53:51 -0800 Subject: iscsi/iser-target: Use list_del_init for ->i_conn_node There are a handful of uses of list_empty() for cmd->i_conn_node within iser-target code that expect to return false once a cmd has been removed from the per connect list. This patch changes all uses of list_del -> list_del_init in order to ensure that list_empty() returns false as expected. Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.10+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index d18d08a..5eb2b88 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1464,7 +1464,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_SCSI_CMD: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); if (cmd->data_direction == DMA_TO_DEVICE) @@ -1476,7 +1476,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_SCSI_TMFUNC: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); transport_generic_free_cmd(&cmd->se_cmd, 0); @@ -1486,7 +1486,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd) case ISCSI_OP_TEXT: spin_lock_bh(&conn->cmd_lock); if (!list_empty(&cmd->i_conn_node)) - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); /* diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 7f1a7ce..ece82b0 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -785,7 +785,7 @@ static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn) spin_unlock_bh(&conn->cmd_lock); list_for_each_entry_safe(cmd, cmd_p, &ack_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); iscsit_free_cmd(cmd, false); } } @@ -3708,7 +3708,7 @@ iscsit_immediate_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state break; case ISTATE_REMOVE: spin_lock_bh(&conn->cmd_lock); - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, false); @@ -4151,7 +4151,7 @@ static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) spin_lock_bh(&conn->cmd_lock); list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_increment_maxcmdsn(cmd, sess); diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c index 33be1fb..4ca8fd2 100644 --- a/drivers/target/iscsi/iscsi_target_erl2.c +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -138,7 +138,7 @@ void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) list_for_each_entry_safe(cmd, cmd_tmp, &cr->conn_recovery_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); cmd->conn = NULL; spin_unlock(&cr->conn_recovery_cmd_lock); iscsit_free_cmd(cmd, true); @@ -160,7 +160,7 @@ void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) list_for_each_entry_safe(cmd, cmd_tmp, &cr->conn_recovery_cmd_list, i_conn_node) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); cmd->conn = NULL; spin_unlock(&cr->conn_recovery_cmd_lock); iscsit_free_cmd(cmd, true); @@ -216,7 +216,7 @@ int iscsit_remove_cmd_from_connection_recovery( } cr = cmd->cr; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); return --cr->cmd_count; } @@ -297,7 +297,7 @@ int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn) if (!(cmd->cmd_flags & ICF_OOO_CMDSN)) continue; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); @@ -335,7 +335,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) /* * Only perform connection recovery on ISCSI_OP_SCSI_CMD or * ISCSI_OP_NOOP_OUT opcodes. For all other opcodes call - * list_del(&cmd->i_conn_node); to release the command to the + * list_del_init(&cmd->i_conn_node); to release the command to the * session pool and remove it from the connection's list. * * Also stop the DataOUT timer, which will be restarted after @@ -351,7 +351,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) " CID: %hu\n", cmd->iscsi_opcode, cmd->init_task_tag, cmd->cmd_sn, conn->cid); - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); @@ -371,7 +371,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) */ if (!(cmd->cmd_flags & ICF_OOO_CMDSN) && !cmd->immediate_cmd && iscsi_sna_gte(cmd->cmd_sn, conn->sess->exp_cmd_sn)) { - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_cmd(cmd, true); spin_lock_bh(&conn->cmd_lock); @@ -393,7 +393,7 @@ int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) cmd->sess = conn->sess; - list_del(&cmd->i_conn_node); + list_del_init(&cmd->i_conn_node); spin_unlock_bh(&conn->cmd_lock); iscsit_free_all_datain_reqs(cmd); -- cgit v0.10.2 From defd884845297fd5690594bfe89656b01f16d87e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 3 Feb 2014 12:54:39 -0800 Subject: iscsi/iser-target: Fix isert_conn->state hung shutdown issues This patch addresses a couple of different hug shutdown issues related to wait_event() + isert_conn->state. First, it changes isert_conn->conn_wait + isert_conn->conn_wait_comp_err from waitqueues to completions, and sets ISER_CONN_TERMINATING from within isert_disconnect_work(). Second, it splits isert_free_conn() into isert_wait_conn() that is called earlier in iscsit_close_connection() to ensure that all outstanding commands have completed before continuing. Finally, it breaks isert_cq_comp_err() into seperate TX / RX related code, and adds logic in isert_cq_rx_comp_err() to wait for outstanding commands to complete before setting ISER_CONN_DOWN and calling complete(&isert_conn->conn_wait_comp_err). Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.10+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 5eb2b88..862d7b4 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -492,8 +492,8 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) isert_conn->state = ISER_CONN_INIT; INIT_LIST_HEAD(&isert_conn->conn_accept_node); init_completion(&isert_conn->conn_login_comp); - init_waitqueue_head(&isert_conn->conn_wait); - init_waitqueue_head(&isert_conn->conn_wait_comp_err); + init_completion(&isert_conn->conn_wait); + init_completion(&isert_conn->conn_wait_comp_err); kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); @@ -688,11 +688,11 @@ isert_disconnect_work(struct work_struct *work) pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); mutex_lock(&isert_conn->conn_mutex); - isert_conn->state = ISER_CONN_DOWN; + if (isert_conn->state == ISER_CONN_UP) + isert_conn->state = ISER_CONN_TERMINATING; if (isert_conn->post_recv_buf_count == 0 && atomic_read(&isert_conn->post_send_buf_count) == 0) { - pr_debug("Calling wake_up(&isert_conn->conn_wait);\n"); mutex_unlock(&isert_conn->conn_mutex); goto wake_up; } @@ -712,7 +712,7 @@ isert_disconnect_work(struct work_struct *work) mutex_unlock(&isert_conn->conn_mutex); wake_up: - wake_up(&isert_conn->conn_wait); + complete(&isert_conn->conn_wait); isert_put_conn(isert_conn); } @@ -1589,7 +1589,7 @@ isert_do_control_comp(struct work_struct *work) pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n"); /* * Call atomic_dec(&isert_conn->post_send_buf_count) - * from isert_free_conn() + * from isert_wait_conn() */ isert_conn->logout_posted = true; iscsit_logout_post_handler(cmd, cmd->conn); @@ -1691,31 +1691,39 @@ isert_send_completion(struct iser_tx_desc *tx_desc, } static void -isert_cq_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) +isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + + if (!isert_cmd) + isert_unmap_tx_desc(tx_desc, ib_dev); + else + isert_completion_put(tx_desc, isert_cmd, ib_dev); +} + +static void +isert_cq_rx_comp_err(struct isert_conn *isert_conn) +{ + struct ib_device *ib_dev = isert_conn->conn_cm_id->device; + struct iscsi_conn *conn = isert_conn->conn; - if (tx_desc) { - struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + if (isert_conn->post_recv_buf_count) + return; - if (!isert_cmd) - isert_unmap_tx_desc(tx_desc, ib_dev); - else - isert_completion_put(tx_desc, isert_cmd, ib_dev); + if (conn->sess) { + target_sess_cmd_list_set_waiting(conn->sess->se_sess); + target_wait_for_sess_cmds(conn->sess->se_sess); } - if (isert_conn->post_recv_buf_count == 0 && - atomic_read(&isert_conn->post_send_buf_count) == 0) { - pr_debug("isert_cq_comp_err >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - pr_debug("Calling wake_up from isert_cq_comp_err\n"); + while (atomic_read(&isert_conn->post_send_buf_count)) + msleep(3000); - mutex_lock(&isert_conn->conn_mutex); - if (isert_conn->state != ISER_CONN_DOWN) - isert_conn->state = ISER_CONN_TERMINATING; - mutex_unlock(&isert_conn->conn_mutex); + mutex_lock(&isert_conn->conn_mutex); + isert_conn->state = ISER_CONN_DOWN; + mutex_unlock(&isert_conn->conn_mutex); - wake_up(&isert_conn->conn_wait_comp_err); - } + complete(&isert_conn->conn_wait_comp_err); } static void @@ -1740,8 +1748,9 @@ isert_cq_tx_work(struct work_struct *work) pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n"); pr_debug("TX wc.status: 0x%08x\n", wc.status); pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); + atomic_dec(&isert_conn->post_send_buf_count); - isert_cq_comp_err(tx_desc, isert_conn); + isert_cq_tx_comp_err(tx_desc, isert_conn); } } @@ -1784,7 +1793,7 @@ isert_cq_rx_work(struct work_struct *work) wc.vendor_err); } isert_conn->post_recv_buf_count--; - isert_cq_comp_err(NULL, isert_conn); + isert_cq_rx_comp_err(isert_conn); } } @@ -2702,22 +2711,11 @@ isert_free_np(struct iscsi_np *np) kfree(isert_np); } -static int isert_check_state(struct isert_conn *isert_conn, int state) -{ - int ret; - - mutex_lock(&isert_conn->conn_mutex); - ret = (isert_conn->state == state); - mutex_unlock(&isert_conn->conn_mutex); - - return ret; -} - -static void isert_free_conn(struct iscsi_conn *conn) +static void isert_wait_conn(struct iscsi_conn *conn) { struct isert_conn *isert_conn = conn->context; - pr_debug("isert_free_conn: Starting \n"); + pr_debug("isert_wait_conn: Starting \n"); /* * Decrement post_send_buf_count for special case when called * from isert_do_control_comp() -> iscsit_logout_post_handler() @@ -2727,38 +2725,29 @@ static void isert_free_conn(struct iscsi_conn *conn) atomic_dec(&isert_conn->post_send_buf_count); if (isert_conn->conn_cm_id && isert_conn->state != ISER_CONN_DOWN) { - pr_debug("Calling rdma_disconnect from isert_free_conn\n"); + pr_debug("Calling rdma_disconnect from isert_wait_conn\n"); rdma_disconnect(isert_conn->conn_cm_id); } /* * Only wait for conn_wait_comp_err if the isert_conn made it * into full feature phase.. */ - if (isert_conn->state == ISER_CONN_UP) { - pr_debug("isert_free_conn: Before wait_event comp_err %d\n", - isert_conn->state); - mutex_unlock(&isert_conn->conn_mutex); - - wait_event(isert_conn->conn_wait_comp_err, - (isert_check_state(isert_conn, ISER_CONN_TERMINATING))); - - wait_event(isert_conn->conn_wait, - (isert_check_state(isert_conn, ISER_CONN_DOWN))); - - isert_put_conn(isert_conn); - return; - } if (isert_conn->state == ISER_CONN_INIT) { mutex_unlock(&isert_conn->conn_mutex); - isert_put_conn(isert_conn); return; } - pr_debug("isert_free_conn: wait_event conn_wait %d\n", - isert_conn->state); + if (isert_conn->state == ISER_CONN_UP) + isert_conn->state = ISER_CONN_TERMINATING; mutex_unlock(&isert_conn->conn_mutex); - wait_event(isert_conn->conn_wait, - (isert_check_state(isert_conn, ISER_CONN_DOWN))); + wait_for_completion(&isert_conn->conn_wait_comp_err); + + wait_for_completion(&isert_conn->conn_wait); +} + +static void isert_free_conn(struct iscsi_conn *conn) +{ + struct isert_conn *isert_conn = conn->context; isert_put_conn(isert_conn); } @@ -2771,6 +2760,7 @@ static struct iscsit_transport iser_target_transport = { .iscsit_setup_np = isert_setup_np, .iscsit_accept_np = isert_accept_np, .iscsit_free_np = isert_free_np, + .iscsit_wait_conn = isert_wait_conn, .iscsit_free_conn = isert_free_conn, .iscsit_get_login_rx = isert_get_login_rx, .iscsit_put_login_tx = isert_put_login_tx, diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 708a069..41e799f 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -116,8 +116,8 @@ struct isert_conn { struct isert_device *conn_device; struct work_struct conn_logout_work; struct mutex conn_mutex; - wait_queue_head_t conn_wait; - wait_queue_head_t conn_wait_comp_err; + struct completion conn_wait; + struct completion conn_wait_comp_err; struct kref conn_kref; struct list_head conn_fr_pool; int conn_fr_pool_size; diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index ece82b0..b83ec37 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4196,6 +4196,10 @@ int iscsit_close_connection( iscsit_stop_timers_for_cmds(conn); iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn); + + if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + iscsit_free_queue_reqs_for_conn(conn); /* diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h index ae5a171..4483fad 100644 --- a/include/target/iscsi/iscsi_transport.h +++ b/include/target/iscsi/iscsi_transport.h @@ -12,6 +12,7 @@ struct iscsit_transport { int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *); int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *); void (*iscsit_free_np)(struct iscsi_np *); + void (*iscsit_wait_conn)(struct iscsi_conn *); void (*iscsit_free_conn)(struct iscsi_conn *); int (*iscsit_get_login_rx)(struct iscsi_conn *, struct iscsi_login *); int (*iscsit_put_login_tx)(struct iscsi_conn *, struct iscsi_login *, u32); -- cgit v0.10.2 From b6b87a1df604678ed1be40158080db012a99ccca Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 27 Feb 2014 09:05:03 -0800 Subject: iser-target: Fix post_send_buf_count for RDMA READ/WRITE This patch fixes the incorrect setting of ->post_send_buf_count related to RDMA WRITEs + READs where isert_rdma_rw->send_wr_num was not being taken into account. This includes incrementing ->post_send_buf_count within isert_put_datain() + isert_get_dataout(), decrementing within __isert_send_completion() + isert_response_completion(), and clearing wr->send_wr_num within isert_completion_rdma_read() This is necessary because even though IB_SEND_SIGNALED is not set for RDMA WRITEs + READs, during a QP failure event the work requests will be returned with exception status from the TX completion queue. Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.10+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 862d7b4..a70b0cf 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1549,6 +1549,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, iscsit_stop_dataout_timer(cmd); device->unreg_rdma_mem(isert_cmd, isert_conn); cmd->write_data_done = wr->cur_rdma_length; + wr->send_wr_num = 0; pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd); spin_lock_bh(&cmd->istate_lock); @@ -1613,6 +1614,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev) { struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd; + struct isert_rdma_wr *wr = &isert_cmd->rdma_wr; if (cmd->i_state == ISTATE_SEND_TASKMGTRSP || cmd->i_state == ISTATE_SEND_LOGOUTRSP || @@ -1624,7 +1626,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc, queue_work(isert_comp_wq, &isert_cmd->comp_work); return; } - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); cmd->i_state = ISTATE_SENT_STATUS; isert_completion_put(tx_desc, isert_cmd, ib_dev); @@ -1662,7 +1664,7 @@ __isert_send_completion(struct iser_tx_desc *tx_desc, case ISER_IB_RDMA_READ: pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); isert_completion_rdma_read(tx_desc, isert_cmd); break; default: @@ -2386,12 +2388,12 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) isert_init_send_wr(isert_conn, isert_cmd, &isert_cmd->tx_desc.send_wr, true); - atomic_inc(&isert_conn->post_send_buf_count); + atomic_add(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); if (rc) { pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); } pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n", isert_cmd); @@ -2419,12 +2421,12 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery) return rc; } - atomic_inc(&isert_conn->post_send_buf_count); + atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count); rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed); if (rc) { pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n"); - atomic_dec(&isert_conn->post_send_buf_count); + atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count); } pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n", isert_cmd); -- cgit v0.10.2 From 9bb4ca68fc48eea618b1af1c1cde2a251ae32d1b Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 27 Feb 2014 07:02:48 -0800 Subject: iser-target: Ignore completions for FRWRs in isert_cq_tx_work This patch changes IB_WR_FAST_REG_MR + IB_WR_LOCAL_INV related work requests to include a ISER_FRWR_LI_WRID value in order to signal isert_cq_tx_work() that these requests should be ignored. This is necessary because even though IB_SEND_SIGNALED is not set for either work request, during a QP failure event the work requests will be returned with exception status from the TX completion queue. v2 changes: - Rename ISER_FRWR_LI_WRID -> ISER_FASTREG_LI_WRID (Sagi) Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.12+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index a70b0cf..614a6ef 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1751,8 +1751,10 @@ isert_cq_tx_work(struct work_struct *work) pr_debug("TX wc.status: 0x%08x\n", wc.status); pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); - atomic_dec(&isert_conn->post_send_buf_count); - isert_cq_tx_comp_err(tx_desc, isert_conn); + if (wc.wr_id != ISER_FASTREG_LI_WRID) { + atomic_dec(&isert_conn->post_send_buf_count); + isert_cq_tx_comp_err(tx_desc, isert_conn); + } } } @@ -2213,6 +2215,7 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, if (!fr_desc->valid) { memset(&inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = ISER_FASTREG_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey; wr = &inv_wr; @@ -2223,6 +2226,7 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, /* Prepare FASTREG WR */ memset(&fr_wr, 0, sizeof(fr_wr)); + fr_wr.wr_id = ISER_FASTREG_LI_WRID; fr_wr.opcode = IB_WR_FAST_REG_MR; fr_wr.wr.fast_reg.iova_start = fr_desc->data_frpl->page_list[0] + page_off; diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 41e799f..10be55a 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -6,6 +6,7 @@ #define ISERT_RDMA_LISTEN_BACKLOG 10 #define ISCSI_ISER_SG_TABLESIZE 256 +#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL enum isert_desc_type { ISCSI_TX_CONTROL, -- cgit v0.10.2 From ebbe442183b7b8192c963266f1c89048fefc63a5 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 2 Mar 2014 14:51:12 -0800 Subject: iser-target: Fix command leak for tx_desc->comp_llnode_batch This patch addresses a number of active I/O shutdown issues related to isert_cmd descriptors being leaked that are part of a completion interrupt coalescing batch. This includes adding logic in isert_cq_tx_comp_err() to drain any associated tx_desc->comp_llnode_batch, as well as isert_cq_drain_comp_llist() to drain any associated isert_conn->conn_comp_llist. Also, set tx_desc->llnode_active in isert_init_send_wr() in order to determine when work requests need to be skipped in isert_cq_tx_work() exception path code. Finally, update isert_init_send_wr() to only allow interrupt coalescing when ISER_CONN_UP. Acked-by: Sagi Grimberg Cc: Or Gerlitz Cc: #3.13+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 614a6ef..8ee228e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -497,7 +497,6 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) kref_init(&isert_conn->conn_kref); kref_get(&isert_conn->conn_kref); mutex_init(&isert_conn->conn_mutex); - mutex_init(&isert_conn->conn_comp_mutex); spin_lock_init(&isert_conn->conn_lock); cma_id->context = isert_conn; @@ -888,16 +887,17 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls. */ - mutex_lock(&isert_conn->conn_comp_mutex); - if (coalesce && + mutex_lock(&isert_conn->conn_mutex); + if (coalesce && isert_conn->state == ISER_CONN_UP && ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) { + tx_desc->llnode_active = true; llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist); - mutex_unlock(&isert_conn->conn_comp_mutex); + mutex_unlock(&isert_conn->conn_mutex); return; } isert_conn->conn_comp_batch = 0; tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist); - mutex_unlock(&isert_conn->conn_comp_mutex); + mutex_unlock(&isert_conn->conn_mutex); send_wr->send_flags = IB_SEND_SIGNALED; } @@ -1693,10 +1693,45 @@ isert_send_completion(struct iser_tx_desc *tx_desc, } static void +isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev) +{ + struct llist_node *llnode; + struct isert_rdma_wr *wr; + struct iser_tx_desc *t; + + mutex_lock(&isert_conn->conn_mutex); + llnode = llist_del_all(&isert_conn->conn_comp_llist); + isert_conn->conn_comp_batch = 0; + mutex_unlock(&isert_conn->conn_mutex); + + while (llnode) { + t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); + llnode = llist_next(llnode); + wr = &t->isert_cmd->rdma_wr; + + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); + isert_completion_put(t, t->isert_cmd, ib_dev); + } +} + +static void isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn) { struct ib_device *ib_dev = isert_conn->conn_cm_id->device; struct isert_cmd *isert_cmd = tx_desc->isert_cmd; + struct llist_node *llnode = tx_desc->comp_llnode_batch; + struct isert_rdma_wr *wr; + struct iser_tx_desc *t; + + while (llnode) { + t = llist_entry(llnode, struct iser_tx_desc, comp_llnode); + llnode = llist_next(llnode); + wr = &t->isert_cmd->rdma_wr; + + atomic_sub(wr->send_wr_num + 1, &isert_conn->post_send_buf_count); + isert_completion_put(t, t->isert_cmd, ib_dev); + } + tx_desc->comp_llnode_batch = NULL; if (!isert_cmd) isert_unmap_tx_desc(tx_desc, ib_dev); @@ -1713,6 +1748,8 @@ isert_cq_rx_comp_err(struct isert_conn *isert_conn) if (isert_conn->post_recv_buf_count) return; + isert_cq_drain_comp_llist(isert_conn, ib_dev); + if (conn->sess) { target_sess_cmd_list_set_waiting(conn->sess->se_sess); target_wait_for_sess_cmds(conn->sess->se_sess); @@ -1752,6 +1789,9 @@ isert_cq_tx_work(struct work_struct *work) pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err); if (wc.wr_id != ISER_FASTREG_LI_WRID) { + if (tx_desc->llnode_active) + continue; + atomic_dec(&isert_conn->post_send_buf_count); isert_cq_tx_comp_err(tx_desc, isert_conn); } diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 10be55a..f6ae7f5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -46,6 +46,7 @@ struct iser_tx_desc { struct isert_cmd *isert_cmd; struct llist_node *comp_llnode_batch; struct llist_node comp_llnode; + bool llnode_active; struct ib_send_wr send_wr; } __packed; @@ -127,7 +128,6 @@ struct isert_conn { #define ISERT_COMP_BATCH_COUNT 8 int conn_comp_batch; struct llist_head conn_comp_llist; - struct mutex conn_comp_mutex; }; #define ISERT_MAX_CQ 64 -- cgit v0.10.2 From 905a5117e79367b7e58ae046d12ca9961f048c89 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 11 Feb 2014 00:22:37 +0800 Subject: pinctrl: sunxi: use chained_irq_{enter, exit} for GIC compatibility On tha Allwinner A20 SoC, the external interrupts on the pin controller device are connected to the GIC. Without chained_irq_{enter, exit}, external GPIO interrupts, such as used by mmc core card detect, cause the system to hang. This issue was first encountered during my attempt to get out-of-band interrupts for WiFi on the Cubietruck working. With David's new series of sunci-mci using mmc slot-gpio for (GPIO interrupt based) card detection, removing the SD card also causes my Cubietruck to hang. This problem should extend to all Allwinner A20 based boards. With this fix, the system no longer hangs when I remove or insert the SD card. /proc/interrupts show that the interrupt has correctly fired. However the system still does not detect card removal/insertion. I believe this is another unrelated issue. Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-sunxi.c b/drivers/pinctrl/pinctrl-sunxi.c index 9ccf681..787f352 100644 --- a/drivers/pinctrl/pinctrl-sunxi.c +++ b/drivers/pinctrl/pinctrl-sunxi.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -665,6 +666,7 @@ static struct irq_chip sunxi_pinctrl_irq_chip = { static void sunxi_pinctrl_irq_handler(unsigned irq, struct irq_desc *desc) { + struct irq_chip *chip = irq_get_chip(irq); struct sunxi_pinctrl *pctl = irq_get_handler_data(irq); const unsigned long reg = readl(pctl->membase + IRQ_STATUS_REG); @@ -674,10 +676,12 @@ static void sunxi_pinctrl_irq_handler(unsigned irq, struct irq_desc *desc) if (reg) { int irqoffset; + chained_irq_enter(chip, desc); for_each_set_bit(irqoffset, ®, SUNXI_IRQ_NUMBER) { int pin_irq = irq_find_mapping(pctl->domain, irqoffset); generic_handle_irq(pin_irq); } + chained_irq_exit(chip, desc); } } -- cgit v0.10.2 From d82f94013a9897e02a93400e4c16efe82b530eaa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 17 Feb 2014 22:19:43 +0100 Subject: pinctrl: sunxi: Fix masking when setting irq type Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-sunxi.c b/drivers/pinctrl/pinctrl-sunxi.c index 787f352..f9fabe9 100644 --- a/drivers/pinctrl/pinctrl-sunxi.c +++ b/drivers/pinctrl/pinctrl-sunxi.c @@ -585,7 +585,7 @@ static int sunxi_pinctrl_irq_set_type(struct irq_data *d, spin_lock_irqsave(&pctl->lock, flags); regval = readl(pctl->membase + reg); - regval &= ~IRQ_CFG_IRQ_MASK; + regval &= ~(IRQ_CFG_IRQ_MASK << index); writel(regval | (mode << index), pctl->membase + reg); spin_unlock_irqrestore(&pctl->lock, flags); -- cgit v0.10.2 From ef5aff05f1e6ff172bab86097fbbfe26a44cc228 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 17 Feb 2014 22:19:44 +0100 Subject: pinctrl: sunxi: Fix interrupt register offset calculation This fixing setting the interrupt type for eints >= 8. Signed-off-by: Hans de Goede Acked-by: Maxime Ripard Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/pinctrl-sunxi.h b/drivers/pinctrl/pinctrl-sunxi.h index 01c494f..552b0e9 100644 --- a/drivers/pinctrl/pinctrl-sunxi.h +++ b/drivers/pinctrl/pinctrl-sunxi.h @@ -511,7 +511,7 @@ static inline u32 sunxi_pull_offset(u16 pin) static inline u32 sunxi_irq_cfg_reg(u16 irq) { - u8 reg = irq / IRQ_CFG_IRQ_PER_REG; + u8 reg = irq / IRQ_CFG_IRQ_PER_REG * 0x04; return reg + IRQ_CFG_REG; } @@ -523,7 +523,7 @@ static inline u32 sunxi_irq_cfg_offset(u16 irq) static inline u32 sunxi_irq_ctrl_reg(u16 irq) { - u8 reg = irq / IRQ_CTRL_IRQ_PER_REG; + u8 reg = irq / IRQ_CTRL_IRQ_PER_REG * 0x04; return reg + IRQ_CTRL_REG; } @@ -535,7 +535,7 @@ static inline u32 sunxi_irq_ctrl_offset(u16 irq) static inline u32 sunxi_irq_status_reg(u16 irq) { - u8 reg = irq / IRQ_STATUS_IRQ_PER_REG; + u8 reg = irq / IRQ_STATUS_IRQ_PER_REG * 0x04; return reg + IRQ_STATUS_REG; } -- cgit v0.10.2 From 5ba341604a054294aeb812603349bba024d716ee Mon Sep 17 00:00:00 2001 From: Josh Cartwright Date: Mon, 24 Feb 2014 12:41:20 -0600 Subject: pinctrl: msm: make PINCTRL_MSM bool instead of tristate Modular builds of pinctrl-msm break due to handle_bad_irq being unexported for module use. For now, make PINCTRL_MSM 'bool'. Signed-off-by: Josh Cartwright Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig index be361b7..1e4e693 100644 --- a/drivers/pinctrl/Kconfig +++ b/drivers/pinctrl/Kconfig @@ -217,7 +217,7 @@ config PINCTRL_IMX28 select PINCTRL_MXS config PINCTRL_MSM - tristate + bool select PINMUX select PINCONF select GENERIC_PINCONF -- cgit v0.10.2 From b5973fcd76ac5b3f2fabb0ab181d45ac16b8ce02 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 26 Feb 2014 19:10:26 +0900 Subject: pinctrl: sh-pfc: r8a7791: SD1_CLK fix Fix the SD1_CLK handling for r8a7791. Without this patch it is impossible to request all pins needed for SDHI1 on the Koelsch board. Signed-off-by: Magnus Damm Acked-by: Laurent Pinchart Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c index 77d103f..567d691 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c @@ -89,7 +89,8 @@ enum { /* GPSR6 */ FN_IP13_10, FN_IP13_11, FN_IP13_12, FN_IP13_13, FN_IP13_14, - FN_IP13_15, FN_IP13_18_16, FN_IP13_21_19, FN_IP13_22, FN_IP13_24_23, + FN_IP13_15, FN_IP13_18_16, FN_IP13_21_19, + FN_IP13_22, FN_IP13_24_23, FN_SD1_CLK, FN_IP13_25, FN_IP13_26, FN_IP13_27, FN_IP13_30_28, FN_IP14_1_0, FN_IP14_2, FN_IP14_3, FN_IP14_4, FN_IP14_5, FN_IP14_6, FN_IP14_7, FN_IP14_10_8, FN_IP14_13_11, FN_IP14_16_14, FN_IP14_19_17, @@ -788,6 +789,7 @@ static const u16 pinmux_data[] = { PINMUX_DATA(USB1_PWEN_MARK, FN_USB1_PWEN), PINMUX_DATA(USB1_OVC_MARK, FN_USB1_OVC), PINMUX_DATA(DU0_DOTCLKIN_MARK, FN_DU0_DOTCLKIN), + PINMUX_DATA(SD1_CLK_MARK, FN_SD1_CLK), /* IPSR0 */ PINMUX_IPSR_DATA(IP0_0, D0), @@ -3825,7 +3827,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { GP_6_11_FN, FN_IP13_25, GP_6_10_FN, FN_IP13_24_23, GP_6_9_FN, FN_IP13_22, - 0, 0, + GP_6_8_FN, FN_SD1_CLK, GP_6_7_FN, FN_IP13_21_19, GP_6_6_FN, FN_IP13_18_16, GP_6_5_FN, FN_IP13_15, -- cgit v0.10.2 From e4ad1accb28d0ed8cea6f12395d58686ad344ca7 Mon Sep 17 00:00:00 2001 From: Patrick Lai Date: Sun, 2 Mar 2014 11:52:57 -0800 Subject: ASoC: pcm: free path list before exiting from error conditions dpcm_path_get() allocates dynamic memory to hold path list. Corresponding dpcm_path_put() must be called to free the memory. dpcm_path_put() is not called under several error conditions. This leads to memory leak. Signed-off-by: Patrick Lai Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 47e1ce7..28522bd 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1989,6 +1989,7 @@ int soc_dpcm_runtime_update(struct snd_soc_card *card) paths = dpcm_path_get(fe, SNDRV_PCM_STREAM_PLAYBACK, &list); if (paths < 0) { + dpcm_path_put(&list); dev_warn(fe->dev, "ASoC: %s no valid %s path\n", fe->dai_link->name, "playback"); mutex_unlock(&card->mutex); @@ -2018,6 +2019,7 @@ capture: paths = dpcm_path_get(fe, SNDRV_PCM_STREAM_CAPTURE, &list); if (paths < 0) { + dpcm_path_put(&list); dev_warn(fe->dev, "ASoC: %s no valid %s path\n", fe->dai_link->name, "capture"); mutex_unlock(&card->mutex); @@ -2082,6 +2084,7 @@ static int dpcm_fe_dai_open(struct snd_pcm_substream *fe_substream) fe->dpcm[stream].runtime = fe_substream->runtime; if (dpcm_path_get(fe, stream, &list) <= 0) { + dpcm_path_put(&list); dev_dbg(fe->dev, "ASoC: %s no valid %s route\n", fe->dai_link->name, stream ? "capture" : "playback"); } -- cgit v0.10.2 From 017145fef567430789e40f6a22a90ce2a766370b Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 14 Feb 2014 12:49:12 +0800 Subject: spi: fsl-dspi: Fix getting correct address for master Current code set platform drvdata to dspi. However, the code in dspi_suspend() and dspi_resume() assumes the drvdata is the address of master. Fix it by setting platform drvdata to master. Signed-off-by: Axel Lin Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index ec79f72..a253920 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -420,7 +420,6 @@ static int dspi_suspend(struct device *dev) static int dspi_resume(struct device *dev) { - struct spi_master *master = dev_get_drvdata(dev); struct fsl_dspi *dspi = spi_master_get_devdata(master); @@ -504,7 +503,7 @@ static int dspi_probe(struct platform_device *pdev) clk_prepare_enable(dspi->clk); init_waitqueue_head(&dspi->waitq); - platform_set_drvdata(pdev, dspi); + platform_set_drvdata(pdev, master); ret = spi_bitbang_start(&dspi->bitbang); if (ret != 0) { @@ -525,7 +524,8 @@ out_master_put: static int dspi_remove(struct platform_device *pdev) { - struct fsl_dspi *dspi = platform_get_drvdata(pdev); + struct spi_master *master = platform_get_drvdata(pdev); + struct fsl_dspi *dspi = spi_master_get_devdata(master); /* Disconnect from the SPI framework */ spi_bitbang_stop(&dspi->bitbang); -- cgit v0.10.2 From ee73b4c6e3fc0755a91752ab8eebc8e070038b53 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 14 Feb 2014 09:53:00 +0800 Subject: spi: coldfire-qspi: Fix getting correct address for *mcfqspi dev_get_drvdata() returns the address of master rather than mcfqspi. Fixes: af361079 (spi/coldfire-qspi: Drop extra calls to spi_master_get in suspend/resume functions) Signed-off-by: Axel Lin Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c index cabed8f..28ae470 100644 --- a/drivers/spi/spi-coldfire-qspi.c +++ b/drivers/spi/spi-coldfire-qspi.c @@ -514,7 +514,8 @@ static int mcfqspi_resume(struct device *dev) #ifdef CONFIG_PM_RUNTIME static int mcfqspi_runtime_suspend(struct device *dev) { - struct mcfqspi *mcfqspi = dev_get_drvdata(dev); + struct spi_master *master = dev_get_drvdata(dev); + struct mcfqspi *mcfqspi = spi_master_get_devdata(master); clk_disable(mcfqspi->clk); @@ -523,7 +524,8 @@ static int mcfqspi_runtime_suspend(struct device *dev) static int mcfqspi_runtime_resume(struct device *dev) { - struct mcfqspi *mcfqspi = dev_get_drvdata(dev); + struct spi_master *master = dev_get_drvdata(dev); + struct mcfqspi *mcfqspi = spi_master_get_devdata(master); clk_enable(mcfqspi->clk); -- cgit v0.10.2 From ba938f3a295686aa9ab5077b10d1049f8091cbd7 Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Wed, 5 Mar 2014 11:29:01 +0800 Subject: spi: atmel: add missing spi_master_{resume,suspend} calls to PM callbacks The PM callbacks implemented by the spi-atmel driver don't call spi_master_{resume,suspend}, fix that. Signed-off-by: Wenyou Yang Signed-off-by: Mark Brown diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index b0842f7..5d7b07f 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1455,6 +1455,14 @@ static int atmel_spi_suspend(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct atmel_spi *as = spi_master_get_devdata(master); + int ret; + + /* Stop the queue running */ + ret = spi_master_suspend(master); + if (ret) { + dev_warn(dev, "cannot suspend master\n"); + return ret; + } clk_disable_unprepare(as->clk); return 0; @@ -1464,9 +1472,16 @@ static int atmel_spi_resume(struct device *dev) { struct spi_master *master = dev_get_drvdata(dev); struct atmel_spi *as = spi_master_get_devdata(master); + int ret; clk_prepare_enable(as->clk); - return 0; + + /* Start the queue running */ + ret = spi_master_resume(master); + if (ret) + dev_err(dev, "problem starting queue (%d)\n", ret); + + return ret; } static SIMPLE_DEV_PM_OPS(atmel_spi_pm_ops, atmel_spi_suspend, atmel_spi_resume); -- cgit v0.10.2 From e291fd20ef0870ed527470ca9c2402ba6f44d31c Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 5 Mar 2014 14:55:02 +0800 Subject: pinctrl: sirf: fix kernel panic in gpio_lock_as_irq commit 655dada6277991 causes kernel panic, this patch fixes it. [ 1.197816] [ffffffee] *pgd=0d7fd821, *pte=00000000, *ppte=00000000 [ 1.204070] Internal error: Oops: 17 [#1] PREEMPT SMP ARM [ 1.209447] Modules linked in: [ 1.212490] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.14.0-rc1 #3 [ 1.218737] task: cd03c000 ti: cd040000 task.ti: cd040000 [ 1.224127] PC is at gpiod_lock_as_irq+0xc/0x64 [ 1.228634] LR is at sirfsoc_gpio_irq_startup+0x18/0x44 [ 1.233842] pc : [] lr : [] psr: a0000193 [ 1.233842] sp : cd041d30 ip : 00000000 fp : 00000000 [ 1.245296] r10: 00000000 r9 : cd023db4 r8 : 60000113 [ 1.250505] r7 : 0000003e r6 : cd023dd4 r5 : c06bfa54 r4 : cd023d80 [ 1.257014] r3 : 00000020 r2 : 00000000 r1 : ffffffea r0 : ffffffea [ 1.263526] Flags: NzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel [ 1.270903] Control: 10c53c7d Table: 00004059 DAC: 00000015 [ 1.276631] Process swapper/0 (pid: 1, stack limit = 0xcd040240) [ 1.282620] Stack: (0xcd041d30 to 0xcd042000) [ 1.286963] 1d20: cd023d80 c01d1c38 c01d1c20 cd023d80 [ 1.295124] 1d40: 00000001 c0068438 cd023d80 ccb6d880 cd023dd4 c0067044 0000718e c006719c [ 1.286963] 1d20: cd023d80 c01d1c38 c01d1c20 cd023d80 [ 1.295124] 1d40: 00000001 c0068438 cd023d80 ccb6d880 cd023dd4 c0067044 0000718e c006719c [ 1.295124] 1d40: 00000001 c0068438 cd023d80 ccb6d880 cd023dd4 c0067044 0000718e c006719c [ 1.303283] 1d60: 00000800 00000083 ccb6d880 cd023d80 c02b41d8 00000083 0000003e ccb7c410 [ 1.311442] 1d80: 00000000 c00671dc 00000083 0000003e c02b41d8 cd3dd5c0 0000003e ccb7c634 [ 1.319601] 1da0: cd040030 c00672a8 cd3dd5c0 ccb7c410 ccb6d340 ccb7c410 ccb6d340 cd3dd400 [ 1.327760] 1dc0: cd3dd410 c02b4434 ccb7c410 c01265a8 00000001 cd3dd410 c0687108 00000000 [ 1.335919] 1de0: c0687108 00000000 00000000 c0240170 c0240158 cd3dd410 c06c30d0 c023e8bc [ 1.344079] 1e00: c023e9d4 00000000 cd3dd410 c023e9d4 c0682150 c023cf88 cd003e98 cd2d50c4 [ 1.352238] 1e20: cd3dd410 cd3dd444 c06822f0 c023e768 cd3dd418 cd3dd410 c06822f0 c023de14 [ 1.360397] 1e40: cd3dd418 00000000 cd3dd410 c023c398 cd041e78 cd041ea8 cd3dd400 cd3dd410 [ 1.368556] 1e60: 00000083 00000000 cd3dd400 cd3dd410 00000083 000000c8 c04e00c8 c023fee8 [ 1.376715] 1e80: 00000000 cd041ea8 cd3dd400 00000001 00000083 c024048c c0435ef8 c0434dec [ 1.384874] 1ea0: c068da58 c04c6d04 c0682150 c0435ef8 ffffffff 00000000 00000000 c068da58 [ 1.393033] 1ec0: 00000020 00000000 00000000 00000000 c05dabb8 00000007 c068d640 c068d640 [ 1.401193] 1ee0: c04c247c c04c249c 00000000 c00088e8 cd004c00 c043bbb8 cd029180 c03812a0 [ 1.409352] 1f00: 00000000 00000000 60000113 c0673728 60000113 c0673728 00000000 00000000 [ 1.417511] 1f20: cd7fce01 c0390a54 00000065 c003a81c c049e8bc 00000007 cd7fce0e 00000007 [ 1.425670] 1f40: 00000000 c05dabb8 00000007 c068d640 c068d640 c04c050c c04e00c8 00000065 [ 1.433829] 1f60: c04e00c0 c04c0c54 00000007 00000007 c04c050c c037d8fc cd03c000 c004322c [ 1.441988] 1f80: c0662b40 0000d640 c03737c0 00000000 00000000 00000000 00000000 00000000 [ 1.450147] 1fa0: 00000000 c03737cc 00000000 c000e478 00000000 00000000 00000000 00000000 [ 1.458307] 1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 1.466467] 1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 0002d481 05014092 [ 1.474640] [] (gpiod_lock_as_irq) from [] (sirfsoc_gpio_irq_startup+0x18/0x44) [ 1.483661] [] (sirfsoc_gpio_irq_startup) from [] (irq_startup+0x34/0x6c) [ 1.492163] [] (irq_startup) from [] (__setup_irq+0x450/0x4b8) [ 1.499714] [] (__setup_irq) from [] (request_threaded_irq+0xa8/0x128) [ 1.507960] [] (request_threaded_irq) from [] (request_any_context_irq+0x4c/0x7c) [ 1.517164] [] (request_any_context_irq) from [] (gpio_extcon_probe+0x144/0x1d4) [ 1.526279] [] (gpio_extcon_probe) from [] (platform_drv_probe+0x18/0x48) [ 1.534783] [] (platform_drv_probe) from [] (driver_probe_device+0x120/0x238) [ 1.543641] [] (driver_probe_device) from [] (bus_for_each_drv+0x58/0x8c) [ 1.552143] [] (bus_for_each_drv) from [] (device_attach+0x74/0x88) [ 1.560126] [] (device_attach) from [] (bus_probe_device+0x84/0xa8) [ 1.568113] [] (bus_probe_device) from [] (device_add+0x440/0x520) [ 1.576012] [] (device_add) from [] (platform_device_add+0xb4/0x214) [ 1.584084] [] (platform_device_add) from [] (platform_device_register_full+0xb8/0xdc) [ 1.593719] [] (platform_device_register_full) from [] (sirfsoc_init_late+0xec/0xf4) [ 1.603185] [] (sirfsoc_init_late) from [] (init_machine_late+0x20/0x28) [ 1.611603] [] (init_machine_late) from [] (do_one_initcall+0xf8/0x144) [ 1.619934] [] (do_one_initcall) from [] (kernel_init_freeable+0x13c/0x1dc) [ 1.628620] [] (kernel_init_freeable) from [] (kernel_init+0xc/0x118) Signed-off-by: Barry Song Signed-off-by: Linus Walleij diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index a0d6152..617a491 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -598,7 +598,7 @@ static unsigned int sirfsoc_gpio_irq_startup(struct irq_data *d) { struct sirfsoc_gpio_bank *bank = irq_data_get_irq_chip_data(d); - if (gpio_lock_as_irq(&bank->chip.gc, d->hwirq)) + if (gpio_lock_as_irq(&bank->chip.gc, d->hwirq % SIRFSOC_GPIO_BANK_SIZE)) dev_err(bank->chip.gc.dev, "unable to lock HW IRQ %lu for IRQ\n", d->hwirq); @@ -611,7 +611,7 @@ static void sirfsoc_gpio_irq_shutdown(struct irq_data *d) struct sirfsoc_gpio_bank *bank = irq_data_get_irq_chip_data(d); sirfsoc_gpio_irq_mask(d); - gpio_unlock_as_irq(&bank->chip.gc, d->hwirq); + gpio_unlock_as_irq(&bank->chip.gc, d->hwirq % SIRFSOC_GPIO_BANK_SIZE); } static struct irq_chip sirfsoc_irq_chip = { -- cgit v0.10.2 From c5eda4c1bf6214332c46fb2f4e7c42a85e5e5643 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Mar 2014 11:52:24 +0100 Subject: ALSA: hda - Add missing loopback merge path for AD1884/1984 codecs The mixer widget (NID 0x20) of AD1884 and AD1984 codecs isn't connected directly to the actual I/O paths but only via another mixer widget (NID 0x21). We need a similar fix as we did for AD1882. Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index df3652a..4989710 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -1092,6 +1092,7 @@ static int patch_ad1884(struct hda_codec *codec) spec = codec->spec; spec->gen.mixer_nid = 0x20; + spec->gen.mixer_merge_nid = 0x21; spec->gen.beep_nid = 0x10; set_beep_amp(spec, 0x10, 0, HDA_OUTPUT); -- cgit v0.10.2 From f3e9b59cb948e2328bc06635ad39572d5b7b4791 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Mar 2014 12:00:29 +0100 Subject: ALSA: hda - Use analog beep for Thinkpads with AD1984 codecs For making the driver behavior compatible with the earlier kernels, use the analog beep in the loopback path instead of the digital beep. Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 4989710..8ed0bcc 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -1026,6 +1026,9 @@ static void ad1884_fixup_thinkpad(struct hda_codec *codec, spec->gen.keep_eapd_on = 1; spec->gen.vmaster_mute.hook = ad_vmaster_eapd_hook; spec->eapd_nid = 0x12; + /* Analog PC Beeper - allow firmware/ACPI beeps */ + spec->beep_amp = HDA_COMPOSE_AMP_VAL(0x20, 3, 3, HDA_INPUT); + spec->gen.beep_nid = 0; /* no digital beep */ } } -- cgit v0.10.2 From e805ca8b0a9b6c91099c0eaa4b160a1196a4ae25 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 5 Mar 2014 12:34:39 +0100 Subject: ALSA: usb-audio: Add quirk for Logitech Webcam C500 Logitech C500 (046d:0807) needs the same workaround like other Logitech Webcams. Cc: Signed-off-by: Takashi Iwai diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 44b0ba4..1bed780 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -883,6 +883,7 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; + case USB_ID(0x046d, 0x0807): /* Logitech Webcam C500 */ case USB_ID(0x046d, 0x0808): case USB_ID(0x046d, 0x0809): case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */ -- cgit v0.10.2 From 864a6040f395464003af8dd0d8ca86fed19866d4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 4 Mar 2014 13:46:53 +0100 Subject: mac80211: clear sequence/fragment number in QoS-null frames Avoid leaking data by sending uninitialized memory and setting an invalid (non-zero) fragment number (the sequence number is ignored anyway) by setting the seq_ctrl field to zero. Cc: stable@vger.kernel.org Fixes: 3f52b7e328c5 ("mac80211: mesh power save basics") Fixes: ce662b44ce22 ("mac80211: send (QoS) Null if no buffered frames") Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 2802f9d..ad8b377 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -36,6 +36,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta) sdata->vif.addr); nullfunc->frame_control = fc; nullfunc->duration_id = 0; + nullfunc->seq_ctrl = 0; /* no address resolution for this frame -> set addr 1 immediately */ memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memset(skb_put(skb, 2), 0, 2); /* append QoS control field */ diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a023b43..137a192 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1206,6 +1206,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); -- cgit v0.10.2 From e1253be0ece1a95a02c7f5843194877471af8179 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 5 Mar 2014 08:44:23 -0500 Subject: NFSv4: nfs4_stateid_is_current should return 'true' for an invalid stateid When nfs4_set_rw_stateid() can fails by returning EIO to indicate that the stateid is completely invalid, then it makes no sense to have it trigger a retry of the READ or WRITE operation. Instead, we should just have it fall through and attempt a recovery. This fixes an infinite loop in which the client keeps replaying the same bad stateid back to the server. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 44e088d..4ae8141 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4011,8 +4011,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid, { nfs4_stateid current_stateid; - if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode)) - return false; + /* If the current stateid represents a lost lock, then exit */ + if (nfs4_set_rw_stateid(¤t_stateid, ctx, l_ctx, fmode) == -EIO) + return true; return nfs4_stateid_match(stateid, ¤t_stateid); } -- cgit v0.10.2 From 927864cd92aaad1d6285e3bb16e503caf3d6e27e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Mar 2014 13:12:03 -0500 Subject: NFSv4: Fix the return value of nfs4_select_rw_stateid In commit 5521abfdcf4d6 (NFSv4: Resend the READ/WRITE RPC call if a stateid change causes an error), we overloaded the return value of nfs4_select_rw_stateid() to cause it to return -EWOULDBLOCK if an RPC call is outstanding that would cause the NFSv4 lock or open stateid to change. That is all redundant when we actually copy the stateid used in the read/write RPC call that failed, and check that against the current stateid. It is doubly so, when we consider that in the NFSv4.1 case, we also set the stateid's seqid to the special value '0', which means 'match the current valid stateid'. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e1a4721..0deb321 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -974,9 +974,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { nfs4_stateid_copy(dst, &lsp->ls_stateid); ret = 0; - smp_rmb(); - if (!list_empty(&lsp->ls_seqid.list)) - ret = -EWOULDBLOCK; } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); @@ -984,10 +981,9 @@ out: return ret; } -static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { const nfs4_stateid *src; - int ret; int seq; do { @@ -996,12 +992,7 @@ static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) if (test_bit(NFS_OPEN_STATE, &state->flags)) src = &state->open_stateid; nfs4_stateid_copy(dst, src); - ret = 0; - smp_rmb(); - if (!list_empty(&state->owner->so_seqid.list)) - ret = -EWOULDBLOCK; } while (read_seqretry(&state->seqlock, seq)); - return ret; } /* @@ -1026,7 +1017,8 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, * choose to use. */ goto out; - ret = nfs4_copy_open_stateid(dst, state); + nfs4_copy_open_stateid(dst, state); + ret = 0; out: if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41)) dst->seqid = 0; -- cgit v0.10.2 From 869a9d375dca601dde1dee1344f3d1d665505f19 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 4 Mar 2014 12:31:09 -0500 Subject: NFSv4.1 Fail data server I/O if stateid represents a lost lock Signed-off-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 12c8132..b9a35c0 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -324,8 +324,9 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) &rdata->res.seq_res, task)) return; - nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ); + if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_read_call_done(struct rpc_task *task, void *data) @@ -435,8 +436,9 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) &wdata->res.seq_res, task)) return; - nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE); + if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ } static void filelayout_write_call_done(struct rpc_task *task, void *data) -- cgit v0.10.2 From 0418dae1056d6091e9527b7158a3763f7aa92353 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Mar 2014 13:48:16 -0500 Subject: NFSv4: Fail the truncate() if the lock/open stateid is invalid If the open stateid could not be recovered, or the file locks were lost, then we should fail the truncate() operation altogether. Reported-by: Andy Adamson Link: http://lkml.kernel.org/r/1393954269-3974-1-git-send-email-andros@netapp.com Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4ae8141..450bfed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2398,13 +2398,16 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, if (nfs4_copy_delegation_stateid(&arg.stateid, inode, fmode)) { /* Use that stateid */ - } else if (truncate && state != NULL && nfs4_valid_open_stateid(state)) { + } else if (truncate && state != NULL) { struct nfs_lockowner lockowner = { .l_owner = current->files, .l_pid = current->tgid, }; - nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, - &lockowner); + if (!nfs4_valid_open_stateid(state)) + return -EBADF; + if (nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, + &lockowner) == -EIO) + return -EBADF; } else nfs4_stateid_copy(&arg.stateid, &zero_stateid); -- cgit v0.10.2 From 07f2b6e0382ec4c59887d5954683f1a0b265574e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 14 Feb 2014 11:58:41 -0500 Subject: dm thin: ensure user takes action to validate data and metadata consistency If a thin metadata operation fails the current transaction will abort, whereby causing potential for IO layers up the stack (e.g. filesystems) to have data loss. As such, set THIN_METADATA_NEEDS_CHECK_FLAG in the thin metadata's superblock which: 1) requires the user verify the thin metadata is consistent (e.g. use thin_check, etc) 2) suggests the user verify the thin data is consistent (e.g. use fsck) The only way to clear the superblock's THIN_METADATA_NEEDS_CHECK_FLAG is to run thin_repair. On metadata operation failure: abort current metadata transaction, set pool in read-only mode, and now set the needs_check flag. As part of this change, constraints are introduced or relaxed: * don't allow a pool to transition to write mode if needs_check is set * don't allow data or metadata space to be resized if needs_check is set * if a thin pool's metadata space is exhausted: the kernel will now force the user to take the pool offline for repair before the kernel will allow the metadata space to be extended. Also, update Documentation to include information about when the thin provisioning target commits metadata, how it handles metadata failures and running out of space. Signed-off-by: Mike Snitzer Signed-off-by: Joe Thornber diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt index e6b72d3..68c0f51 100644 --- a/Documentation/device-mapper/cache.txt +++ b/Documentation/device-mapper/cache.txt @@ -124,12 +124,11 @@ the default being 204800 sectors (or 100MB). Updating on-disk metadata ------------------------- -On-disk metadata is committed every time a REQ_SYNC or REQ_FUA bio is -written. If no such requests are made then commits will occur every -second. This means the cache behaves like a physical disk that has a -write cache (the same is true of the thin-provisioning target). If -power is lost you may lose some recent writes. The metadata should -always be consistent in spite of any crash. +On-disk metadata is committed every time a FLUSH or FUA bio is written. +If no such requests are made then commits will occur every second. This +means the cache behaves like a physical disk that has a volatile write +cache. If power is lost you may lose some recent writes. The metadata +should always be consistent in spite of any crash. The 'dirty' state for a cache block changes far too frequently for us to keep updating it on the fly. So we treat it as a hint. In normal diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index 8a7a3d4..3b34b4f 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -116,6 +116,35 @@ Resuming a device with a new table itself triggers an event so the userspace daemon can use this to detect a situation where a new table already exceeds the threshold. +A low water mark for the metadata device is maintained in the kernel and +will trigger a dm event if free space on the metadata device drops below +it. + +Updating on-disk metadata +------------------------- + +On-disk metadata is committed every time a FLUSH or FUA bio is written. +If no such requests are made then commits will occur every second. This +means the thin-provisioning target behaves like a physical disk that has +a volatile write cache. If power is lost you may lose some recent +writes. The metadata should always be consistent in spite of any crash. + +If data space is exhausted the pool will either error or queue IO +according to the configuration (see: error_if_no_space). If metadata +space is exhausted or a metadata operation fails: the pool will error IO +until the pool is taken offline and repair is performed to 1) fix any +potential inconsistencies and 2) clear the flag that imposes repair. +Once the pool's metadata device is repaired it may be resized, which +will allow the pool to return to normal operation. Note that if a pool +is flagged as needing repair, the pool's data and metadata devices +cannot be resized until repair is performed. It should also be noted +that when the pool's metadata space is exhausted the current metadata +transaction is aborted. Given that the pool will cache IO whose +completion may have already been acknowledged to upper IO layers +(e.g. filesystem) it is strongly suggested that consistency checks +(e.g. fsck) be performed on those layers when repair of the pool is +required. + Thin provisioning ----------------- diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index baa87ff..fb9efc8 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -76,7 +76,7 @@ #define THIN_SUPERBLOCK_MAGIC 27022010 #define THIN_SUPERBLOCK_LOCATION 0 -#define THIN_VERSION 1 +#define THIN_VERSION 2 #define THIN_METADATA_CACHE_SIZE 64 #define SECTOR_TO_BLOCK_SHIFT 3 @@ -1755,3 +1755,38 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, return r; } + +int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + + down_write(&pmd->root_lock); + pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG; + + r = superblock_lock(pmd, &sblock); + if (r) { + DMERR("couldn't read superblock"); + goto out; + } + + disk_super = dm_block_data(sblock); + disk_super->flags = cpu_to_le32(pmd->flags); + + dm_bm_unlock(sblock); +out: + up_write(&pmd->root_lock); + return r; +} + +bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd) +{ + bool needs_check; + + down_read(&pmd->root_lock); + needs_check = pmd->flags & THIN_METADATA_NEEDS_CHECK_FLAG; + up_read(&pmd->root_lock); + + return needs_check; +} diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 82ea384..e3c857d 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -25,6 +25,11 @@ /*----------------------------------------------------------------*/ +/* + * Thin metadata superblock flags. + */ +#define THIN_METADATA_NEEDS_CHECK_FLAG (1 << 0) + struct dm_pool_metadata; struct dm_thin_device; @@ -202,6 +207,12 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, dm_sm_threshold_fn fn, void *context); +/* + * Updates the superblock immediately. + */ +int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd); +bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd); + /*----------------------------------------------------------------*/ #endif diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index dfa48ec..a04eba9 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1403,7 +1403,28 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) { int r; struct pool_c *pt = pool->ti->private; - enum pool_mode old_mode = pool->pf.mode; + bool needs_check = dm_pool_metadata_needs_check(pool->pmd); + enum pool_mode old_mode = get_pool_mode(pool); + + /* + * Never allow the pool to transition to PM_WRITE mode if user + * intervention is required to verify metadata and data consistency. + */ + if (new_mode == PM_WRITE && needs_check) { + DMERR("%s: unable to switch pool to write mode until repaired.", + dm_device_name(pool->pool_md)); + if (old_mode != new_mode) + new_mode = old_mode; + else + new_mode = PM_READ_ONLY; + } + /* + * If we were in PM_FAIL mode, rollback of metadata failed. We're + * not going to recover without a thin_repair. So we never let the + * pool move out of the old mode. + */ + if (old_mode == PM_FAIL) + new_mode = old_mode; switch (new_mode) { case PM_FAIL: @@ -1467,19 +1488,28 @@ static void out_of_data_space(struct pool *pool) set_pool_mode(pool, PM_READ_ONLY); } -static void metadata_operation_failed(struct pool *pool, const char *op, int r) +static void abort_transaction(struct pool *pool) { - dm_block_t free_blocks; + const char *dev_name = dm_device_name(pool->pool_md); + + DMERR_LIMIT("%s: aborting current metadata transaction", dev_name); + if (dm_pool_abort_metadata(pool->pmd)) { + DMERR("%s: failed to abort metadata transaction", dev_name); + set_pool_mode(pool, PM_FAIL); + } + + if (dm_pool_metadata_set_needs_check(pool->pmd)) { + DMERR("%s: failed to set 'needs_check' flag in metadata", dev_name); + set_pool_mode(pool, PM_FAIL); + } +} +static void metadata_operation_failed(struct pool *pool, const char *op, int r) +{ DMERR_LIMIT("%s: metadata operation '%s' failed: error = %d", dm_device_name(pool->pool_md), op, r); - if (r == -ENOSPC && - !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) && - !free_blocks) - DMERR_LIMIT("%s: no free metadata space available.", - dm_device_name(pool->pool_md)); - + abort_transaction(pool); set_pool_mode(pool, PM_READ_ONLY); } @@ -1693,7 +1723,7 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) /* * We want to make sure that a pool in PM_FAIL mode is never upgraded. */ - enum pool_mode old_mode = pool->pf.mode; + enum pool_mode old_mode = get_pool_mode(pool); enum pool_mode new_mode = pt->adjusted_pf.mode; /* @@ -1707,16 +1737,6 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) pool->pf = pt->adjusted_pf; pool->low_water_blocks = pt->low_water_blocks; - /* - * If we were in PM_FAIL mode, rollback of metadata failed. We're - * not going to recover without a thin_repair. So we never let the - * pool move out of the old mode. On the other hand a PM_READ_ONLY - * may have been due to a lack of metadata or data space, and may - * now work (ie. if the underlying devices have been resized). - */ - if (old_mode == PM_FAIL) - new_mode = old_mode; - set_pool_mode(pool, new_mode); return 0; @@ -2259,6 +2279,12 @@ static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit) return -EINVAL; } else if (data_size > sb_data_size) { + if (dm_pool_metadata_needs_check(pool->pmd)) { + DMERR("%s: unable to grow the data device until repaired.", + dm_device_name(pool->pool_md)); + return 0; + } + if (sb_data_size) DMINFO("%s: growing the data device from %llu to %llu blocks", dm_device_name(pool->pool_md), @@ -2300,6 +2326,12 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit) return -EINVAL; } else if (metadata_dev_size > sb_metadata_dev_size) { + if (dm_pool_metadata_needs_check(pool->pmd)) { + DMERR("%s: unable to grow the metadata device until repaired.", + dm_device_name(pool->pool_md)); + return 0; + } + warn_if_metadata_device_too_big(pool->md_dev); DMINFO("%s: growing the metadata device from %llu to %llu blocks", dm_device_name(pool->pool_md), @@ -2801,7 +2833,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -3091,7 +3123,7 @@ static int thin_iterate_devices(struct dm_target *ti, static struct target_type thin_target = { .name = "thin", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, -- cgit v0.10.2 From 3e1a0699095803e53072699a4a1485af7744601d Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 3 Mar 2014 16:03:26 +0000 Subject: dm thin: fix out of data space handling Ideally a thin pool would never run out of data space; the low water mark would trigger userland to extend the pool before we completely run out of space. However, many small random IOs to unprovisioned space can consume data space at an alarming rate. Adjust your low water mark if you're frequently seeing "out-of-data-space" mode. Before this fix, if data space ran out the pool would be put in PM_READ_ONLY mode which also aborted the pool's current metadata transaction (data loss for any changes in the transaction). This had a side-effect of needlessly compromising data consistency. And retry of queued unserviceable bios, once the data pool was resized, could initiate changes to potentially inconsistent pool metadata. Now when the pool's data space is exhausted transition to a new pool mode (PM_OUT_OF_DATA_SPACE) that allows metadata to be changed but data may not be allocated. This allows users to remove thin volumes or discard data to recover data space. The pool is no longer put in PM_READ_ONLY mode in response to the pool running out of data space. And PM_READ_ONLY mode no longer aborts the pool's current metadata transaction. Also, set_pool_mode() will now notify userspace when the pool mode is changed. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a04eba9..38a063f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -130,10 +130,11 @@ static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, struct dm_thin_new_mapping; /* - * The pool runs in 3 modes. Ordered in degraded order for comparisons. + * The pool runs in 4 modes. Ordered in degraded order for comparisons. */ enum pool_mode { PM_WRITE, /* metadata may be changed */ + PM_OUT_OF_DATA_SPACE, /* metadata may be changed, though data may not be allocated */ PM_READ_ONLY, /* metadata may not be changed */ PM_FAIL, /* all I/O fails */ }; @@ -198,7 +199,6 @@ struct pool { }; static enum pool_mode get_pool_mode(struct pool *pool); -static void out_of_data_space(struct pool *pool); static void metadata_operation_failed(struct pool *pool, const char *op, int r); /* @@ -399,6 +399,23 @@ static void requeue_io(struct thin_c *tc) spin_unlock_irqrestore(&pool->lock, flags); } +static void error_retry_list(struct pool *pool) +{ + struct bio *bio; + unsigned long flags; + struct bio_list bios; + + bio_list_init(&bios); + + spin_lock_irqsave(&pool->lock, flags); + bio_list_merge(&bios, &pool->retry_on_resume_list); + bio_list_init(&pool->retry_on_resume_list); + spin_unlock_irqrestore(&pool->lock, flags); + + while ((bio = bio_list_pop(&bios))) + bio_io_error(bio); +} + /* * This section of code contains the logic for processing a thin device's IO. * Much of the code depends on pool object resources (lists, workqueues, etc) @@ -925,13 +942,15 @@ static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks) } } +static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); + static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; dm_block_t free_blocks; struct pool *pool = tc->pool; - if (get_pool_mode(pool) != PM_WRITE) + if (WARN_ON(get_pool_mode(pool) != PM_WRITE)) return -EINVAL; r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); @@ -958,7 +977,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) } if (!free_blocks) { - out_of_data_space(pool); + set_pool_mode(pool, PM_OUT_OF_DATA_SPACE); return -ENOSPC; } } @@ -988,15 +1007,32 @@ static void retry_on_resume(struct bio *bio) spin_unlock_irqrestore(&pool->lock, flags); } -static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) +static bool should_error_unserviceable_bio(struct pool *pool) { - /* - * When pool is read-only, no cell locking is needed because - * nothing is changing. - */ - WARN_ON_ONCE(get_pool_mode(pool) != PM_READ_ONLY); + enum pool_mode m = get_pool_mode(pool); + + switch (m) { + case PM_WRITE: + /* Shouldn't get here */ + DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode"); + return true; + + case PM_OUT_OF_DATA_SPACE: + return pool->pf.error_if_no_space; + + case PM_READ_ONLY: + case PM_FAIL: + return true; + default: + /* Shouldn't get here */ + DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode"); + return true; + } +} - if (pool->pf.error_if_no_space) +static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) +{ + if (should_error_unserviceable_bio(pool)) bio_io_error(bio); else retry_on_resume(bio); @@ -1007,11 +1043,20 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c struct bio *bio; struct bio_list bios; + if (should_error_unserviceable_bio(pool)) { + cell_error(pool, cell); + return; + } + bio_list_init(&bios); cell_release(pool, cell, &bios); - while ((bio = bio_list_pop(&bios))) - handle_unserviceable_bio(pool, bio); + if (should_error_unserviceable_bio(pool)) + while ((bio = bio_list_pop(&bios))) + bio_io_error(bio); + else + while ((bio = bio_list_pop(&bios))) + retry_on_resume(bio); } static void process_discard(struct thin_c *tc, struct bio *bio) @@ -1296,6 +1341,11 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) } } +static void process_bio_success(struct thin_c *tc, struct bio *bio) +{ + bio_endio(bio, 0); +} + static void process_bio_fail(struct thin_c *tc, struct bio *bio) { bio_io_error(bio); @@ -1399,9 +1449,15 @@ static enum pool_mode get_pool_mode(struct pool *pool) return pool->pf.mode; } +static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode) +{ + dm_table_event(pool->ti->table); + DMINFO("%s: switching pool to %s mode", + dm_device_name(pool->pool_md), new_mode); +} + static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) { - int r; struct pool_c *pt = pool->ti->private; bool needs_check = dm_pool_metadata_needs_check(pool->pmd); enum pool_mode old_mode = get_pool_mode(pool); @@ -1429,38 +1485,48 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) switch (new_mode) { case PM_FAIL: if (old_mode != new_mode) - DMERR("%s: switching pool to failure mode", - dm_device_name(pool->pool_md)); + notify_of_pool_mode_change(pool, "failure"); dm_pool_metadata_read_only(pool->pmd); pool->process_bio = process_bio_fail; pool->process_discard = process_bio_fail; pool->process_prepared_mapping = process_prepared_mapping_fail; pool->process_prepared_discard = process_prepared_discard_fail; + + error_retry_list(pool); break; case PM_READ_ONLY: if (old_mode != new_mode) - DMERR("%s: switching pool to read-only mode", - dm_device_name(pool->pool_md)); - r = dm_pool_abort_metadata(pool->pmd); - if (r) { - DMERR("%s: aborting transaction failed", - dm_device_name(pool->pool_md)); - new_mode = PM_FAIL; - set_pool_mode(pool, new_mode); - } else { - dm_pool_metadata_read_only(pool->pmd); - pool->process_bio = process_bio_read_only; - pool->process_discard = process_discard; - pool->process_prepared_mapping = process_prepared_mapping_fail; - pool->process_prepared_discard = process_prepared_discard_passdown; - } + notify_of_pool_mode_change(pool, "read-only"); + dm_pool_metadata_read_only(pool->pmd); + pool->process_bio = process_bio_read_only; + pool->process_discard = process_bio_success; + pool->process_prepared_mapping = process_prepared_mapping_fail; + pool->process_prepared_discard = process_prepared_discard_passdown; + + error_retry_list(pool); + break; + + case PM_OUT_OF_DATA_SPACE: + /* + * Ideally we'd never hit this state; the low water mark + * would trigger userland to extend the pool before we + * completely run out of data space. However, many small + * IOs to unprovisioned space can consume data space at an + * alarming rate. Adjust your low water mark if you're + * frequently seeing this mode. + */ + if (old_mode != new_mode) + notify_of_pool_mode_change(pool, "out-of-data-space"); + pool->process_bio = process_bio_read_only; + pool->process_discard = process_discard; + pool->process_prepared_mapping = process_prepared_mapping; + pool->process_prepared_discard = process_prepared_discard_passdown; break; case PM_WRITE: if (old_mode != new_mode) - DMINFO("%s: switching pool to write mode", - dm_device_name(pool->pool_md)); + notify_of_pool_mode_change(pool, "write"); dm_pool_metadata_read_write(pool->pmd); pool->process_bio = process_bio; pool->process_discard = process_discard; @@ -1477,17 +1543,6 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) pt->adjusted_pf.mode = new_mode; } -/* - * Rather than calling set_pool_mode directly, use these which describe the - * reason for mode degradation. - */ -static void out_of_data_space(struct pool *pool) -{ - DMERR_LIMIT("%s: no free data space available.", - dm_device_name(pool->pool_md)); - set_pool_mode(pool, PM_READ_ONLY); -} - static void abort_transaction(struct pool *pool) { const char *dev_name = dm_device_name(pool->pool_md); @@ -2719,7 +2774,9 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("- "); - if (pool->pf.mode == PM_READ_ONLY) + if (pool->pf.mode == PM_OUT_OF_DATA_SPACE) + DMEMIT("out_of_data_space "); + else if (pool->pf.mode == PM_READ_ONLY) DMEMIT("ro "); else DMEMIT("rw "); -- cgit v0.10.2 From 18adc57779866ba451237dccca2ebf019be2fa20 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 3 Mar 2014 15:46:42 +0000 Subject: dm thin: fix deadlock in __requeue_bio_list The spin lock in requeue_io() was held for too long, allowing deadlock. Don't worry, due to other issues addressed in the following "dm thin: fix noflush suspend IO queueing" commit, this code was never called. Fix this by taking the spin lock for a much shorter period of time. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 38a063f..f39876d 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -369,14 +369,18 @@ struct dm_thin_endio_hook { struct dm_thin_new_mapping *overwrite_mapping; }; -static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) +static void requeue_bio_list(struct thin_c *tc, struct bio_list *master) { struct bio *bio; struct bio_list bios; + unsigned long flags; bio_list_init(&bios); + + spin_lock_irqsave(&tc->pool->lock, flags); bio_list_merge(&bios, master); bio_list_init(master); + spin_unlock_irqrestore(&tc->pool->lock, flags); while ((bio = bio_list_pop(&bios))) { struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); @@ -391,12 +395,9 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) static void requeue_io(struct thin_c *tc) { struct pool *pool = tc->pool; - unsigned long flags; - spin_lock_irqsave(&pool->lock, flags); - __requeue_bio_list(tc, &pool->deferred_bios); - __requeue_bio_list(tc, &pool->retry_on_resume_list); - spin_unlock_irqrestore(&pool->lock, flags); + requeue_bio_list(tc, &pool->deferred_bios); + requeue_bio_list(tc, &pool->retry_on_resume_list); } static void error_retry_list(struct pool *pool) -- cgit v0.10.2 From 738211f70a1d0c7ff7dc965395f7b8a436365ebd Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 3 Mar 2014 15:52:28 +0000 Subject: dm thin: fix noflush suspend IO queueing i) by the time DM core calls the postsuspend hook the dm_noflush flag has been cleared. So the old thin_postsuspend did nothing. We need to use the presuspend hook instead. ii) There was a race between bios leaving DM core and arriving in the deferred queue. thin_presuspend now sets a 'requeue' flag causing all bios destined for that thin to be requeued back to DM core. Then it requeues all held IO, and all IO on the deferred queue (destined for that thin). Finally postsuspend clears the 'requeue' flag. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index f39876d..be70d38 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -226,6 +226,7 @@ struct thin_c { struct pool *pool; struct dm_thin_device *td; + bool requeue_mode:1; }; /*----------------------------------------------------------------*/ @@ -1379,6 +1380,11 @@ static void process_deferred_bios(struct pool *pool) struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; + if (tc->requeue_mode) { + bio_endio(bio, DM_ENDIO_REQUEUE); + continue; + } + /* * If we've got no free new_mapping structs, and processing * this bio might require one, we pause until there are some @@ -1445,6 +1451,51 @@ static void do_waker(struct work_struct *ws) /*----------------------------------------------------------------*/ +struct noflush_work { + struct work_struct worker; + struct thin_c *tc; + + atomic_t complete; + wait_queue_head_t wait; +}; + +static void complete_noflush_work(struct noflush_work *w) +{ + atomic_set(&w->complete, 1); + wake_up(&w->wait); +} + +static void do_noflush_start(struct work_struct *ws) +{ + struct noflush_work *w = container_of(ws, struct noflush_work, worker); + w->tc->requeue_mode = true; + requeue_io(w->tc); + complete_noflush_work(w); +} + +static void do_noflush_stop(struct work_struct *ws) +{ + struct noflush_work *w = container_of(ws, struct noflush_work, worker); + w->tc->requeue_mode = false; + complete_noflush_work(w); +} + +static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) +{ + struct noflush_work w; + + INIT_WORK(&w.worker, fn); + w.tc = tc; + atomic_set(&w.complete, 0); + init_waitqueue_head(&w.wait); + + queue_work(tc->pool->wq, &w.worker); + + wait_event(w.wait, atomic_read(&w.complete)); +} + +/*----------------------------------------------------------------*/ + static enum pool_mode get_pool_mode(struct pool *pool) { return pool->pf.mode; @@ -1616,6 +1667,11 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) thin_hook_bio(tc, bio); + if (tc->requeue_mode) { + bio_endio(bio, DM_ENDIO_REQUEUE); + return DM_MAPIO_SUBMITTED; + } + if (get_pool_mode(tc->pool) == PM_FAIL) { bio_io_error(bio); return DM_MAPIO_SUBMITTED; @@ -3093,10 +3149,23 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err) return 0; } -static void thin_postsuspend(struct dm_target *ti) +static void thin_presuspend(struct dm_target *ti) { + struct thin_c *tc = ti->private; + if (dm_noflush_suspending(ti)) - requeue_io((struct thin_c *)ti->private); + noflush_work(tc, do_noflush_start); +} + +static void thin_postsuspend(struct dm_target *ti) +{ + struct thin_c *tc = ti->private; + + /* + * The dm_noflush_suspending flag has been cleared by now, so + * unfortunately we must always run this. + */ + noflush_work(tc, do_noflush_stop); } /* @@ -3187,6 +3256,7 @@ static struct target_type thin_target = { .dtr = thin_dtr, .map = thin_map, .end_io = thin_endio, + .presuspend = thin_presuspend, .postsuspend = thin_postsuspend, .status = thin_status, .iterate_devices = thin_iterate_devices, -- cgit v0.10.2 From 24b9bf43e93e0edd89072da51cf1fab95fc69dec Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 3 Mar 2014 23:19:18 +0100 Subject: net: fix for a race condition in the inet frag code I stumbled upon this very serious bug while hunting for another one, it's a very subtle race condition between inet_frag_evictor, inet_frag_intern and the IPv4/6 frag_queue and expire functions (basically the users of inet_frag_kill/inet_frag_put). What happens is that after a fragment has been added to the hash chain but before it's been added to the lru_list (inet_frag_lru_add) in inet_frag_intern, it may get deleted (either by an expired timer if the system load is high or the timer sufficiently low, or by the fraq_queue function for different reasons) before it's added to the lru_list, then after it gets added it's a matter of time for the evictor to get to a piece of memory which has been freed leading to a number of different bugs depending on what's left there. I've been able to trigger this on both IPv4 and IPv6 (which is normal as the frag code is the same), but it's been much more difficult to trigger on IPv4 due to the protocol differences about how fragments are treated. The setup I used to reproduce this is: 2 machines with 4 x 10G bonded in a RR bond, so the same flow can be seen on multiple cards at the same time. Then I used multiple instances of ping/ping6 to generate fragmented packets and flood the machines with them while running other processes to load the attacked machine. *It is very important to have the _same flow_ coming in on multiple CPUs concurrently. Usually the attacked machine would die in less than 30 minutes, if configured properly to have many evictor calls and timeouts it could happen in 10 minutes or so. An important point to make is that any caller (frag_queue or timer) of inet_frag_kill will remove both the timer refcount and the original/guarding refcount thus removing everything that's keeping the frag from being freed at the next inet_frag_put. All of this could happen before the frag was ever added to the LRU list, then it gets added and the evictor uses a freed fragment. An example for IPv6 would be if a fragment is being added and is at the stage of being inserted in the hash after the hash lock is released, but before inet_frag_lru_add executes (or is able to obtain the lru lock) another overlapping fragment for the same flow arrives at a different CPU which finds it in the hash, but since it's overlapping it drops it invoking inet_frag_kill and thus removing all guarding refcounts, and afterwards freeing it by invoking inet_frag_put which removes the last refcount added previously by inet_frag_find, then inet_frag_lru_add gets executed by inet_frag_intern and we have a freed fragment in the lru_list. The fix is simple, just move the lru_add under the hash chain locked region so when a removing function is called it'll have to wait for the fragment to be added to the lru_list, and then it'll remove it (it works because the hash chain removal is done before the lru_list one and there's no window between the two list adds when the frag can get dropped). With this fix applied I couldn't kill the same machine in 24 hours with the same setup. Fixes: 3ef0eb0db4bf ("net: frag, move LRU list maintenance outside of rwlock") CC: Florian Westphal CC: Jesper Dangaard Brouer CC: David S. Miller Signed-off-by: Nikolay Aleksandrov Acked-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index bb075fc..322dceb 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -278,9 +278,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &hb->chain); + inet_frag_lru_add(nf, qp); spin_unlock(&hb->chain_lock); read_unlock(&f->lock); - inet_frag_lru_add(nf, qp); + return qp; } -- cgit v0.10.2 From 6565b9eeef194afbb3beec80d6dd2447f4091f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 4 Mar 2014 03:57:35 +0100 Subject: bridge: multicast: add sanity check for query source addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MLD queries are supposed to have an IPv6 link-local source address according to RFC2710, section 4 and RFC3810, section 5.1.14. This patch adds a sanity check to ignore such broken MLD queries. Without this check, such malformed MLD queries can result in a denial of service: The queries are ignored by any MLD listener therefore they will not respond with an MLD report. However, without this patch these malformed MLD queries would enable the snooping part in the bridge code, potentially shutting down the according ports towards these hosts for multicast traffic as the bridge did not learn about these listeners. Reported-by: Jan Stancek Signed-off-by: Linus Lüssing Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index ef66365..fb0e36f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1235,6 +1235,12 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) { + err = -EINVAL; + goto out; + } + if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; -- cgit v0.10.2 From 367d56f7b4d5ce61e883c64f81786c7a3ae88eea Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 4 Mar 2014 15:35:20 +0800 Subject: net/mlx4: Support shutdown() interface In kexec scenario, we failed to load the mlx4 driver in the second kernel because the ownership bit was hold by the first kernel without release correctly. The patch adds shutdown() interface so that the ownership can be released correctly in the first kernel. It also helps avoiding EEH error happened during boot stage of the second kernel because of undesired traffic, which can't be handled by hardware during that stage on Power platform. Signed-off-by: Gavin Shan Tested-by: Wei Yang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d711158..5a6105f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2684,6 +2684,7 @@ static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, + .shutdown = mlx4_remove_one, .remove = mlx4_remove_one, .err_handler = &mlx4_err_handler, }; -- cgit v0.10.2 From 10c3271712f58215f4d336a1e30aa25be09cd5d1 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 4 Mar 2014 20:47:48 +0800 Subject: r8152: disable the ECM mode There are known issues for switching the drivers between ECM mode and vendor mode. The interrup transfer may become abnormal. The hardware may have the opportunity to die if you change the configuration without unloading the current driver first, because all the control transfers of the current driver would fail after the command of switching the configuration. Although to use the ecm driver and vendor driver independently is fine, it may have problems to change the driver from one to the other by switching the configuration. Additionally, now the vendor mode driver is more powerful than the ECM driver. Thus, disable the ECM mode driver, and let r8152 to set the configuration to vendor mode and reset the device automatically. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/Makefile b/drivers/net/usb/Makefile index 433f0a0..e2797f1 100644 --- a/drivers/net/usb/Makefile +++ b/drivers/net/usb/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_USB_HSO) += hso.o obj-$(CONFIG_USB_NET_AX8817X) += asix.o asix-y := asix_devices.o asix_common.o ax88172a.o obj-$(CONFIG_USB_NET_AX88179_178A) += ax88179_178a.o -obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o r815x.o +obj-$(CONFIG_USB_NET_CDCETHER) += cdc_ether.o obj-$(CONFIG_USB_NET_CDC_EEM) += cdc_eem.o obj-$(CONFIG_USB_NET_DM9601) += dm9601.o obj-$(CONFIG_USB_NET_SR9700) += sr9700.o diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 42e1769..bd363b2 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -652,6 +652,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* Samsung USB Ethernet Adapters */ +{ + USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, 0xa101, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index d89dbe3..adb12f3 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -449,9 +449,6 @@ enum rtl8152_flags { #define MCU_TYPE_PLA 0x0100 #define MCU_TYPE_USB 0x0000 -#define REALTEK_USB_DEVICE(vend, prod) \ - USB_DEVICE_INTERFACE_CLASS(vend, prod, USB_CLASS_VENDOR_SPEC) - struct rx_desc { __le32 opts1; #define RX_LEN_MASK 0x7fff @@ -2739,6 +2736,12 @@ static int rtl8152_probe(struct usb_interface *intf, struct net_device *netdev; int ret; + if (udev->actconfig->desc.bConfigurationValue != 1) { + usb_driver_set_configuration(udev, 1); + return -ENODEV; + } + + usb_reset_device(udev); netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { dev_err(&intf->dev, "Out of memory\n"); @@ -2819,9 +2822,9 @@ static void rtl8152_disconnect(struct usb_interface *intf) /* table of devices that work with this driver */ static struct usb_device_id rtl8152_table[] = { - {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8152)}, - {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8153)}, - {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, PRODUCT_ID_SAMSUNG)}, + {USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8152)}, + {USB_DEVICE(VENDOR_ID_REALTEK, PRODUCT_ID_RTL8153)}, + {USB_DEVICE(VENDOR_ID_SAMSUNG, PRODUCT_ID_SAMSUNG)}, {} }; diff --git a/drivers/net/usb/r815x.c b/drivers/net/usb/r815x.c deleted file mode 100644 index f0a8791..0000000 --- a/drivers/net/usb/r815x.c +++ /dev/null @@ -1,248 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define RTL815x_REQT_READ 0xc0 -#define RTL815x_REQT_WRITE 0x40 -#define RTL815x_REQ_GET_REGS 0x05 -#define RTL815x_REQ_SET_REGS 0x05 - -#define MCU_TYPE_PLA 0x0100 -#define OCP_BASE 0xe86c -#define BASE_MII 0xa400 - -#define BYTE_EN_DWORD 0xff -#define BYTE_EN_WORD 0x33 -#define BYTE_EN_BYTE 0x11 - -#define R815x_PHY_ID 32 -#define REALTEK_VENDOR_ID 0x0bda - - -static int pla_read_word(struct usb_device *udev, u16 index) -{ - int ret; - u8 shift = index & 2; - __le32 *tmp; - - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - index &= ~3; - - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - RTL815x_REQ_GET_REGS, RTL815x_REQT_READ, - index, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500); - if (ret < 0) - goto out2; - - ret = __le32_to_cpu(*tmp); - ret >>= (shift * 8); - ret &= 0xffff; - -out2: - kfree(tmp); - return ret; -} - -static int pla_write_word(struct usb_device *udev, u16 index, u32 data) -{ - __le32 *tmp; - u32 mask = 0xffff; - u16 byen = BYTE_EN_WORD; - u8 shift = index & 2; - int ret; - - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return -ENOMEM; - - data &= mask; - - if (shift) { - byen <<= shift; - mask <<= (shift * 8); - data <<= (shift * 8); - index &= ~3; - } - - ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), - RTL815x_REQ_GET_REGS, RTL815x_REQT_READ, - index, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500); - if (ret < 0) - goto out3; - - data |= __le32_to_cpu(*tmp) & ~mask; - *tmp = __cpu_to_le32(data); - - ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), - RTL815x_REQ_SET_REGS, RTL815x_REQT_WRITE, - index, MCU_TYPE_PLA | byen, tmp, sizeof(*tmp), - 500); - -out3: - kfree(tmp); - return ret; -} - -static int ocp_reg_read(struct usbnet *dev, u16 addr) -{ - u16 ocp_base, ocp_index; - int ret; - - ocp_base = addr & 0xf000; - ret = pla_write_word(dev->udev, OCP_BASE, ocp_base); - if (ret < 0) - goto out; - - ocp_index = (addr & 0x0fff) | 0xb000; - ret = pla_read_word(dev->udev, ocp_index); - -out: - return ret; -} - -static int ocp_reg_write(struct usbnet *dev, u16 addr, u16 data) -{ - u16 ocp_base, ocp_index; - int ret; - - ocp_base = addr & 0xf000; - ret = pla_write_word(dev->udev, OCP_BASE, ocp_base); - if (ret < 0) - goto out1; - - ocp_index = (addr & 0x0fff) | 0xb000; - ret = pla_write_word(dev->udev, ocp_index, data); - -out1: - return ret; -} - -static int r815x_mdio_read(struct net_device *netdev, int phy_id, int reg) -{ - struct usbnet *dev = netdev_priv(netdev); - int ret; - - if (phy_id != R815x_PHY_ID) - return -EINVAL; - - if (usb_autopm_get_interface(dev->intf) < 0) - return -ENODEV; - - ret = ocp_reg_read(dev, BASE_MII + reg * 2); - - usb_autopm_put_interface(dev->intf); - return ret; -} - -static -void r815x_mdio_write(struct net_device *netdev, int phy_id, int reg, int val) -{ - struct usbnet *dev = netdev_priv(netdev); - - if (phy_id != R815x_PHY_ID) - return; - - if (usb_autopm_get_interface(dev->intf) < 0) - return; - - ocp_reg_write(dev, BASE_MII + reg * 2, val); - - usb_autopm_put_interface(dev->intf); -} - -static int r8153_bind(struct usbnet *dev, struct usb_interface *intf) -{ - int status; - - status = usbnet_cdc_bind(dev, intf); - if (status < 0) - return status; - - dev->mii.dev = dev->net; - dev->mii.mdio_read = r815x_mdio_read; - dev->mii.mdio_write = r815x_mdio_write; - dev->mii.phy_id_mask = 0x3f; - dev->mii.reg_num_mask = 0x1f; - dev->mii.phy_id = R815x_PHY_ID; - dev->mii.supports_gmii = 1; - - return status; -} - -static int r8152_bind(struct usbnet *dev, struct usb_interface *intf) -{ - int status; - - status = usbnet_cdc_bind(dev, intf); - if (status < 0) - return status; - - dev->mii.dev = dev->net; - dev->mii.mdio_read = r815x_mdio_read; - dev->mii.mdio_write = r815x_mdio_write; - dev->mii.phy_id_mask = 0x3f; - dev->mii.reg_num_mask = 0x1f; - dev->mii.phy_id = R815x_PHY_ID; - dev->mii.supports_gmii = 0; - - return status; -} - -static const struct driver_info r8152_info = { - .description = "RTL8152 ECM Device", - .flags = FLAG_ETHER | FLAG_POINTTOPOINT, - .bind = r8152_bind, - .unbind = usbnet_cdc_unbind, - .status = usbnet_cdc_status, - .manage_power = usbnet_manage_power, -}; - -static const struct driver_info r8153_info = { - .description = "RTL8153 ECM Device", - .flags = FLAG_ETHER | FLAG_POINTTOPOINT, - .bind = r8153_bind, - .unbind = usbnet_cdc_unbind, - .status = usbnet_cdc_status, - .manage_power = usbnet_manage_power, -}; - -static const struct usb_device_id products[] = { -{ - USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8152, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &r8152_info, -}, - -{ - USB_DEVICE_AND_INTERFACE_INFO(REALTEK_VENDOR_ID, 0x8153, USB_CLASS_COMM, - USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), - .driver_info = (unsigned long) &r8153_info, -}, - - { }, /* END */ -}; -MODULE_DEVICE_TABLE(usb, products); - -static struct usb_driver r815x_driver = { - .name = "r815x", - .id_table = products, - .probe = usbnet_probe, - .disconnect = usbnet_disconnect, - .suspend = usbnet_suspend, - .resume = usbnet_resume, - .reset_resume = usbnet_resume, - .supports_autosuspend = 1, - .disable_hub_initiated_lpm = 1, -}; - -module_usb_driver(r815x_driver); - -MODULE_AUTHOR("Hayes Wang"); -MODULE_DESCRIPTION("Realtek USB ECM device"); -MODULE_LICENSE("GPL"); -- cgit v0.10.2 From c485658bae87faccd7aed540fd2ca3ab37992310 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 4 Mar 2014 16:35:51 +0100 Subject: net: sctp: fix skb leakage in COOKIE ECHO path of chunk->auth_chunk While working on ec0223ec48a9 ("net: sctp: fix sctp_sf_do_5_1D_ce to verify if we/peer is AUTH capable"), we noticed that there's a skb memory leakage in the error path. Running the same reproducer as in ec0223ec48a9 and by unconditionally jumping to the error label (to simulate an error condition) in sctp_sf_do_5_1D_ce() receive path lets kmemleak detector bark about the unfreed chunk->auth_chunk skb clone: Unreferenced object 0xffff8800b8f3a000 (size 256): comm "softirq", pid 0, jiffies 4294769856 (age 110.757s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 89 ab 75 5e d4 01 58 13 00 00 00 00 00 00 00 00 ..u^..X......... backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0xc8/0x210 [] skb_clone+0x49/0xb0 [] sctp_endpoint_bh_rcv+0x1d9/0x230 [sctp] [] sctp_inq_push+0x4c/0x70 [sctp] [] sctp_rcv+0x82e/0x9a0 [sctp] [] ip_local_deliver_finish+0xa8/0x210 [] nf_reinject+0xbf/0x180 [] nfqnl_recv_verdict+0x1d2/0x2b0 [nfnetlink_queue] [] nfnetlink_rcv_msg+0x14b/0x250 [nfnetlink] [] netlink_rcv_skb+0xa9/0xc0 [] nfnetlink_rcv+0x23f/0x408 [nfnetlink] [] netlink_unicast+0x168/0x250 [] netlink_sendmsg+0x2e1/0x3f0 [] sock_sendmsg+0x8b/0xc0 [] ___sys_sendmsg+0x369/0x380 What happens is that commit bbd0d59809f9 clones the skb containing the AUTH chunk in sctp_endpoint_bh_rcv() when having the edge case that an endpoint requires COOKIE-ECHO chunks to be authenticated: ---------- INIT[RANDOM; CHUNKS; HMAC-ALGO] ----------> <------- INIT-ACK[RANDOM; CHUNKS; HMAC-ALGO] --------- ------------------ AUTH; COOKIE-ECHO ----------------> <-------------------- COOKIE-ACK --------------------- When we enter sctp_sf_do_5_1D_ce() and before we actually get to the point where we process (and subsequently free) a non-NULL chunk->auth_chunk, we could hit the "goto nomem_init" path from an error condition and thus leave the cloned skb around w/o freeing it. The fix is to centrally free such clones in sctp_chunk_destroy() handler that is invoked from sctp_chunk_free() after all refs have dropped; and also move both kfree_skb(chunk->auth_chunk) there, so that chunk->auth_chunk is either NULL (since sctp_chunkify() allocs new chunks through kmem_cache_zalloc()) or non-NULL with a valid skb pointer. chunk->skb and chunk->auth_chunk are the only skbs in the sctp_chunk structure that need to be handeled. While at it, we should use consume_skb() for both. It is the same as dev_kfree_skb() but more appropriately named as we are not a device but a protocol. Also, this effectively replaces the kfree_skb() from both invocations into consume_skb(). Functions are the same only that kfree_skb() assumes that the frame was being dropped after a failure (e.g. for tools like drop monitor), usage of consume_skb() seems more appropriate in function sctp_chunk_destroy() though. Fixes: bbd0d59809f9 ("[SCTP]: Implement the receive and verification of AUTH chunk") Signed-off-by: Daniel Borkmann Cc: Vlad Yasevich Cc: Neil Horman Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 632090b..3a1767e 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1421,8 +1421,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk) BUG_ON(!list_empty(&chunk->list)); list_del_init(&chunk->transmitted_list); - /* Free the chunk skb data and the SCTP_chunk stub itself. */ - dev_kfree_skb(chunk->skb); + consume_skb(chunk->skb); + consume_skb(chunk->auth_chunk); SCTP_DBG_OBJCNT_DEC(chunk); kmem_cache_free(sctp_chunk_cachep, chunk); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ae65b6b..01e0024 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -760,7 +760,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, /* Make sure that we and the peer are AUTH capable */ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { - kfree_skb(chunk->auth_chunk); sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } @@ -775,10 +774,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, auth.transport = chunk->transport; ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); - - /* We can now safely free the auth_chunk clone */ - kfree_skb(chunk->auth_chunk); - if (ret != SCTP_IERROR_NO_ERROR) { sctp_association_free(new_asoc); return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); -- cgit v0.10.2 From 920309086652651c7d59791e5b83e2f10112b293 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Tue, 4 Mar 2014 08:46:39 -0800 Subject: net: macb: Check DMA mappings for error With CONFIG_DMA_API_DEBUG enabled the following warning is printed: WARNING: CPU: 0 PID: 619 at lib/dma-debug.c:1101 check_unmap+0x758/0x894() macb e000b000.ethernet: DMA-API: device driver failed to check map error[device address=0x000000002d171c02] [size=322 bytes] [mapped as single] Modules linked in: CPU: 0 PID: 619 Comm: udhcpc Not tainted 3.14.0-rc3-xilinx-00219-gd158fc7f36a2 #63 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0xc8) [] (dump_stack) from [] (warn_slowpath_common+0x60/0x84) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x2c/0x3c) [] (warn_slowpath_fmt) from [] (check_unmap+0x758/0x894) [] (check_unmap) from [] (debug_dma_unmap_page+0x64/0x70) [] (debug_dma_unmap_page) from [] (macb_interrupt+0x1f8/0x2dc) [] (macb_interrupt) from [] (handle_irq_event_percpu+0x2c/0x178) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x3c/0x5c) [] (handle_irq_event) from [] (handle_fasteoi_irq+0xb8/0x100) [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x20/0x30) [] (generic_handle_irq) from [] (handle_IRQ+0x64/0x8c) [] (handle_IRQ) from [] (gic_handle_irq+0x3c/0x60) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x78) Exception stack(0xed197f60 to 0xed197fa8) 7f60: 00000134 60000013 bd94362e bd94362e be96b37c 00000014 fffffd72 00000122 7f80: c000ebe4 ed196000 00000000 00000011 c032c0d8 ed197fa8 c0064008 c000ea20 7fa0: 60000013 ffffffff [] (__irq_svc) from [] (ret_fast_syscall+0x0/0x48) ---[ end trace 478f921d0d542d1e ]--- Mapped at: [] debug_dma_map_page+0x48/0x11c [] macb_start_xmit+0x184/0x2a8 [] dev_hard_start_xmit+0x334/0x470 [] sch_direct_xmit+0x78/0x2f8 [] __dev_queue_xmit+0x318/0x708 due to missing checks of the dma mapping. Add the appropriate checks to fix this. Signed-off-by: Soren Brinkmann Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 3190d38..1f03e20 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -632,11 +632,16 @@ static void gem_rx_refill(struct macb *bp) "Unable to allocate sk_buff\n"); break; } - bp->rx_skbuff[entry] = skb; /* now fill corresponding descriptor entry */ paddr = dma_map_single(&bp->pdev->dev, skb->data, bp->rx_buffer_size, DMA_FROM_DEVICE); + if (dma_mapping_error(&bp->pdev->dev, paddr)) { + dev_kfree_skb(skb); + break; + } + + bp->rx_skbuff[entry] = skb; if (entry == RX_RING_SIZE - 1) paddr |= MACB_BIT(RX_WRAP); @@ -1036,11 +1041,15 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) } entry = macb_tx_ring_wrap(bp->tx_head); - bp->tx_head++; netdev_vdbg(bp->dev, "Allocated ring entry %u\n", entry); mapping = dma_map_single(&bp->pdev->dev, skb->data, len, DMA_TO_DEVICE); + if (dma_mapping_error(&bp->pdev->dev, mapping)) { + kfree_skb(skb); + goto unlock; + } + bp->tx_head++; tx_skb = &bp->tx_skb[entry]; tx_skb->skb = skb; tx_skb->mapping = mapping; @@ -1066,6 +1075,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) if (CIRC_SPACE(bp->tx_head, bp->tx_tail, TX_RING_SIZE) < 1) netif_stop_queue(dev); +unlock: spin_unlock_irqrestore(&bp->lock, flags); return NETDEV_TX_OK; -- cgit v0.10.2 From 48330e08fa168395b9fd9f369f06cca1df204361 Mon Sep 17 00:00:00 2001 From: Soren Brinkmann Date: Tue, 4 Mar 2014 08:46:40 -0800 Subject: net: macb: DMA-unmap full rx-buffer When allocating RX buffers a fixed size is used, while freeing is based on actually received bytes, resulting in the following kernel warning when CONFIG_DMA_API_DEBUG is enabled: WARNING: CPU: 0 PID: 0 at lib/dma-debug.c:1051 check_unmap+0x258/0x894() macb e000b000.ethernet: DMA-API: device driver frees DMA memory with different size [device address=0x000000002d170040] [map size=1536 bytes] [unmap size=60 bytes] Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.14.0-rc3-xilinx-00220-g49f84081ce4f #65 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0xc8) [] (dump_stack) from [] (warn_slowpath_common+0x60/0x84) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x2c/0x3c) [] (warn_slowpath_fmt) from [] (check_unmap+0x258/0x894) [] (check_unmap) from [] (debug_dma_unmap_page+0x64/0x70) [] (debug_dma_unmap_page) from [] (gem_rx+0x118/0x170) [] (gem_rx) from [] (macb_poll+0x24/0x94) [] (macb_poll) from [] (net_rx_action+0x6c/0x188) [] (net_rx_action) from [] (__do_softirq+0x108/0x280) [] (__do_softirq) from [] (irq_exit+0x84/0xf8) [] (irq_exit) from [] (handle_IRQ+0x68/0x8c) [] (handle_IRQ) from [] (gic_handle_irq+0x3c/0x60) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x78) Exception stack(0xc056df20 to 0xc056df68) df20: 00000001 c0577430 00000000 c0577430 04ce8e0d 00000002 edfce238 00000000 df40: 04e20f78 00000002 c05981f4 00000000 00000008 c056df68 c0064008 c02d7658 df60: 20000013 ffffffff [] (__irq_svc) from [] (cpuidle_enter_state+0x54/0xf8) [] (cpuidle_enter_state) from [] (cpuidle_idle_call+0xe0/0x138) [] (cpuidle_idle_call) from [] (arch_cpu_idle+0x8/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0xbc/0x124) [] (cpu_startup_entry) from [] (start_kernel+0x350/0x3b0) ---[ end trace d5fdc38641bd3a11 ]--- Mapped at: [] debug_dma_map_page+0x48/0x11c [] gem_rx_refill+0x154/0x1f8 [] macb_open+0x270/0x3e0 [] __dev_open+0x7c/0xfc [] __dev_change_flags+0x8c/0x140 Fixing this by passing the same size which is passed during mapping the memory to the unmap function as well. Signed-off-by: Soren Brinkmann Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index 1f03e20..d0c38e0 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -730,7 +730,7 @@ static int gem_rx(struct macb *bp, int budget) skb_put(skb, len); addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr)); dma_unmap_single(&bp->pdev->dev, addr, - len, DMA_FROM_DEVICE); + bp->rx_buffer_size, DMA_FROM_DEVICE); skb->protocol = eth_type_trans(skb, bp->dev); skb_checksum_none_assert(skb); -- cgit v0.10.2 From 1b07da516ee25250f458c76c012ebe4cd677a84f Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 4 Mar 2014 14:11:06 -0800 Subject: hyperv: Move state setting for link query It moves the state setting for query into rndis_filter_receive_response(). All callbacks including query-complete and status-callback are synchronized by channel->inbound_lock. This prevents pentential race between them. Signed-off-by: Haiyang Zhang Signed-off-by: David S. Miller diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 7141a19..d6fce97 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -442,6 +442,8 @@ static int netvsc_probe(struct hv_device *dev, if (!net) return -ENOMEM; + netif_carrier_off(net); + net_device_ctx = netdev_priv(net); net_device_ctx->device_ctx = dev; hv_set_drvdata(dev, net); @@ -473,6 +475,8 @@ static int netvsc_probe(struct hv_device *dev, pr_err("Unable to register netdev.\n"); rndis_filter_device_remove(dev); free_netdev(net); + } else { + schedule_delayed_work(&net_device_ctx->dwork, 0); } return ret; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 1084e5d..b54fd25 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -243,6 +243,22 @@ static int rndis_filter_send_request(struct rndis_device *dev, return ret; } +static void rndis_set_link_state(struct rndis_device *rdev, + struct rndis_request *request) +{ + u32 link_status; + struct rndis_query_complete *query_complete; + + query_complete = &request->response_msg.msg.query_complete; + + if (query_complete->status == RNDIS_STATUS_SUCCESS && + query_complete->info_buflen == sizeof(u32)) { + memcpy(&link_status, (void *)((unsigned long)query_complete + + query_complete->info_buf_offset), sizeof(u32)); + rdev->link_state = link_status != 0; + } +} + static void rndis_filter_receive_response(struct rndis_device *dev, struct rndis_message *resp) { @@ -272,6 +288,10 @@ static void rndis_filter_receive_response(struct rndis_device *dev, sizeof(struct rndis_message) + RNDIS_EXT_LEN) { memcpy(&request->response_msg, resp, resp->msg_len); + if (request->request_msg.ndis_msg_type == + RNDIS_MSG_QUERY && request->request_msg.msg. + query_req.oid == RNDIS_OID_GEN_MEDIA_CONNECT_STATUS) + rndis_set_link_state(dev, request); } else { netdev_err(ndev, "rndis response buffer overflow " @@ -620,7 +640,6 @@ static int rndis_filter_query_device_link_status(struct rndis_device *dev) ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, &link_status, &size); - dev->link_state = (link_status != 0) ? true : false; return ret; } -- cgit v0.10.2 From d9120198ddef2c0b61ca6659ace41b7c1e7c8f08 Mon Sep 17 00:00:00 2001 From: Benoit Cousson Date: Fri, 28 Feb 2014 14:12:05 +0100 Subject: clk: shmobile: rcar-gen2: Use kick bit to allow Z clock frequency change The Z clock frequency change is effective only after setting the kick bit located in the FRQCRB register. Without that, the CA15 CPUs clock rate will never change. Fix that by checking if the kick bit is cleared and enable it to make the clock rate change effective. The bit is cleared automatically upon completion. Signed-off-by: Benoit Cousson Acked-by: Laurent Pinchart Signed-off-by: Mike Turquette diff --git a/drivers/clk/shmobile/clk-rcar-gen2.c b/drivers/clk/shmobile/clk-rcar-gen2.c index a59ec21..de680e6 100644 --- a/drivers/clk/shmobile/clk-rcar-gen2.c +++ b/drivers/clk/shmobile/clk-rcar-gen2.c @@ -26,6 +26,8 @@ struct rcar_gen2_cpg { void __iomem *reg; }; +#define CPG_FRQCRB 0x00000004 +#define CPG_FRQCRB_KICK BIT(31) #define CPG_SDCKCR 0x00000074 #define CPG_PLL0CR 0x000000d8 #define CPG_FRQCRC 0x000000e0 @@ -45,6 +47,7 @@ struct rcar_gen2_cpg { struct cpg_z_clk { struct clk_hw hw; void __iomem *reg; + void __iomem *kick_reg; }; #define to_z_clk(_hw) container_of(_hw, struct cpg_z_clk, hw) @@ -83,17 +86,45 @@ static int cpg_z_clk_set_rate(struct clk_hw *hw, unsigned long rate, { struct cpg_z_clk *zclk = to_z_clk(hw); unsigned int mult; - u32 val; + u32 val, kick; + unsigned int i; mult = div_u64((u64)rate * 32, parent_rate); mult = clamp(mult, 1U, 32U); + if (clk_readl(zclk->kick_reg) & CPG_FRQCRB_KICK) + return -EBUSY; + val = clk_readl(zclk->reg); val &= ~CPG_FRQCRC_ZFC_MASK; val |= (32 - mult) << CPG_FRQCRC_ZFC_SHIFT; clk_writel(val, zclk->reg); - return 0; + /* + * Set KICK bit in FRQCRB to update hardware setting and wait for + * clock change completion. + */ + kick = clk_readl(zclk->kick_reg); + kick |= CPG_FRQCRB_KICK; + clk_writel(kick, zclk->kick_reg); + + /* + * Note: There is no HW information about the worst case latency. + * + * Using experimental measurements, it seems that no more than + * ~10 iterations are needed, independently of the CPU rate. + * Since this value might be dependant of external xtal rate, pll1 + * rate or even the other emulation clocks rate, use 1000 as a + * "super" safe value. + */ + for (i = 1000; i; i--) { + if (!(clk_readl(zclk->kick_reg) & CPG_FRQCRB_KICK)) + return 0; + + cpu_relax(); + } + + return -ETIMEDOUT; } static const struct clk_ops cpg_z_clk_ops = { @@ -120,6 +151,7 @@ static struct clk * __init cpg_z_clk_register(struct rcar_gen2_cpg *cpg) init.num_parents = 1; zclk->reg = cpg->reg + CPG_FRQCRC; + zclk->kick_reg = cpg->reg + CPG_FRQCRB; zclk->hw.init = &init; clk = clk_register(NULL, &zclk->hw); -- cgit v0.10.2 From 6cbde8253a8143ada18ec0d1711230747a7c1934 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 6 Mar 2014 03:30:46 +0000 Subject: ARM: KVM: fix non-VGIC compilation Add a stub for kvm_vgic_addr when compiling without CONFIG_KVM_ARM_VGIC. The usefulness of this configurarion is extremely doubtful, but let's fix it anyway (until we decide that we'll always support a VGIC). Reported-by: Michele Paolino Cc: Paolo Bonzini Cc: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Paolo Bonzini diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index be85127..f27000f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -171,6 +171,11 @@ static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 add return 0; } +static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) +{ + return -ENXIO; +} + static inline int kvm_vgic_init(struct kvm *kvm) { return 0; -- cgit v0.10.2 From 999976e0f6233322a878b0b7148c810544d6c8a8 Mon Sep 17 00:00:00 2001 From: Aaron Plattner Date: Tue, 4 Mar 2014 12:42:15 -0800 Subject: cpufreq: use cpufreq_cpu_get() to avoid cpufreq_get() race conditions If a module calls cpufreq_get while cpufreq is initializing, it's possible for it to be called after cpufreq_driver is set but before cpufreq_cpu_data is written during subsys_interface_register. This happens because cpufreq_get doesn't take the cpufreq_driver_lock around its use of cpufreq_cpu_data. Fix this by using cpufreq_cpu_get(cpu) to look up the policy rather than reading it out of cpufreq_cpu_data directly. cpufreq_cpu_get() takes the appropriate locks to prevent this race from happening. Since it's possible for policy to be NULL if the caller passes in an invalid CPU number or calls the function before cpufreq is initialized, delete the BUG_ON(!policy) and simply return 0. Don't try to return -ENOENT because that's negative and the function returns an unsigned integer. References: https://bbs.archlinux.org/viewtopic.php?id=177934 Signed-off-by: Aaron Plattner Cc: 3.13+ # 3.13+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cb003a6..c4cacab 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1546,23 +1546,16 @@ static unsigned int __cpufreq_get(unsigned int cpu) */ unsigned int cpufreq_get(unsigned int cpu) { - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); unsigned int ret_freq = 0; - if (cpufreq_disabled() || !cpufreq_driver) - return -ENOENT; - - BUG_ON(!policy); - - if (!down_read_trylock(&cpufreq_rwsem)) - return 0; - - down_read(&policy->rwsem); - - ret_freq = __cpufreq_get(cpu); + if (policy) { + down_read(&policy->rwsem); + ret_freq = __cpufreq_get(cpu); + up_read(&policy->rwsem); - up_read(&policy->rwsem); - up_read(&cpufreq_rwsem); + cpufreq_cpu_put(policy); + } return ret_freq; } -- cgit v0.10.2 From 5a7e56a5d29071bcccd947dee6e3b9f8e4eb3309 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 4 Mar 2014 11:44:00 +0800 Subject: cpufreq: Initialize policy before making it available for others to use Policy must be fully initialized before it is being made available for use by others. Otherwise cpufreq_cpu_get() would be able to grab a half initialized policy structure that might not have affected_cpus (for example) populated. Then, anybody accessing those fields will get a wrong value and that will lead to unpredictable results. In order to fix this, do all the necessary initialization before we make the policy structure available via cpufreq_cpu_get(). That will guarantee that any code accessing fields of the policy will get correct data from them. Reported-by: Saravana Kannan Signed-off-by: Viresh Kumar [rjw: Changelog] Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c4cacab..d6622689 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1109,6 +1109,20 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, goto err_set_policy_cpu; } + /* related cpus should atleast have policy->cpus */ + cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + + /* + * affected cpus must always be the one, which are online. We aren't + * managing offline cpus here. + */ + cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); + + if (!frozen) { + policy->user_policy.min = policy->min; + policy->user_policy.max = policy->max; + } + write_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) per_cpu(cpufreq_cpu_data, j) = policy; @@ -1162,20 +1176,6 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, } } - /* related cpus should atleast have policy->cpus */ - cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); - - /* - * affected cpus must always be the one, which are online. We aren't - * managing offline cpus here. - */ - cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); - - if (!frozen) { - policy->user_policy.min = policy->min; - policy->user_policy.max = policy->max; - } - blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); -- cgit v0.10.2 From 4e97b631f24c927b2302368f4f83efbba82076ee Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 4 Mar 2014 11:44:01 +0800 Subject: cpufreq: Initialize governor for a new policy under policy->rwsem policy->rwsem is used to lock access to all parts of code modifying struct cpufreq_policy, but it's not used on a new policy created by __cpufreq_add_dev(). Because of that, if cpufreq_update_policy() is called in a tight loop on one CPU in parallel with offline/online of another CPU, then the following crash can be triggered: Unable to handle kernel NULL pointer dereference at virtual address 00000020 pgd = c0003000 [00000020] *pgd=80000000004003, *pmd=00000000 Internal error: Oops: 206 [#1] PREEMPT SMP ARM PC is at __cpufreq_governor+0x10/0x1ac LR is at cpufreq_update_policy+0x114/0x150 ---[ end trace f23a8defea6cd706 ]--- Kernel panic - not syncing: Fatal exception CPU0: stopping CPU: 0 PID: 7136 Comm: mpdecision Tainted: G D W 3.10.0-gd727407-00074-g979ede8 #396 [] (notifier_call_chain+0x40/0x68) from [] (__blocking_notifier_call_chain+0x40/0x58) [] (__blocking_notifier_call_chain+0x40/0x58) from [] (blocking_notifier_call_chain+0x14/0x1c) [] (blocking_notifier_call_chain+0x14/0x1c) from [] (cpufreq_set_policy+0xd4/0x2b8) [] (cpufreq_set_policy+0xd4/0x2b8) from [] (cpufreq_init_policy+0x30/0x98) [] (cpufreq_init_policy+0x30/0x98) from [] (__cpufreq_add_dev.isra.17+0x4dc/0x7a4) [] (__cpufreq_add_dev.isra.17+0x4dc/0x7a4) from [] (cpufreq_cpu_callback+0x58/0x84) [] (cpufreq_cpu_callback+0x58/0x84) from [] (notifier_call_chain+0x40/0x68) [] (notifier_call_chain+0x40/0x68) from [] (__cpu_notify+0x28/0x44) [] (__cpu_notify+0x28/0x44) from [] (_cpu_up+0xf4/0x1dc) [] (_cpu_up+0xf4/0x1dc) from [] (cpu_up+0x5c/0x78) [] (cpu_up+0x5c/0x78) from [] (store_online+0x44/0x74) [] (store_online+0x44/0x74) from [] (sysfs_write_file+0x108/0x14c) [] (sysfs_write_file+0x108/0x14c) from [] (vfs_write+0xd0/0x180) [] (vfs_write+0xd0/0x180) from [] (SyS_write+0x38/0x68) [] (SyS_write+0x38/0x68) from [] (ret_fast_syscall+0x0/0x30) Fix that by taking locks at appropriate places in __cpufreq_add_dev() as well. Reported-by: Saravana Kannan Suggested-by: Srivatsa S. Bhat Signed-off-by: Viresh Kumar [rjw: Changelog] Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d6622689..cf485d9 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1123,6 +1123,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, policy->user_policy.max = policy->max; } + down_write(&policy->rwsem); write_lock_irqsave(&cpufreq_driver_lock, flags); for_each_cpu(j, policy->cpus) per_cpu(cpufreq_cpu_data, j) = policy; @@ -1206,6 +1207,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, policy->user_policy.policy = policy->policy; policy->user_policy.governor = policy->governor; } + up_write(&policy->rwsem); kobject_uevent(&policy->kobj, KOBJ_ADD); up_read(&cpufreq_rwsem); -- cgit v0.10.2 From ad332c8a45330d170bb38b95209de449b31cd1b4 Mon Sep 17 00:00:00 2001 From: Kieran Clancy Date: Sat, 1 Mar 2014 00:42:28 +1030 Subject: ACPI / EC: Clear stale EC events on Samsung systems A number of Samsung notebooks (530Uxx/535Uxx/540Uxx/550Pxx/900Xxx/etc) continue to log events during sleep (lid open/close, AC plug/unplug, battery level change), which accumulate in the EC until a buffer fills. After the buffer is full (tests suggest it holds 8 events), GPEs stop being triggered for new events. This state persists on wake or even on power cycle, and prevents new events from being registered until the EC is manually polled. This is the root cause of a number of bugs, including AC not being detected properly, lid close not triggering suspend, and low ambient light not triggering the keyboard backlight. The bug also seemed to be responsible for performance issues on at least one user's machine. Juan Manuel Cabo found the cause of bug and the workaround of polling the EC manually on wake. The loop which clears the stale events is based on an earlier patch by Lan Tianyu (see referenced attachment). This patch: - Adds a function acpi_ec_clear() which polls the EC for stale _Q events at most ACPI_EC_CLEAR_MAX (currently 100) times. A warning is logged if this limit is reached. - Adds a flag EC_FLAGS_CLEAR_ON_RESUME which is set to 1 if the DMI system vendor is Samsung. This check could be replaced by several more specific DMI vendor/product pairs, but it's likely that the bug affects more Samsung products than just the five series mentioned above. Further, it should not be harmful to run acpi_ec_clear() on systems without the bug; it will return immediately after finding no data waiting. - Runs acpi_ec_clear() on initialisation (boot), from acpi_ec_add() - Runs acpi_ec_clear() on wake, from acpi_ec_unblock_transactions() References: https://bugzilla.kernel.org/show_bug.cgi?id=44161 References: https://bugzilla.kernel.org/show_bug.cgi?id=45461 References: https://bugzilla.kernel.org/show_bug.cgi?id=57271 References: https://bugzilla.kernel.org/attachment.cgi?id=126801 Suggested-by: Juan Manuel Cabo Signed-off-by: Kieran Clancy Reviewed-by: Lan Tianyu Reviewed-by: Dennis Jansen Tested-by: Kieran Clancy Tested-by: Juan Manuel Cabo Tested-by: Dennis Jansen Tested-by: Maurizio D'Addona Tested-by: San Zamoyski Cc: All applicable Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 959d41a..d7d32c2 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -67,6 +67,8 @@ enum ec_command { #define ACPI_EC_DELAY 500 /* Wait 500ms max. during EC ops */ #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. to get global lock */ #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ +#define ACPI_EC_CLEAR_MAX 100 /* Maximum number of events to query + * when trying to clear the EC */ enum { EC_FLAGS_QUERY_PENDING, /* Query is pending */ @@ -116,6 +118,7 @@ EXPORT_SYMBOL(first_ec); static int EC_FLAGS_MSI; /* Out-of-spec MSI controller */ static int EC_FLAGS_VALIDATE_ECDT; /* ASUStec ECDTs need to be validated */ static int EC_FLAGS_SKIP_DSDT_SCAN; /* Not all BIOS survive early DSDT scan */ +static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ /* -------------------------------------------------------------------------- Transaction Management @@ -440,6 +443,29 @@ acpi_handle ec_get_handle(void) EXPORT_SYMBOL(ec_get_handle); +static int acpi_ec_query_unlocked(struct acpi_ec *ec, u8 *data); + +/* + * Clears stale _Q events that might have accumulated in the EC. + * Run with locked ec mutex. + */ +static void acpi_ec_clear(struct acpi_ec *ec) +{ + int i, status; + u8 value = 0; + + for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) { + status = acpi_ec_query_unlocked(ec, &value); + if (status || !value) + break; + } + + if (unlikely(i == ACPI_EC_CLEAR_MAX)) + pr_warn("Warning: Maximum of %d stale EC events cleared\n", i); + else + pr_info("%d stale EC events cleared\n", i); +} + void acpi_ec_block_transactions(void) { struct acpi_ec *ec = first_ec; @@ -463,6 +489,10 @@ void acpi_ec_unblock_transactions(void) mutex_lock(&ec->mutex); /* Allow transactions to be carried out again */ clear_bit(EC_FLAGS_BLOCKED, &ec->flags); + + if (EC_FLAGS_CLEAR_ON_RESUME) + acpi_ec_clear(ec); + mutex_unlock(&ec->mutex); } @@ -821,6 +851,13 @@ static int acpi_ec_add(struct acpi_device *device) /* EC is fully operational, allow queries */ clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags); + + /* Clear stale _Q events if hardware might require that */ + if (EC_FLAGS_CLEAR_ON_RESUME) { + mutex_lock(&ec->mutex); + acpi_ec_clear(ec); + mutex_unlock(&ec->mutex); + } return ret; } @@ -922,6 +959,30 @@ static int ec_enlarge_storm_threshold(const struct dmi_system_id *id) return 0; } +/* + * On some hardware it is necessary to clear events accumulated by the EC during + * sleep. These ECs stop reporting GPEs until they are manually polled, if too + * many events are accumulated. (e.g. Samsung Series 5/9 notebooks) + * + * https://bugzilla.kernel.org/show_bug.cgi?id=44161 + * + * Ideally, the EC should also be instructed NOT to accumulate events during + * sleep (which Windows seems to do somehow), but the interface to control this + * behaviour is not known at this time. + * + * Models known to be affected are Samsung 530Uxx/535Uxx/540Uxx/550Pxx/900Xxx, + * however it is very likely that other Samsung models are affected. + * + * On systems which don't accumulate _Q events during sleep, this extra check + * should be harmless. + */ +static int ec_clear_on_resume(const struct dmi_system_id *id) +{ + pr_debug("Detected system needing EC poll on resume.\n"); + EC_FLAGS_CLEAR_ON_RESUME = 1; + return 0; +} + static struct dmi_system_id ec_dmi_table[] __initdata = { { ec_skip_dsdt_scan, "Compal JFL92", { @@ -965,6 +1026,9 @@ static struct dmi_system_id ec_dmi_table[] __initdata = { ec_validate_ecdt, "ASUS hardware", { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek Computer Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "L4R"),}, NULL}, + { + ec_clear_on_resume, "Samsung hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL}, {}, }; -- cgit v0.10.2 From c99b1861c232e1f641f13b8645e0febb3712cc71 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 4 Mar 2014 18:43:13 -0800 Subject: mwifiex: copy AP's HT capability info correctly While preparing association request, intersection of device's HT capability information and corresponding fields advertised by AP is used. This patch fixes an error while copying this field from AP's beacon. Cc: Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/11n.c b/drivers/net/wireless/mwifiex/11n.c index 6261f8c..7db1a89 100644 --- a/drivers/net/wireless/mwifiex/11n.c +++ b/drivers/net/wireless/mwifiex/11n.c @@ -308,8 +308,7 @@ mwifiex_cmd_append_11n_tlv(struct mwifiex_private *priv, ht_cap->header.len = cpu_to_le16(sizeof(struct ieee80211_ht_cap)); memcpy((u8 *) ht_cap + sizeof(struct mwifiex_ie_types_header), - (u8 *) bss_desc->bcn_ht_cap + - sizeof(struct ieee_types_header), + (u8 *)bss_desc->bcn_ht_cap, le16_to_cpu(ht_cap->header.len)); mwifiex_fill_cap_info(priv, radio_type, ht_cap); -- cgit v0.10.2 From d51246481c7f28bbfa1f814ded2da65e531cd4b2 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Tue, 4 Mar 2014 18:43:14 -0800 Subject: mwifiex: save and copy AP's VHT capability info correctly While preparing association request, intersection of device's VHT capability information and corresponding field advertised by AP is used. This patch fixes a couple errors while saving and copying vht_cap and vht_oper fields from AP's beacon. Cc: # 3.9+ Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: John W. Linville diff --git a/drivers/net/wireless/mwifiex/11ac.c b/drivers/net/wireless/mwifiex/11ac.c index 5e0eec4..5d9a808 100644 --- a/drivers/net/wireless/mwifiex/11ac.c +++ b/drivers/net/wireless/mwifiex/11ac.c @@ -189,8 +189,7 @@ int mwifiex_cmd_append_11ac_tlv(struct mwifiex_private *priv, vht_cap->header.len = cpu_to_le16(sizeof(struct ieee80211_vht_cap)); memcpy((u8 *)vht_cap + sizeof(struct mwifiex_ie_types_header), - (u8 *)bss_desc->bcn_vht_cap + - sizeof(struct ieee_types_header), + (u8 *)bss_desc->bcn_vht_cap, le16_to_cpu(vht_cap->header.len)); mwifiex_fill_vht_cap_tlv(priv, vht_cap, bss_desc->bss_band); diff --git a/drivers/net/wireless/mwifiex/scan.c b/drivers/net/wireless/mwifiex/scan.c index 0a8a26e..668547c 100644 --- a/drivers/net/wireless/mwifiex/scan.c +++ b/drivers/net/wireless/mwifiex/scan.c @@ -2101,12 +2101,12 @@ mwifiex_save_curr_bcn(struct mwifiex_private *priv) curr_bss->ht_info_offset); if (curr_bss->bcn_vht_cap) - curr_bss->bcn_ht_cap = (void *)(curr_bss->beacon_buf + - curr_bss->vht_cap_offset); + curr_bss->bcn_vht_cap = (void *)(curr_bss->beacon_buf + + curr_bss->vht_cap_offset); if (curr_bss->bcn_vht_oper) - curr_bss->bcn_ht_oper = (void *)(curr_bss->beacon_buf + - curr_bss->vht_info_offset); + curr_bss->bcn_vht_oper = (void *)(curr_bss->beacon_buf + + curr_bss->vht_info_offset); if (curr_bss->bcn_bss_co_2040) curr_bss->bcn_bss_co_2040 = -- cgit v0.10.2 From d4078e232267ff53f3b030b9698a3c001db4dbec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Mar 2014 14:07:49 +0100 Subject: x86, trace: Further robustify CR2 handling vs tracing Building on commit 0ac09f9f8cd1 ("x86, trace: Fix CR2 corruption when tracing page faults") this patch addresses another few issues: - Now that read_cr2() is lifted into trace_do_page_fault(), we should pass the address to trace_page_fault_entries() to avoid it re-reading a potentially changed cr2. - Put both trace_do_page_fault() and trace_page_fault_entries() under CONFIG_TRACING. - Mark both fault entry functions {,trace_}do_page_fault() as notrace to avoid getting __mcount or other function entry trace callbacks before we've observed CR2. - Mark __do_page_fault() as noinline to guarantee the function tracer does get to see the fault. Cc: Cc: Acked-by: Steven Rostedt Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140306145300.GO9987@twins.programming.kicks-ass.net Signed-off-by: H. Peter Anvin diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e7fa28b..a10c8c7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1020,8 +1020,12 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate * routines. + * + * This function must have noinline because both callers + * {,trace_}do_page_fault() have notrace on. Having this an actual function + * guarantees there's a function trace entry. */ -static void __kprobes +static void __kprobes noinline __do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) { @@ -1245,31 +1249,38 @@ good_area: up_read(&mm->mmap_sem); } -dotraplinkage void __kprobes +dotraplinkage void __kprobes notrace do_page_fault(struct pt_regs *regs, unsigned long error_code) { + unsigned long address = read_cr2(); /* Get the faulting address */ enum ctx_state prev_state; - /* Get the faulting address: */ - unsigned long address = read_cr2(); + + /* + * We must have this function tagged with __kprobes, notrace and call + * read_cr2() before calling anything else. To avoid calling any kind + * of tracing machinery before we've observed the CR2 value. + * + * exception_{enter,exit}() contain all sorts of tracepoints. + */ prev_state = exception_enter(); __do_page_fault(regs, error_code, address); exception_exit(prev_state); } -static void trace_page_fault_entries(struct pt_regs *regs, +#ifdef CONFIG_TRACING +static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, unsigned long error_code) { if (user_mode(regs)) - trace_page_fault_user(read_cr2(), regs, error_code); + trace_page_fault_user(address, regs, error_code); else - trace_page_fault_kernel(read_cr2(), regs, error_code); + trace_page_fault_kernel(address, regs, error_code); } -dotraplinkage void __kprobes +dotraplinkage void __kprobes notrace trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) { - enum ctx_state prev_state; /* * The exception_enter and tracepoint processing could * trigger another page faults (user space callchain @@ -1277,9 +1288,11 @@ trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) * the faulting address now. */ unsigned long address = read_cr2(); + enum ctx_state prev_state; prev_state = exception_enter(); - trace_page_fault_entries(regs, error_code); + trace_page_fault_entries(address, regs, error_code); __do_page_fault(regs, error_code, address); exception_exit(prev_state); } +#endif /* CONFIG_TRACING */ -- cgit v0.10.2 From f6d16d32797f665100b1507f6a77c4cd0acb30c5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 6 Mar 2014 14:04:51 -0500 Subject: dm thin: fix Documentation for held metadata root feature The Documentation for the thin provisioning target's held metadata root feature was incorrect. It is now available and the value for the held metadata root is in block units (not 512b sectors). Signed-off-by: Mike Snitzer diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index 3b34b4f..05a27e9 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -287,10 +287,9 @@ ii) Status should register for the event and then check the target's status. held metadata root: - The location, in sectors, of the metadata root that has been + The location, in blocks, of the metadata root that has been 'held' for userspace read access. '-' indicates there is no - held root. This feature is not yet implemented so '-' is - always returned. + held root. discard_passdown|no_discard_passdown Whether or not discards are actually being passed down to the -- cgit v0.10.2 From bb5016eac1656506df1a9d6057ce5bec342afbef Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 6 Mar 2014 11:14:30 +0100 Subject: l2tp: fix manual sequencing (de)activation in L2TPv2 Commit e0d4435f "l2tp: Update PPP-over-L2TP driver to work over L2TPv3" broke the PPPOL2TP_SO_SENDSEQ setsockopt. The L2TP header length was previously computed by pppol2tp_l2t_header_len() before each call to l2tp_xmit_skb(). Now that header length is retrieved from the hdr_len session field, this field must be updated every time the L2TP header format is modified, or l2tp_xmit_skb() won't push the right amount of data for the L2TP header. This patch uses l2tp_session_set_header_len() to adjust hdr_len every time sequencing is (de)activated from userspace (either by the PPPOL2TP_SO_SENDSEQ setsockopt or the L2TP_ATTR_SEND_SEQ netlink attribute). Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 735d0f6..85d9d94 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -112,7 +112,6 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; -static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) @@ -1863,7 +1862,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete); /* We come here whenever a session's send_seq, cookie_len or * l2specific_len parameters are set. */ -static void l2tp_session_set_header_len(struct l2tp_session *session, int version) +void l2tp_session_set_header_len(struct l2tp_session *session, int version) { if (version == L2TP_HDR_VER_2) { session->hdr_len = 6; @@ -1876,6 +1875,7 @@ static void l2tp_session_set_header_len(struct l2tp_session *session, int versio } } +EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 1f01ba3..3f93ccd 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -263,6 +263,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, int length, int (*payload_hook)(struct sk_buff *skb)); int l2tp_session_queue_purge(struct l2tp_session *session); int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb); +void l2tp_session_set_header_len(struct l2tp_session *session, int version); int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 4cfd722..bd7387a 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -578,8 +578,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_RECV_SEQ]) session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]); - if (info->attrs[L2TP_ATTR_SEND_SEQ]) + if (info->attrs[L2TP_ATTR_SEND_SEQ]) { session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]); + l2tp_session_set_header_len(session, session->tunnel->version); + } if (info->attrs[L2TP_ATTR_LNS_MODE]) session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]); diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index be5fadf..6bfeaa7 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1312,6 +1312,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk, po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } + l2tp_session_set_header_len(session, session->tunnel->version); l2tp_info(session, PPPOL2TP_MSG_CONTROL, "%s: set send_seq=%d\n", session->name, session->send_seq); -- cgit v0.10.2 From 9e9cb6221aa7cb04765484fe87cc2d1b92edce64 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 6 Mar 2014 11:15:10 +0100 Subject: l2tp: fix userspace reception on plain L2TP sockets As pppol2tp_recv() never queues up packets to plain L2TP sockets, pppol2tp_recvmsg() never returns data to userspace, thus making the recv*() system calls unusable. Instead of dropping packets when the L2TP socket isn't bound to a PPP channel, this patch adds them to its reception queue. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 6bfeaa7..5990919 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -254,12 +254,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int po = pppox_sk(sk); ppp_input(&po->chan, skb); } else { - l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: socket not bound\n", - session->name); + l2tp_dbg(session, PPPOL2TP_MSG_DATA, + "%s: recv %d byte data frame, passing to L2TP socket\n", + session->name, data_len); - /* Not bound. Nothing we can do, so discard. */ - atomic_long_inc(&session->stats.rx_errors); - kfree_skb(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { + atomic_long_inc(&session->stats.rx_errors); + kfree_skb(skb); + } } return; -- cgit v0.10.2 From 6d4ebeb4df0176b1973875840a9f7e91394c0685 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:16 +0100 Subject: tipc: allow connection shutdown callback to be invoked in advance Currently connection shutdown callback function is called when connection instance is released in tipc_conn_kref_release(), and receiving packets and sending packets are running in different threads. Even if connection is closed by the thread of receiving packets, its shutdown callback may not be called immediately as the connection reference count is non-zero at that moment. So, although the connection is shut down by the thread of receiving packets, the thread of sending packets doesn't know it. Before its shutdown callback is invoked to tell the sending thread its connection has been closed, the sending thread may deliver messages by tipc_conn_sendmsg(), this is why the following error information appears: "Sending subscription event failed, no memory" To eliminate it, allow connection shutdown callback function to be called before connection id is removed in tipc_close_conn(), which makes the sending thread know the truth in time that its socket is closed so that it doesn't send message to it. We also remove the "Sending XXX failed..." error reporting for topology and config services. Signed-off-by: Ying Xue Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/config.c b/net/tipc/config.c index e74eef2..e6d7216 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -376,7 +376,6 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, struct tipc_cfg_msg_hdr *req_hdr; struct tipc_cfg_msg_hdr *rep_hdr; struct sk_buff *rep_buf; - int ret; /* Validate configuration message header (ignore invalid message) */ req_hdr = (struct tipc_cfg_msg_hdr *)buf; @@ -398,12 +397,8 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); rep_hdr->tcm_len = htonl(rep_buf->len); rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - - ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, - rep_buf->len); - if (ret < 0) - pr_err("Sending cfg reply message failed, no memory\n"); - + tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, + rep_buf->len); kfree_skb(rep_buf); } } diff --git a/net/tipc/server.c b/net/tipc/server.c index 3739797..bbaa8f5 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -87,7 +87,6 @@ static void tipc_clean_outqueues(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct tipc_server *s = con->server; if (con->sock) { tipc_sock_release_local(con->sock); @@ -95,10 +94,6 @@ static void tipc_conn_kref_release(struct kref *kref) } tipc_clean_outqueues(con); - - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); - kfree(con); } @@ -181,6 +176,9 @@ static void tipc_close_conn(struct tipc_conn *con) struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + if (con->conid) + s->tipc_conn_shutdown(con->conid, con->usr_data); + spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 7cb0bd5..a6ce3bb 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -96,20 +96,16 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, { struct tipc_subscriber *subscriber = sub->subscriber; struct kvec msg_sect; - int ret; msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); - sub->evt.event = htohl(event, sub->swap); sub->evt.found_lower = htohl(found_lower, sub->swap); sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, - msg_sect.iov_base, msg_sect.iov_len); - if (ret < 0) - pr_err("Sending subscription event failed, no memory\n"); + tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base, + msg_sect.iov_len); } /** -- cgit v0.10.2 From 4652edb70e8a7eebbe47fa931940f65522c36e8f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:17 +0100 Subject: tipc: fix connection refcount leak When tipc_conn_sendmsg() calls tipc_conn_lookup() to query a connection instance, its reference count value is increased if it's found. But subsequently if it's found that the connection is closed, the work of sending message is not queued into its server send workqueue, and the connection reference count is not decreased. This will cause a reference count leak. To reproduce this problem, an application would need to open and closes topology server connections with high intensity. We fix this by immediately decrementing the connection reference count if a send fails due to the connection being closed. Signed-off-by: Ying Xue Acked-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/server.c b/net/tipc/server.c index bbaa8f5..646a930 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -427,10 +427,12 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(&e->list, &con->outqueue); spin_unlock_bh(&con->outqueue_lock); - if (test_bit(CF_CONNECTED, &con->flags)) + if (test_bit(CF_CONNECTED, &con->flags)) { if (!queue_work(s->send_wq, &con->swork)) conn_put(con); - + } else { + conn_put(con); + } return 0; } -- cgit v0.10.2 From fe8e4649397915cf3b2ab0b695929a27e543967e Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Thu, 6 Mar 2014 14:40:18 +0100 Subject: tipc: avoid to unnecessary process switch under non-block mode When messages are received via tipc socket under non-block mode, schedule_timeout() is called in tipc_wait_for_rcvmsg(), that is, the process of receiving messages will be scheduled once although timeout value passed to schedule_timeout() is 0. The same issue exists in accept()/wait_for_accept(). To avoid this unnecessary process switch, we only call schedule_timeout() if the timeout value is non-zero. Signed-off-by: Ying Xue Reviewed-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a4cf274..0ed0eaa 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -997,7 +997,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { if (sock->state == SS_DISCONNECTING) { err = -ENOTCONN; break; @@ -1623,7 +1623,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) for (;;) { prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_queue_empty(&sk->sk_receive_queue)) { + if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); -- cgit v0.10.2 From edcc0511b5ee7235282a688cd604e3ae7f9e1fc9 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:19 +0100 Subject: tipc: drop subscriber connection id invalidation When a topology server subscriber is disconnected, the associated connection id is set to zero. A check vs zero is then done in the subscription timeout function to see if the subscriber have been shut down. This is unnecessary, because all subscription timers will be cancelled when a subscriber terminates. Setting the connection id to zero is actually harmful because id zero is the identity of the topology server listening socket, and can cause a race that leads to this socket being closed instead. Signed-off-by: Erik Hugne Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index a6ce3bb..11c9ae0 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -149,14 +149,6 @@ static void subscr_timeout(struct tipc_subscription *sub) /* The spin lock per subscriber is used to protect its members */ spin_lock_bh(&subscriber->lock); - /* Validate if the connection related to the subscriber is - * closed (in case subscriber is terminating) - */ - if (subscriber->conid == 0) { - spin_unlock_bh(&subscriber->lock); - return; - } - /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { spin_unlock_bh(&subscriber->lock); @@ -211,9 +203,6 @@ static void subscr_release(struct tipc_subscriber *subscriber) spin_lock_bh(&subscriber->lock); - /* Invalidate subscriber reference */ - subscriber->conid = 0; - /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { -- cgit v0.10.2 From 1bb8dce57f4d15233688c68990852a10eb1cd79f Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:20 +0100 Subject: tipc: fix memory leak during module removal When the TIPC module is removed, the tasklet handler is disabled before all other subsystems. This will cause lingering publications in the name table because the node_down tasklets responsible to clean up publications from an unreachable node will never run. When the name table is shut down, these publications are detected and an error message is logged: tipc: nametbl_stop(): orphaned hash chain detected This is actually a memory leak, introduced with commit 993b858e37b3120ee76d9957a901cca22312ffaa ("tipc: correct the order of stopping services at rmmod") Instead of just logging an error and leaking memory, we free the orphaned entries during nametable shutdown. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 48302be..042e8e3 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -941,17 +941,48 @@ int tipc_nametbl_init(void) return 0; } +/** + * tipc_purge_publications - remove all publications for a given type + * + * tipc_nametbl_lock must be held when calling this function + */ +static void tipc_purge_publications(struct name_seq *seq) +{ + struct publication *publ, *safe; + struct sub_seq *sseq; + struct name_info *info; + + if (!seq->sseqs) { + nameseq_delete_empty(seq); + return; + } + sseq = seq->sseqs; + info = sseq->info; + list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) { + tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node, + publ->ref, publ->key); + } +} + void tipc_nametbl_stop(void) { u32 i; + struct name_seq *seq; + struct hlist_head *seq_head; + struct hlist_node *safe; - /* Verify name table is empty, then release it */ + /* Verify name table is empty and purge any lingering + * publications, then release the name table + */ write_lock_bh(&tipc_nametbl_lock); for (i = 0; i < TIPC_NAMETBL_SIZE; i++) { if (hlist_empty(&table.types[i])) continue; - pr_err("nametbl_stop(): orphaned hash chain detected\n"); - break; + seq_head = &table.types[i]; + hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) { + tipc_purge_publications(seq); + } + continue; } kfree(table.types); table.types = NULL; -- cgit v0.10.2 From 2892505ea170094f982516bb38105eac45f274b1 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 6 Mar 2014 14:40:21 +0100 Subject: tipc: don't log disabled tasklet handler errors Failure to schedule a TIPC tasklet with tipc_k_signal because the tasklet handler is disabled is not an error. It means TIPC is currently in the process of shutting down. We remove the error logging in this case. Signed-off-by: Erik Hugne Reviewed-by: Jon Maloy Signed-off-by: David S. Miller diff --git a/net/tipc/handler.c b/net/tipc/handler.c index e4bc8a2..1fabf16 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -58,7 +58,6 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument) spin_lock_bh(&qitem_lock); if (!handler_enabled) { - pr_err("Signal request ignored by handler\n"); spin_unlock_bh(&qitem_lock); return -ENOPROTOOPT; } -- cgit v0.10.2 From e588e2f286ed7da011ed357c24c5b9a554e26595 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Mar 2014 18:06:41 +0100 Subject: inet: frag: make sure forced eviction removes all frags Quoting Alexander Aring: While fragmentation and unloading of 6lowpan module I got this kernel Oops after few seconds: BUG: unable to handle kernel paging request at f88bbc30 [..] Modules linked in: ipv6 [last unloaded: 6lowpan] Call Trace: [] ? call_timer_fn+0x54/0xb3 [] ? process_timeout+0xa/0xa [] run_timer_softirq+0x140/0x15f Problem is that incomplete frags are still around after unload; when their frag expire timer fires, we get crash. When a netns is removed (also done when unloading module), inet_frag calls the evictor with 'force' argument to purge remaining frags. The evictor loop terminates when accounted memory ('work') drops to 0 or the lru-list becomes empty. However, the mem accounting is done via percpu counters and may not be accurate, i.e. loop may terminate prematurely. Alter evictor to only stop once the lru list is empty when force is requested. Reported-by: Phoebe Buckheister Reported-by: Alexander Aring Tested-by: Alexander Aring Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 322dceb..3b01959 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -208,7 +208,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) } work = frag_mem_limit(nf) - nf->low_thresh; - while (work > 0) { + while (work > 0 || force) { spin_lock(&nf->lru_lock); if (list_empty(&nf->lru_list)) { -- cgit v0.10.2 From 0ca49345b6f489e95f8d6edeb0b092e257475b2a Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 6 Mar 2014 20:39:04 +0100 Subject: firewire: ohci: fix probe failure with Agere/LSI controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit bd972688eb24 "firewire: ohci: Fix 'failed to read phy reg' on FW643 rev8", there is a high chance that firewire-ohci fails to initialize LSI née Agere controllers. https://bugzilla.kernel.org/show_bug.cgi?id=65151 Peter Hurley points out the reason: IEEE 1394a:2000 clause 5A.1 (or IEEE 1394:2008 clause 17.2.1) say: "The PHY shall insure that no more than 10 ms elapse from the reassertion of LPS until the interface is reset. The link shall not assert LReq until the reset is complete." In other words, the link needs to give the PHY at least 10 ms to get the interface operational. With just the msleep(1) in bd972688eb24, the first read_phy_reg() during ohci_enable() may happen before the phy-link interface reset was finished, and fail. Due to the high variability of msleep(n) with small n, this failure was not fully reproducible, and not apparent at all with low CONFIG_HZ setting. On the other hand, Peter can no longer reproduce the issue with FW643 rev8. The read phy reg failures that happened back then may have had an unrelated cause. So, just revert bd972688eb24, except for the valid comment on TSB82AA2 cards. Reported-by: Mikhail Gavrilov Reported-by: Jay Fenlason Reported-by: Clemens Ladisch Reported-by: Peter Hurley Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Stefan Richter diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 6f74d8d..8db6632 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -290,7 +290,6 @@ static char ohci_driver_name[] = KBUILD_MODNAME; #define QUIRK_NO_MSI 0x10 #define QUIRK_TI_SLLZ059 0x20 #define QUIRK_IR_WAKE 0x40 -#define QUIRK_PHY_LCTRL_TIMEOUT 0x80 /* In case of multiple matches in ohci_quirks[], only the first one is used. */ static const struct { @@ -303,10 +302,7 @@ static const struct { QUIRK_BE_HEADERS}, {PCI_VENDOR_ID_ATT, PCI_DEVICE_ID_AGERE_FW643, 6, - QUIRK_PHY_LCTRL_TIMEOUT | QUIRK_NO_MSI}, - - {PCI_VENDOR_ID_ATT, PCI_ANY_ID, PCI_ANY_ID, - QUIRK_PHY_LCTRL_TIMEOUT}, + QUIRK_NO_MSI}, {PCI_VENDOR_ID_CREATIVE, PCI_DEVICE_ID_CREATIVE_SB1394, PCI_ANY_ID, QUIRK_RESET_PACKET}, @@ -353,7 +349,6 @@ MODULE_PARM_DESC(quirks, "Chip quirks (default = 0" ", disable MSI = " __stringify(QUIRK_NO_MSI) ", TI SLLZ059 erratum = " __stringify(QUIRK_TI_SLLZ059) ", IR wake unreliable = " __stringify(QUIRK_IR_WAKE) - ", phy LCtrl timeout = " __stringify(QUIRK_PHY_LCTRL_TIMEOUT) ")"); #define OHCI_PARAM_DEBUG_AT_AR 1 @@ -2299,9 +2294,6 @@ static int ohci_enable(struct fw_card *card, * TI TSB82AA2 + TSB81BA3(A) cards signal LPS enabled early but * cannot actually use the phy at that time. These need tens of * millisecods pause between LPS write and first phy access too. - * - * But do not wait for 50msec on Agere/LSI cards. Their phy - * arbitration state machine may time out during such a long wait. */ reg_write(ohci, OHCI1394_HCControlSet, @@ -2309,11 +2301,8 @@ static int ohci_enable(struct fw_card *card, OHCI1394_HCControl_postedWriteEnable); flush_writes(ohci); - if (!(ohci->quirks & QUIRK_PHY_LCTRL_TIMEOUT)) + for (lps = 0, i = 0; !lps && i < 3; i++) { msleep(50); - - for (lps = 0, i = 0; !lps && i < 150; i++) { - msleep(1); lps = reg_read(ohci, OHCI1394_HCControlSet) & OHCI1394_HCControl_LPS; } -- cgit v0.10.2 From 4ae6e50c76def306d726a5d2678e88998ad5258e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 4 Mar 2014 17:35:44 -0700 Subject: phy: fix compiler array bounds warning on settings[] With -Werror=array-bounds, gcc v4.7.x warns that in phy_find_valid(), the settings[] "array subscript is above array bounds", I think because idx is a signed integer and if the caller supplied idx < 0, we pass the guard but still reference out of bounds. Fix this by making idx unsigned here and elsewhere. Signed-off-by: Bjorn Helgaas Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 19c9eca..76d96b9 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -164,9 +164,9 @@ static const struct phy_setting settings[] = { * of that setting. Returns the index of the last setting if * none of the others match. */ -static inline int phy_find_setting(int speed, int duplex) +static inline unsigned int phy_find_setting(int speed, int duplex) { - int idx = 0; + unsigned int idx = 0; while (idx < ARRAY_SIZE(settings) && (settings[idx].speed != speed || settings[idx].duplex != duplex)) @@ -185,7 +185,7 @@ static inline int phy_find_setting(int speed, int duplex) * the mask in features. Returns the index of the last setting * if nothing else matches. */ -static inline int phy_find_valid(int idx, u32 features) +static inline unsigned int phy_find_valid(unsigned int idx, u32 features) { while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) idx++; @@ -204,7 +204,7 @@ static inline int phy_find_valid(int idx, u32 features) static void phy_sanitize_settings(struct phy_device *phydev) { u32 features = phydev->supported; - int idx; + unsigned int idx; /* Sanitize settings based on PHY capabilities */ if ((features & SUPPORTED_Autoneg) == 0) @@ -954,7 +954,8 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) (phydev->interface == PHY_INTERFACE_MODE_RGMII))) { int eee_lp, eee_cap, eee_adv; u32 lp, cap, adv; - int idx, status; + int status; + unsigned int idx; /* Read phy status to properly get the right settings */ status = phy_read_status(phydev); -- cgit v0.10.2 From adca4767821e54c72d4a2f467af77923f2c87e07 Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Tue, 4 Mar 2014 17:24:10 -0800 Subject: net: Improve SO_TIMESTAMPING documentation and fix a minor code bug The original documentation was very unclear. The code fix is presumably related to the formerly unclear documentation: SOCK_TIMESTAMPING_RX_SOFTWARE has no effect on __sock_recv_timestamp's behavior, so calling __sock_recv_ts_and_drops from sock_recv_ts_and_drops if only SOCK_TIMESTAMPING_RX_SOFTWARE is set is pointless. This should have no user-observable effect. Signed-off-by: Andy Lutomirski Acked-by: Richard Cochran Signed-off-by: David S. Miller diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 661d3c3..048c92b 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -21,26 +21,38 @@ has such a feature). SO_TIMESTAMPING: -Instructs the socket layer which kind of information is wanted. The -parameter is an integer with some of the following bits set. Setting -other bits is an error and doesn't change the current state. - -SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamp in hardware -SOF_TIMESTAMPING_TX_SOFTWARE: if SOF_TIMESTAMPING_TX_HARDWARE is off or - fails, then do it in software -SOF_TIMESTAMPING_RX_HARDWARE: return the original, unmodified time stamp - as generated by the hardware -SOF_TIMESTAMPING_RX_SOFTWARE: if SOF_TIMESTAMPING_RX_HARDWARE is off or - fails, then do it in software -SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp -SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to - the system time base -SOF_TIMESTAMPING_SOFTWARE: return system time stamp generated in - software - -SOF_TIMESTAMPING_TX/RX determine how time stamps are generated. -SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the -following control message: +Instructs the socket layer which kind of information should be collected +and/or reported. The parameter is an integer with some of the following +bits set. Setting other bits is an error and doesn't change the current +state. + +Four of the bits are requests to the stack to try to generate +timestamps. Any combination of them is valid. + +SOF_TIMESTAMPING_TX_HARDWARE: try to obtain send time stamps in hardware +SOF_TIMESTAMPING_TX_SOFTWARE: try to obtain send time stamps in software +SOF_TIMESTAMPING_RX_HARDWARE: try to obtain receive time stamps in hardware +SOF_TIMESTAMPING_RX_SOFTWARE: try to obtain receive time stamps in software + +The other three bits control which timestamps will be reported in a +generated control message. If none of these bits are set or if none of +the set bits correspond to data that is available, then the control +message will not be generated: + +SOF_TIMESTAMPING_SOFTWARE: report systime if available +SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available +SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available + +It is worth noting that timestamps may be collected for reasons other +than being requested by a particular socket with +SOF_TIMESTAMPING_[TR]X_(HARD|SOFT)WARE. For example, most drivers that +can generate hardware receive timestamps ignore +SOF_TIMESTAMPING_RX_HARDWARE. It is still a good idea to set that flag +in case future drivers pay attention. + +If timestamps are reported, they will appear in a control message with +cmsg_level==SOL_SOCKET, cmsg_type==SO_TIMESTAMPING, and a payload like +this: struct scm_timestamping { struct timespec systime; diff --git a/include/net/sock.h b/include/net/sock.h index 5c3f7c3..7c4167b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2186,7 +2186,6 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, { #define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ - (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \ (1UL << SOCK_TIMESTAMPING_SOFTWARE) | \ (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \ (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE)) -- cgit v0.10.2 From 0a13404dd3bf4ea870e3d96270b5a382edca85c0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 5 Mar 2014 14:29:58 +1100 Subject: net: unix socket code abuses csum_partial The unix socket code is using the result of csum_partial to hash into a lookup table: unix_hash_fold(csum_partial(sunaddr, len, 0)); csum_partial is only guaranteed to produce something that can be folded into a checksum, as its prototype explains: * returns a 32-bit number suitable for feeding into itself * or csum_tcpudp_magic The 32bit value should not be used directly. Depending on the alignment, the ppc64 csum_partial will return different 32bit partial checksums that will fold into the same 16bit checksum. This difference causes the following testcase (courtesy of Gustavo) to sometimes fail: #include #include int main() { int fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); int i = 1; setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &i, 4); struct sockaddr addr; addr.sa_family = AF_LOCAL; bind(fd, &addr, 2); listen(fd, 128); struct sockaddr_storage ss; socklen_t sslen = (socklen_t)sizeof(ss); getsockname(fd, (struct sockaddr*)&ss, &sslen); fd = socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC, 0); if (connect(fd, (struct sockaddr*)&ss, sslen) == -1){ perror(NULL); return 1; } printf("OK\n"); return 0; } As suggested by davem, fix this by using csum_fold to fold the partial 32bit checksum into a 16bit checksum before using it. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: David S. Miller diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 29fc8be..ce6ec6c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -163,9 +163,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline unsigned int unix_hash_fold(__wsum n) { - unsigned int hash = (__force unsigned int)n; + unsigned int hash = (__force unsigned int)csum_fold(n); - hash ^= hash>>16; hash ^= hash>>8; return hash&(UNIX_HASH_SIZE-1); } -- cgit v0.10.2 From d746ca9561440685edb62614d1bcbbc27ff50e66 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 5 Mar 2014 14:51:37 +1100 Subject: ibmveth: Fix endian issues with MAC addresses The code to load a MAC address into a u64 for passing to the hypervisor via a register is broken on little endian. Create a helper function called ibmveth_encode_mac_addr which does the right thing in both big and little endian. We were storing the MAC address in a long in struct ibmveth_adapter. It's never used so remove it - we don't need another place in the driver where we create endian issues with MAC addresses. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 4be9715..1fc8334 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -522,10 +522,21 @@ retry: return rc; } +static u64 ibmveth_encode_mac_addr(u8 *mac) +{ + int i; + u64 encoded = 0; + + for (i = 0; i < ETH_ALEN; i++) + encoded = (encoded << 8) | mac[i]; + + return encoded; +} + static int ibmveth_open(struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); - u64 mac_address = 0; + u64 mac_address; int rxq_entries = 1; unsigned long lpar_rc; int rc; @@ -579,8 +590,7 @@ static int ibmveth_open(struct net_device *netdev) adapter->rx_queue.num_slots = rxq_entries; adapter->rx_queue.toggle = 1; - memcpy(&mac_address, netdev->dev_addr, netdev->addr_len); - mac_address = mac_address >> 16; + mac_address = ibmveth_encode_mac_addr(netdev->dev_addr); rxq_desc.fields.flags_len = IBMVETH_BUF_VALID | adapter->rx_queue.queue_len; @@ -1183,8 +1193,8 @@ static void ibmveth_set_multicast_list(struct net_device *netdev) /* add the addresses to the filter table */ netdev_for_each_mc_addr(ha, netdev) { /* add the multicast address to the filter table */ - unsigned long mcast_addr = 0; - memcpy(((char *)&mcast_addr)+2, ha->addr, ETH_ALEN); + u64 mcast_addr; + mcast_addr = ibmveth_encode_mac_addr(ha->addr); lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address, IbmVethMcastAddFilter, mcast_addr); @@ -1372,9 +1382,6 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16); - adapter->mac_addr = 0; - memcpy(&adapter->mac_addr, mac_addr_p, ETH_ALEN); - netdev->irq = dev->irq; netdev->netdev_ops = &ibmveth_netdev_ops; netdev->ethtool_ops = &netdev_ethtool_ops; @@ -1383,7 +1390,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; netdev->features |= netdev->hw_features; - memcpy(netdev->dev_addr, &adapter->mac_addr, netdev->addr_len); + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++) { struct kobject *kobj = &adapter->rx_buff_pool[i].kobj; diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h index 451ba79..1f37499 100644 --- a/drivers/net/ethernet/ibm/ibmveth.h +++ b/drivers/net/ethernet/ibm/ibmveth.h @@ -138,7 +138,6 @@ struct ibmveth_adapter { struct napi_struct napi; struct net_device_stats stats; unsigned int mcastFilterSize; - unsigned long mac_addr; void * buffer_list_addr; void * filter_list_addr; dma_addr_t buffer_list_dma; -- cgit v0.10.2 From d2d273ffabd315eecefce21a4391d44b6e156b73 Mon Sep 17 00:00:00 2001 From: Anton Nayshtut Date: Wed, 5 Mar 2014 08:30:08 +0200 Subject: ipv6: Fix exthdrs offload registration. Without this fix, ipv6_exthdrs_offload_init doesn't register IPPROTO_DSTOPTS offload, but returns 0 (as the IPPROTO_ROUTING registration actually succeeds). This then causes the ipv6_gso_segment to drop IPv6 packets with IPPROTO_DSTOPTS header. The issue detected and the fix verified by running MS HCK Offload LSO test on top of QEMU Windows guests, as this test sends IPv6 packets with IPPROTO_DSTOPTS. Signed-off-by: Anton Nayshtut Signed-off-by: David S. Miller diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index cf77f3a..447a7fb 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void) int ret; ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); - if (!ret) + if (ret) goto out; ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); - if (!ret) + if (ret) goto out_rt; out: -- cgit v0.10.2 From 14eedc32a3c0ec9dd70448a73763ee21feae3111 Mon Sep 17 00:00:00 2001 From: Lauri Kasanen Date: Fri, 28 Feb 2014 20:50:23 +0200 Subject: drm/radeon: TTM must be init with cpu-visible VRAM, v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this, a bo may get created in the cpu-inaccessible vram. Before the CP engines get setup, all copies are done via cpu memcpy. This means that the cpu tries to read from inaccessible memory, fails, and the radeon module proceeds to disable acceleration. Doing this has no downsides, as the real VRAM size gets set as soon as the CP engines get init. This is a candidate for 3.14 fixes. v2: Add comment on why the function is used Signed-off-by: Lauri Kasanen Signed-off-by: Alex Deucher Reviewed-by: Christian König Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 77f5b0c..5cdba9b 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -714,6 +714,9 @@ int radeon_ttm_init(struct radeon_device *rdev) DRM_ERROR("Failed initializing VRAM heap.\n"); return r; } + /* Change the size here instead of the init above so only lpfn is affected */ + radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); + r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->stollen_vga_memory); -- cgit v0.10.2 From bc6a62955f6ea6aabe26292a21dbdd67f5b89b67 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Feb 2014 12:01:28 -0500 Subject: drm/radeon: resume old pm late Moving the pm resume up in the init order to fix dpm seems to have regressed somes cases with the old pm code. Move it back to late resume. Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e6419ca..a6d1e6c 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7902,7 +7902,8 @@ int cik_resume(struct radeon_device *rdev) /* init golden registers */ cik_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = cik_startup(rdev); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 8a2c010..27b0ff1 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -5299,7 +5299,8 @@ int evergreen_resume(struct radeon_device *rdev) /* init golden registers */ evergreen_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = evergreen_startup(rdev); diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index ea932ac..bf6300c 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2105,7 +2105,8 @@ int cayman_resume(struct radeon_device *rdev) /* init golden registers */ ni_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = cayman_startup(rdev); diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ef024ce..3cc78bb 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3942,8 +3942,6 @@ int r100_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = r100_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 7c63ef8..0b658b3 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1430,8 +1430,6 @@ int r300_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = r300_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 3768aab..802b192 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -325,8 +325,6 @@ int r420_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = r420_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index e209eb7..98d6053 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -240,8 +240,6 @@ int r520_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = r520_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cdbc417..647ef40 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2968,7 +2968,8 @@ int r600_resume(struct radeon_device *rdev) /* post card */ atom_asic_init(rdev->mode_info.atom_context); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = r600_startup(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b012cbb..044bc98 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1521,13 +1521,16 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) if (r) DRM_ERROR("ib ring test failed (%d).\n", r); - if (rdev->pm.dpm_enabled) { + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { /* do dpm late init */ r = radeon_pm_late_init(rdev); if (r) { rdev->pm.dpm_enabled = false; DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); } + } else { + /* resume old pm late */ + radeon_pm_resume(rdev); } radeon_restore_bios_scratch_regs(rdev); diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c index b5c2369..130d5cc 100644 --- a/drivers/gpu/drm/radeon/rs400.c +++ b/drivers/gpu/drm/radeon/rs400.c @@ -474,8 +474,6 @@ int rs400_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = rs400_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index fdcde76..72d3616 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -1048,8 +1048,6 @@ int rs600_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = rs600_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 3595073..3462b64 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -756,8 +756,6 @@ int rs690_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = rs690_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 98e8138..237dd29 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -586,8 +586,6 @@ int rv515_resume(struct radeon_device *rdev) /* Initialize surface registers */ radeon_surface_init(rdev); - radeon_pm_resume(rdev); - rdev->accel_working = true; r = rv515_startup(rdev); if (r) { diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 4e37a42..fef3107 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -1811,7 +1811,8 @@ int rv770_resume(struct radeon_device *rdev) /* init golden registers */ rv770_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = rv770_startup(rdev); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 8357832..9a124d0 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6618,7 +6618,8 @@ int si_resume(struct radeon_device *rdev) /* init golden registers */ si_init_golden_registers(rdev); - radeon_pm_resume(rdev); + if (rdev->pm.pm_method == PM_METHOD_DPM) + radeon_pm_resume(rdev); rdev->accel_working = true; r = si_startup(rdev); -- cgit v0.10.2 From 0d997b68574217b41c1288ee4c6728c7003aac1f Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 4 Mar 2014 10:34:48 +0100 Subject: drm/radeon: silence GCC warning on 32 bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building radeon_ttm.o on 32 bit x86 triggers a warning: In file included from include/asm-generic/bug.h:13:0, from [...]/arch/x86/include/asm/bug.h:38, from include/linux/bug.h:4, from include/drm/drm_mm.h:39, from include/drm/drm_vma_manager.h:26, from include/drm/ttm/ttm_bo_api.h:35, from drivers/gpu/drm/radeon/radeon_ttm.c:32: drivers/gpu/drm/radeon/radeon_ttm.c: In function 'radeon_ttm_gtt_read': include/linux/kernel.h:712:17: warning: comparison of distinct pointer types lacks a cast [enabled by default] (void) (&_min1 == &_min2); \ ^ drivers/gpu/drm/radeon/radeon_ttm.c:938:22: note: in expansion of macro 'min' ssize_t cur_size = min(size, PAGE_SIZE - off); ^ Silence this warning by using min_t(). Since cur_size will never be negative and its upper bound is PAGE_SIZE, we can change its type to size_t and use min_t(size_t, [...]) here. Signed-off-by: Paul Bolle Signed-off-by: Alex Deucher Reviewed-by: Christian König diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 5cdba9b..040a2a1 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -938,7 +938,7 @@ static ssize_t radeon_ttm_gtt_read(struct file *f, char __user *buf, while (size) { loff_t p = *pos / PAGE_SIZE; unsigned off = *pos & ~PAGE_MASK; - ssize_t cur_size = min(size, PAGE_SIZE - off); + size_t cur_size = min_t(size_t, size, PAGE_SIZE - off); struct page *page; void *ptr; -- cgit v0.10.2 From 972c5ddb177e14835212de63f8f16a690e4db9ff Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Mar 2014 15:50:29 -0500 Subject: drm/radeon/cik: fix typo in documentation Copy-paste typo. Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a6d1e6c..e22be84 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3046,7 +3046,7 @@ static u32 cik_create_bitmask(u32 bit_width) } /** - * cik_select_se_sh - select which SE, SH to address + * cik_get_rb_disabled - computes the mask of disabled RBs * * @rdev: radeon_device pointer * @max_rb_num: max RBs (render backends) for the asic -- cgit v0.10.2 From 13714323f83ffa5a772fe0d8b74e0fa32ee08819 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 6 Mar 2014 13:16:55 -0500 Subject: drm/radeon/dpm: fix typo in EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters Should be at 0x8 rather than 0. fixes: https://bugzilla.kernel.org/show_bug.cgi?id=60523 Noticed by ArtForz on #radeon Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/evergreen_smc.h b/drivers/gpu/drm/radeon/evergreen_smc.h index 76ada8c..3a03ba3 100644 --- a/drivers/gpu/drm/radeon/evergreen_smc.h +++ b/drivers/gpu/drm/radeon/evergreen_smc.h @@ -57,7 +57,7 @@ typedef struct SMC_Evergreen_MCRegisters SMC_Evergreen_MCRegisters; #define EVERGREEN_SMC_FIRMWARE_HEADER_LOCATION 0x100 -#define EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters 0x0 +#define EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters 0x8 #define EVERGREEN_SMC_FIRMWARE_HEADER_stateTable 0xC #define EVERGREEN_SMC_FIRMWARE_HEADER_mcRegisterTable 0x20 -- cgit v0.10.2 From 5bd4e4c158ceb4e76ce9ed005c876d59caad8af2 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 6 Mar 2014 16:53:11 -0500 Subject: bonding: correctly handle out of range parameters for lp_interval We didn't correctly check cases where the value for lp_interval is not within the legal range due to a missing table terminator. This would let userspace trigger a kernel panic by specifying a value out of range: echo -1 > /sys/devices/virtual/net/bond0/bonding/lp_interval Introduced by commit 4325b374f84 ("bonding: convert lp_interval to use the new option API"). Acked-by: Nikolay Aleksandrov Signed-off-by: Sasha Levin Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index c378784..298c265 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -121,6 +121,7 @@ static struct bond_opt_value bond_resend_igmp_tbl[] = { static struct bond_opt_value bond_lp_interval_tbl[] = { { "minval", 1, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", INT_MAX, BOND_VALFLAG_MAX}, + { NULL, -1, 0}, }; static struct bond_option bond_opts[] = { -- cgit v0.10.2 From 57352ef4f5f19969a50d42e84b274287993b576f Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 6 Mar 2014 18:28:16 +0200 Subject: net/mlx4_core: Fix memory access error in mlx4_QUERY_DEV_CAP_wrapper() Fix a regression introduced by [1]. outbox was accessed instead of outbox->buf. Typo was copy-pasted to [2] and [3]. [1] - cc1ade9 mlx4_core: Disable memory windows for virtual functions [2] - 4de6580 mlx4_core: Add support for steerable IB UD QPs [3] - 7ffdf72 net/mlx4_core: Add basic support for TCP/IP offloads under tunneling Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 91b69ff..8726e34 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -859,7 +859,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET); /* For guests, disable vxlan tunneling */ - MLX4_GET(field, outbox, QUERY_DEV_CAP_VXLAN); + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_VXLAN); field &= 0xf7; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_VXLAN); @@ -869,7 +869,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); /* For guests, disable mw type 2 */ - MLX4_GET(bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); @@ -883,7 +883,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, } /* turn off ipoib managed steering for guests */ - MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); + MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); field &= ~0x80; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET); -- cgit v0.10.2 From 97989356af0ec8b1b1658d804892abb354127330 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Thu, 6 Mar 2014 18:28:17 +0200 Subject: net/mlx4_core: mlx4_init_slave() shouldn't access comm channel before PF is ready Currently, the PF call to pci_enable_sriov from the PF probe function stalls for 10 seconds times the number of VFs probed on the host. This happens because the way for such VFs to determine of the PF initialization finished, is by attempting to issue reset on the comm-channel and get timeout (after 10s). The PF probe function is called from a kenernel workqueue, and therefore during that time, rcu lock is being held and kernel's workqueue is stalled. This blocks other processes that try to use the workqueue or rcu lock. For example, interface renaming which is calling rcu_synchronize is blocked, and timedout by systemd. Changed mlx4_init_slave() to allow VF probed on the host to immediatly detect that the PF is not ready, and return EPROBE_DEFER instantly. Only when the PF finishes the initialization, allow such VFs to access the comm channel. This issue and fix are relevant only for probed VFs on the hypervisor, there is no way to pass this information to a VM until comm channel is ready, so in a VM, if PF is not ready, the first command will be timedout after 10 seconds and return EPROBE_DEFER. Signed-off-by: Amir Vadai Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5a6105f..30a08a6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -150,6 +150,8 @@ struct mlx4_port_config { struct pci_dev *pdev; }; +static atomic_t pf_loading = ATOMIC_INIT(0); + int mlx4_check_port_params(struct mlx4_dev *dev, enum mlx4_port_type *port_type) { @@ -1407,6 +1409,11 @@ static int mlx4_init_slave(struct mlx4_dev *dev) u32 slave_read; u32 cmd_channel_ver; + if (atomic_read(&pf_loading)) { + mlx4_warn(dev, "PF is not ready. Deferring probe\n"); + return -EPROBE_DEFER; + } + mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; mlx4_warn(dev, "Sending reset\n"); @@ -2319,7 +2326,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data) if (num_vfs) { mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs); + + atomic_inc(&pf_loading); err = pci_enable_sriov(pdev, num_vfs); + atomic_dec(&pf_loading); + if (err) { mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n", err); -- cgit v0.10.2 From c88507fbad8055297c1d1e21e599f46960cbee39 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 6 Mar 2014 17:51:57 +0100 Subject: ipv6: don't set DST_NOCOUNT for remotely added routes DST_NOCOUNT should only be used if an authorized user adds routes locally. In case of routes which are added on behalf of router advertisments this flag must not get used as it allows an unlimited number of routes getting added remotely. Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 11dac21..fba54a4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1513,7 +1513,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; -- cgit v0.10.2 From 77ac9a05d4a0be6b2ab22b61d7fb36d29c212d72 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 6 Mar 2014 13:26:36 +0100 Subject: drm: fix bochs kconfig dependencies Signed-off-by: Gerd Hoffmann Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/bochs/Kconfig b/drivers/gpu/drm/bochs/Kconfig index c8fcf12..5f8b0c2 100644 --- a/drivers/gpu/drm/bochs/Kconfig +++ b/drivers/gpu/drm/bochs/Kconfig @@ -2,6 +2,7 @@ config DRM_BOCHS tristate "DRM Support for bochs dispi vga interface (qemu stdvga)" depends on DRM && PCI select DRM_KMS_HELPER + select DRM_KMS_FB_HELPER select FB_SYS_FILLRECT select FB_SYS_COPYAREA select FB_SYS_IMAGEBLIT -- cgit v0.10.2 From f50d7146a298692daa730b40335e1466110727de Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 3 Mar 2014 12:18:14 +0000 Subject: MAINTAINERS: add maintainer entry for TDA998x driver Add a maintainers entry for the TDA998x driver. Rob Clark has handed this driver over to me to look after. Acked-by: Rob Clark Signed-off-by: Russell King Signed-off-by: Dave Airlie diff --git a/MAINTAINERS b/MAINTAINERS index e9d85f6..07d093e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6171,6 +6171,12 @@ S: Supported F: drivers/block/nvme* F: include/linux/nvme.h +NXP TDA998X DRM DRIVER +M: Russell King +S: Supported +F: drivers/gpu/drm/i2c/tda998x_drv.c +F: include/drm/i2c/tda998x.h + OMAP SUPPORT M: Tony Lindgren L: linux-omap@vger.kernel.org -- cgit v0.10.2 From d03874c881a049a50e12f285077ab1f9fc2686e1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 6 Mar 2014 18:09:52 -0500 Subject: drm/radeon/atom: select the proper number of lanes in transmitter setup We need to check for DVI vs. HDMI when setting up duallink since HDMI is single link only. Fixes 4k modes on newer asics. bug: https://bugs.freedesktop.org/show_bug.cgi?id=75223 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 2cec2ab..607dc14 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -1314,7 +1314,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t } if (is_dp) args.v5.ucLaneNum = dp_lane_count; - else if (radeon_encoder->pixel_clock > 165000) + else if (radeon_dig_monitor_is_duallink(encoder, radeon_encoder->pixel_clock)) args.v5.ucLaneNum = 8; else args.v5.ucLaneNum = 4; -- cgit v0.10.2 From 621b5060e823301d0cba4cb52a7ee3491922d291 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 3 Mar 2014 14:21:40 +1100 Subject: powerpc/tm: Fix crash when forking inside a transaction When we fork/clone we currently don't copy any of the TM state to the new thread. This results in a TM bad thing (program check) when the new process is switched in as the kernel does a tmrechkpt with TEXASR FS not set. Also, since R1 is from userspace, we trigger the bad kernel stack pointer detection. So we end up with something like this: Bad kernel stack pointer 0 at c0000000000404fc cpu 0x2: Vector: 700 (Program Check) at [c00000003ffefd40] pc: c0000000000404fc: restore_gprs+0xc0/0x148 lr: 0000000000000000 sp: 0 msr: 9000000100201030 current = 0xc000001dd1417c30 paca = 0xc00000000fe00800 softe: 0 irq_happened: 0x01 pid = 0, comm = swapper/2 WARNING: exception is not recoverable, can't continue The below fixes this by flushing the TM state before we copy the task_struct to the clone. To do this we go through the tmreclaim patch, which removes the checkpointed registers from the CPU and transitions the CPU out of TM suspend mode. Hence we need to call tmrechkpt after to restore the checkpointed state and the TM mode for the current task. To make this fail from userspace is simply: tbegin li r0, 2 sc Kudos to Adhemerval Zanella Neto for finding this. Signed-off-by: Michael Neuling cc: Adhemerval Zanella Neto cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 8d4c247f1..af064d2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1048,6 +1048,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) flush_altivec_to_thread(src); flush_vsx_to_thread(src); flush_spe_to_thread(src); + /* + * Flush TM state out so we can copy it. __switch_to_tm() does this + * flush but it removes the checkpointed state from the current CPU and + * transitions the CPU out of TM mode. Hence we need to call + * tm_recheckpoint_new_task() (on the same task) to restore the + * checkpointed state back and the TM mode. + */ + __switch_to_tm(src); + tm_recheckpoint_new_task(src); *dst = *src; -- cgit v0.10.2 From a5b2cf5b1af424ee3dd9e3ce6d5cea18cb927e67 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 4 Mar 2014 08:31:24 +1100 Subject: powerpc: Align p_dyn, p_rela and p_st symbols The 64bit relocation code places a few symbols in the text segment. These symbols are only 4 byte aligned where they need to be 8 byte aligned. Add an explicit alignment. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org Tested-by: Laurent Dufour Signed-off-by: Benjamin Herrenschmidt diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S index 1482327..d88736f 100644 --- a/arch/powerpc/kernel/reloc_64.S +++ b/arch/powerpc/kernel/reloc_64.S @@ -81,6 +81,7 @@ _GLOBAL(relocate) 6: blr +.balign 8 p_dyn: .llong __dynamic_start - 0b p_rela: .llong __rela_dyn_start - 0b p_st: .llong _stext - 0b -- cgit v0.10.2 From 16c0ae028989df40bdab46b7f241d331ddc97c59 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 5 Mar 2014 14:05:09 +0200 Subject: Target/sbc: Fix sbc_copy_prot for offset scatters When copying between device and command protection scatters we must take into account that device scatters might be offset and we might copy outside scatter range. Thus for each cmd prot scatter we must take the min between cmd prot scatter, dev prot scatter, and whats left (and loop in case we havn't copied enough from/to cmd prot scatter). Example (single t_prot_sg of len 2048): kernel: sbc_dif_copy_prot: se_cmd=ffff880380aaf970, left=2048, len=2048, dev_prot_sg_offset=3072, dev_prot_sg_len=4096 kernel: isert: se_cmd=ffff880380aaf970 PI error found type 0 at sector 0x2600 expected 0x0 vs actual 0x725f, lba=2580 Instead of copying 2048 from offset 3072 (copying junk outside sg limit 4096), we must to copy 1024 and continue to next sg until we complete cmd prot scatter. This issue was found using iSER T10-PI offload over rd_mcp (wasn't discovered with fileio since file_dev prot sglists are never offset). Changes from v1: - Fix sbc_copy_prot copy length miss-calculation Changes from v0: - Removed psg->offset consideration for psg_len computation - Removed sg->offset consideration for offset condition - Added copied consideraiton for len computation - Added copied offset to paddr when doing memcpy Signed-off-by: Sagi Grimberg Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 42f18fc..77e6531 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1079,25 +1079,31 @@ sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read, left = sectors * dev->prot_length; for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) { - - len = min(psg->length, left); - if (offset >= sg->length) { - sg = sg_next(sg); - offset = 0; - } + unsigned int psg_len, copied = 0; paddr = kmap_atomic(sg_page(psg)) + psg->offset; - addr = kmap_atomic(sg_page(sg)) + sg->offset + offset; - - if (read) - memcpy(paddr, addr, len); - else - memcpy(addr, paddr, len); - - left -= len; - offset += len; + psg_len = min(left, psg->length); + while (psg_len) { + len = min(psg_len, sg->length - offset); + addr = kmap_atomic(sg_page(sg)) + sg->offset + offset; + + if (read) + memcpy(paddr + copied, addr, len); + else + memcpy(addr, paddr + copied, len); + + left -= len; + offset += len; + copied += len; + psg_len -= len; + + if (offset >= sg->length) { + sg = sg_next(sg); + offset = 0; + } + kunmap_atomic(addr); + } kunmap_atomic(paddr); - kunmap_atomic(addr); } } -- cgit v0.10.2 From 9b745ab897199c2af9f21ca9681ef86d5b971002 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 7 Mar 2014 08:37:19 +0100 Subject: ALSA: hda - Fix loud click noise with IdeaPad 410Y Lenovo IdeaPad 410Y with ALC282 codec makes loud click noises at boot and shutdown. Also, it wrongly misdetects the acpi_thinkpad hook. This patch adds a device-specific fixup for disabling the shutup callback that is the cause of the click noise and also avoiding the thinpad_helper calls. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=71511 Reported-and-tested-by: Guilherme Amadio Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 850296a..8d0a844 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3616,6 +3616,19 @@ static void alc_fixup_auto_mute_via_amp(struct hda_codec *codec, } } +static void alc_no_shutup(struct hda_codec *codec) +{ +} + +static void alc_fixup_no_shutup(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + struct alc_spec *spec = codec->spec; + spec->shutup = alc_no_shutup; + } +} + static void alc_fixup_headset_mode_alc668(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -3844,6 +3857,7 @@ enum { ALC269_FIXUP_HP_GPIO_LED, ALC269_FIXUP_INV_DMIC, ALC269_FIXUP_LENOVO_DOCK, + ALC269_FIXUP_NO_SHUTUP, ALC286_FIXUP_SONY_MIC_NO_PRESENCE, ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT, ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, @@ -4020,6 +4034,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_inv_dmic_0x12, }, + [ALC269_FIXUP_NO_SHUTUP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_no_shutup, + }, [ALC269_FIXUP_LENOVO_DOCK] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4405,6 +4423,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x2212, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2214, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x2215, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x17aa, 0x3978, "IdeaPad Y410P", ALC269_FIXUP_NO_SHUTUP), SND_PCI_QUIRK(0x17aa, 0x5013, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), -- cgit v0.10.2 From 739c3eea711a255df5ed1246face0a4dce5e589f Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 19 Feb 2014 20:20:21 +0800 Subject: blk-mq: add REQ_SYNC early Add REQ_SYNC early, so rq_dispatched[] in blk_mq_rq_ctx_init is set correctly. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe diff --git a/block/blk-mq.c b/block/blk-mq.c index 1b8b50d..883f720 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -860,6 +860,8 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) ctx = blk_mq_get_ctx(q); hctx = q->mq_ops->map_queue(q, ctx->cpu); + if (is_sync) + rw |= REQ_SYNC; trace_block_getrq(q, bio, rw); rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false); if (likely(rq)) -- cgit v0.10.2 From 70044d71d31d6973665ced5be04ef39ac1c09a48 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 7 Mar 2014 10:19:57 -0500 Subject: firewire: don't use PREPARE_DELAYED_WORK PREPARE_[DELAYED_]WORK() are being phased out. They have few users and a nasty surprise in terms of reentrancy guarantee as workqueue considers work items to be different if they don't have the same work function. firewire core-device and sbp2 have been been multiplexing work items with multiple work functions. Introduce fw_device_workfn() and sbp2_lu_workfn() which invoke fw_device->workfn and sbp2_logical_unit->workfn respectively and always use the two functions as the work functions and update the users to set the ->workfn fields instead of overriding work functions using PREPARE_DELAYED_WORK(). This fixes a variety of possible regressions since a2c1c57be8d9 "workqueue: consider work function when searching for busy work items" due to which fw_workqueue lost its required non-reentrancy property. Signed-off-by: Tejun Heo Acked-by: Stefan Richter Cc: linux1394-devel@lists.sourceforge.net Cc: stable@vger.kernel.org # v3.9+ Cc: stable@vger.kernel.org # v3.8.2+ Cc: stable@vger.kernel.org # v3.4.60+ Cc: stable@vger.kernel.org # v3.2.40+ diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index de4aa40..2c6d5e1 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -916,7 +916,7 @@ static int lookup_existing_device(struct device *dev, void *data) old->config_rom_retries = 0; fw_notice(card, "rediscovered device %s\n", dev_name(dev)); - PREPARE_DELAYED_WORK(&old->work, fw_device_update); + old->workfn = fw_device_update; fw_schedule_device_work(old, 0); if (current_node == card->root_node) @@ -1075,7 +1075,7 @@ static void fw_device_init(struct work_struct *work) if (atomic_cmpxchg(&device->state, FW_DEVICE_INITIALIZING, FW_DEVICE_RUNNING) == FW_DEVICE_GONE) { - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, SHUTDOWN_DELAY); } else { fw_notice(card, "created device %s: GUID %08x%08x, S%d00\n", @@ -1196,13 +1196,20 @@ static void fw_device_refresh(struct work_struct *work) dev_name(&device->device), fw_rcode_string(ret)); gone: atomic_set(&device->state, FW_DEVICE_GONE); - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, SHUTDOWN_DELAY); out: if (node_id == card->root_node->node_id) fw_schedule_bm_work(card, 0); } +static void fw_device_workfn(struct work_struct *work) +{ + struct fw_device *device = container_of(to_delayed_work(work), + struct fw_device, work); + device->workfn(work); +} + void fw_node_event(struct fw_card *card, struct fw_node *node, int event) { struct fw_device *device; @@ -1252,7 +1259,8 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) * power-up after getting plugged in. We schedule the * first config rom scan half a second after bus reset. */ - INIT_DELAYED_WORK(&device->work, fw_device_init); + device->workfn = fw_device_init; + INIT_DELAYED_WORK(&device->work, fw_device_workfn); fw_schedule_device_work(device, INITIAL_DELAY); break; @@ -1268,7 +1276,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) if (atomic_cmpxchg(&device->state, FW_DEVICE_RUNNING, FW_DEVICE_INITIALIZING) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_refresh); + device->workfn = fw_device_refresh; fw_schedule_device_work(device, device->is_local ? 0 : INITIAL_DELAY); } @@ -1283,7 +1291,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) smp_wmb(); /* update node_id before generation */ device->generation = card->generation; if (atomic_read(&device->state) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_update); + device->workfn = fw_device_update; fw_schedule_device_work(device, 0); } break; @@ -1308,7 +1316,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event) device = node->data; if (atomic_xchg(&device->state, FW_DEVICE_GONE) == FW_DEVICE_RUNNING) { - PREPARE_DELAYED_WORK(&device->work, fw_device_shutdown); + device->workfn = fw_device_shutdown; fw_schedule_device_work(device, list_empty(&card->link) ? 0 : SHUTDOWN_DELAY); } diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 281029d..7aef911 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -146,6 +146,7 @@ struct sbp2_logical_unit { */ int generation; int retries; + work_func_t workfn; struct delayed_work work; bool has_sdev; bool blocked; @@ -864,7 +865,7 @@ static void sbp2_login(struct work_struct *work) /* set appropriate retry limit(s) in BUSY_TIMEOUT register */ sbp2_set_busy_timeout(lu); - PREPARE_DELAYED_WORK(&lu->work, sbp2_reconnect); + lu->workfn = sbp2_reconnect; sbp2_agent_reset(lu); /* This was a re-login. */ @@ -918,7 +919,7 @@ static void sbp2_login(struct work_struct *work) * If a bus reset happened, sbp2_update will have requeued * lu->work already. Reset the work from reconnect to login. */ - PREPARE_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; } static void sbp2_reconnect(struct work_struct *work) @@ -952,7 +953,7 @@ static void sbp2_reconnect(struct work_struct *work) lu->retries++ >= 5) { dev_err(tgt_dev(tgt), "failed to reconnect\n"); lu->retries = 0; - PREPARE_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; } sbp2_queue_work(lu, DIV_ROUND_UP(HZ, 5)); @@ -972,6 +973,13 @@ static void sbp2_reconnect(struct work_struct *work) sbp2_conditionally_unblock(lu); } +static void sbp2_lu_workfn(struct work_struct *work) +{ + struct sbp2_logical_unit *lu = container_of(to_delayed_work(work), + struct sbp2_logical_unit, work); + lu->workfn(work); +} + static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) { struct sbp2_logical_unit *lu; @@ -998,7 +1006,8 @@ static int sbp2_add_logical_unit(struct sbp2_target *tgt, int lun_entry) lu->blocked = false; ++tgt->dont_block; INIT_LIST_HEAD(&lu->orb_list); - INIT_DELAYED_WORK(&lu->work, sbp2_login); + lu->workfn = sbp2_login; + INIT_DELAYED_WORK(&lu->work, sbp2_lu_workfn); list_add_tail(&lu->link, &tgt->lu_list); return 0; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 5d7782e..c3683bd 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -200,6 +200,7 @@ struct fw_device { unsigned irmc:1; unsigned bc_implemented:2; + work_func_t workfn; struct delayed_work work; struct fw_attribute_group attribute_group; }; -- cgit v0.10.2 From cebc2de44d3bce53e46476e774126c298ca2c8a9 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 7 Mar 2014 14:57:19 +0000 Subject: dm space map metadata: fix refcount decrement below 0 which caused corruption This has been a relatively long-standing issue that wasn't nailed down until Teng-Feng Yang's meticulous bug report to dm-devel on 3/7/2014, see: http://www.redhat.com/archives/dm-devel/2014-March/msg00021.html From that report: "When decreasing the reference count of a metadata block with its reference count equals 3, we will call dm_btree_remove() to remove this enrty from the B+tree which keeps the reference count info in metadata device. The B+tree will try to rebalance the entry of the child nodes in each node it traversed, and the rebalance process contains the following steps. (1) Finding the corresponding children in current node (shadow_current(s)) (2) Shadow the children block (issue BOP_INC) (3) redistribute keys among children, and free children if necessary (issue BOP_DEC) Since the update of a metadata block's reference count could be recursive, we will stash these reference count update operations in smm->uncommitted and then process them in a FILO fashion. The problem is that step(3) could free the children which is created in step(2), so the BOP_DEC issued in step(3) will be carried out before the BOP_INC issued in step(2) since these BOPs will be processed in FILO fashion. Once the BOP_DEC from step(3) tries to decrease the reference count of newly shadow block, it will report failure for its reference equals 0 before decreasing. It looks like we can solve this issue by processing these BOPs in a FIFO fashion instead of FILO." Commit 5b564d80 ("dm space map: disallow decrementing a reference count below zero") changed the code to report an error for this temporary refcount decrement below zero. So what was previously a harmless invalid refcount became a hard failure due to the new error path: device-mapper: space map common: unable to decrement a reference count below 0 device-mapper: thin: 253:6: dm_thin_insert_block() failed: error = -22 device-mapper: thin: 253:6: switching pool to read-only mode This bug is in dm persistent-data code that is common to the DM thin and cache targets. So any users of those targets should apply this fix. Fix this by applying recursive space map operations in FIFO order rather than FILO. Resolves: https://bugzilla.kernel.org/show_bug.cgi?id=68801 Reported-by: Apollon Oikonomopoulos Reported-by: edwillam1007@gmail.com Reported-by: Teng-Feng Yang Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org # 3.13+ diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e9bdd46..786b689 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -91,6 +91,69 @@ struct block_op { dm_block_t block; }; +struct bop_ring_buffer { + unsigned begin; + unsigned end; + struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1]; +}; + +static void brb_init(struct bop_ring_buffer *brb) +{ + brb->begin = 0; + brb->end = 0; +} + +static bool brb_empty(struct bop_ring_buffer *brb) +{ + return brb->begin == brb->end; +} + +static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) +{ + unsigned r = old + 1; + return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r; +} + +static int brb_push(struct bop_ring_buffer *brb, + enum block_op_type type, dm_block_t b) +{ + struct block_op *bop; + unsigned next = brb_next(brb, brb->end); + + /* + * We don't allow the last bop to be filled, this way we can + * differentiate between full and empty. + */ + if (next == brb->begin) + return -ENOMEM; + + bop = brb->bops + brb->end; + bop->type = type; + bop->block = b; + + brb->end = next; + + return 0; +} + +static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result) +{ + struct block_op *bop; + + if (brb_empty(brb)) + return -ENODATA; + + bop = brb->bops + brb->begin; + result->type = bop->type; + result->block = bop->block; + + brb->begin = brb_next(brb, brb->begin); + + return 0; +} + +/*----------------------------------------------------------------*/ + struct sm_metadata { struct dm_space_map sm; @@ -101,25 +164,20 @@ struct sm_metadata { unsigned recursion_count; unsigned allocated_this_transaction; - unsigned nr_uncommitted; - struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; + struct bop_ring_buffer uncommitted; struct threshold threshold; }; static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) { - struct block_op *op; + int r = brb_push(&smm->uncommitted, type, b); - if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) { + if (r) { DMERR("too many recursive allocations"); return -ENOMEM; } - op = smm->uncommitted + smm->nr_uncommitted++; - op->type = type; - op->block = b; - return 0; } @@ -158,11 +216,17 @@ static int out(struct sm_metadata *smm) return -ENOMEM; } - if (smm->recursion_count == 1 && smm->nr_uncommitted) { - while (smm->nr_uncommitted && !r) { - smm->nr_uncommitted--; - r = commit_bop(smm, smm->uncommitted + - smm->nr_uncommitted); + if (smm->recursion_count == 1) { + while (!brb_empty(&smm->uncommitted)) { + struct block_op bop; + + r = brb_pop(&smm->uncommitted, &bop); + if (r) { + DMERR("bug in bop ring buffer"); + break; + } + + r = commit_bop(smm, &bop); if (r) break; } @@ -217,7 +281,8 @@ static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) { - int r, i; + int r; + unsigned i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); unsigned adjustment = 0; @@ -225,8 +290,10 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, * We may have some uncommitted adjustments to add. This list * should always be really short. */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; + for (i = smm->uncommitted.begin; + i != smm->uncommitted.end; + i = brb_next(&smm->uncommitted, i)) { + struct block_op *op = smm->uncommitted.bops + i; if (op->block != b) continue; @@ -254,7 +321,8 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) { - int r, i, adjustment = 0; + int r, adjustment = 0; + unsigned i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); uint32_t rc; @@ -262,8 +330,11 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, * We may have some uncommitted adjustments to add. This list * should always be really short. */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; + for (i = smm->uncommitted.begin; + i != smm->uncommitted.end; + i = brb_next(&smm->uncommitted, i)) { + + struct block_op *op = smm->uncommitted.bops + i; if (op->block != b) continue; @@ -671,7 +742,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm, smm->begin = superblock + 1; smm->recursion_count = 0; smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; + brb_init(&smm->uncommitted); threshold_init(&smm->threshold); memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); @@ -715,7 +786,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm, smm->begin = 0; smm->recursion_count = 0; smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; + brb_init(&smm->uncommitted); threshold_init(&smm->threshold); memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); -- cgit v0.10.2 From b053940df41808f0f27568eb36820d10a8a987f8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 7 Mar 2014 13:22:22 +0530 Subject: ARC: Use correct PTAG register for icache flush This fixes a subtle issue with cache flush which could potentially cause random userspace crashes because of stale icache lines. This error crept in when consolidating the cache flush code Fixes: bd12976c3664 (ARC: cacheflush refactor #3: Unify the {d,i}cache) Signed-off-by: Vineet Gupta Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 3.13 Cc: arc-linux-dev@synopsys.com Signed-off-by: Linus Torvalds diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 6b58c1d..400c663 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -282,7 +282,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, #else /* if V-P const for loop, PTAG can be written once outside loop */ if (full_page_op) - write_aux_reg(ARC_REG_DC_PTAG, paddr); + write_aux_reg(aux_tag, paddr); #endif while (num_lines-- > 0) { @@ -296,7 +296,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, write_aux_reg(aux_cmd, vaddr); vaddr += L1_CACHE_BYTES; #else - write_aux_reg(aux, paddr); + write_aux_reg(aux_cmd, paddr); paddr += L1_CACHE_BYTES; #endif } -- cgit v0.10.2 From b28a613e9138e4b3a64649bd60b13436f4b4b49b Mon Sep 17 00:00:00 2001 From: Michele Baldessari Date: Fri, 7 Mar 2014 16:34:29 +0000 Subject: libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for Seagate Momentus SpinPoint M8 (2BA30001) Via commit 87809942d3fa "libata: add ATA_HORKAGE_BROKEN_FPDMA_AA quirk for Seagate Momentus SpinPoint M8" we added a quirk for disks named "ST1000LM024 HN-M101MBB" with firmware revision "2AR10001". As reported on https://bugzilla.redhat.com/show_bug.cgi?id=1073901, we need to also add firmware revision 2BA30001 as it is broken as well. Reported-by: Nicholas Signed-off-by: Michele Baldessari Tested-by: Guilherme Amadio Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 191cdfb..65d3f1b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4175,6 +4175,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Seagate Momentus SpinPoint M8 seem to have FPMDA_AA issues */ { "ST1000LM024 HN-M101MBB", "2AR10001", ATA_HORKAGE_BROKEN_FPDMA_AA }, + { "ST1000LM024 HN-M101MBB", "2BA30001", ATA_HORKAGE_BROKEN_FPDMA_AA }, /* Blacklist entries taken from Silicon Image 3124/3132 Windows driver .inf file - also several Linux problem reports */ -- cgit v0.10.2 From e0429362ab15c46ea4d64c3f8c9e0933e48a143a Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 4 Mar 2014 10:52:39 -0800 Subject: usb: Add device quirk for Logitech HD Pro Webcams C920 and C930e We've encountered a rare issue when enumerating two Logitech webcams after a reboot that doesn't power cycle the USB ports. They are spewing random data (possibly some leftover UVC buffers) on the second (full-sized) Get Configuration request of the enumeration phase. Since the data is random this can potentially cause all kinds of odd behavior, and since it occasionally happens multiple times (after the kernel issues another reset due to the garbled configuration descriptor), it is not always recoverable. Set the USB_DELAY_INIT quirk that seems to work around the issue. Signed-off-by: Julius Werner Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 8f37063..739ee8e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -47,6 +47,10 @@ static const struct usb_device_id usb_quirk_list[] = { /* Microsoft LifeCam-VX700 v2.0 */ { USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Logitech HD Pro Webcams C920 and C930e */ + { USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT }, + { USB_DEVICE(0x046d, 0x0843), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, -- cgit v0.10.2 From d86db25e53fa69e3e97f3b55dd82a70689787c5d Mon Sep 17 00:00:00 2001 From: Julius Werner Date: Tue, 4 Mar 2014 11:27:38 -0800 Subject: usb: Make DELAY_INIT quirk wait 100ms between Get Configuration requests The DELAY_INIT quirk only reduces the frequency of enumeration failures with the Logitech HD Pro C920 and C930e webcams, but does not quite eliminate them. We have found that adding a delay of 100ms between the first and second Get Configuration request makes the device enumerate perfectly reliable even after several weeks of extensive testing. The reasons for that are anyone's guess, but since the DELAY_INIT quirk already delays enumeration by a whole second, wating for another 10th of that isn't really a big deal for the one other device that uses it, and it will resolve the problems with these webcams. Signed-off-by: Julius Werner Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 8d72f0c..062967c 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -717,6 +717,10 @@ int usb_get_configuration(struct usb_device *dev) result = -ENOMEM; goto err; } + + if (dev->quirks & USB_QUIRK_DELAY_INIT) + msleep(100); + result = usb_get_descriptor(dev, USB_DT_CONFIG, cfgno, bigbuffer, length); if (result < 0) { -- cgit v0.10.2 From e2ed511400d41e0d136089d5a55ceab57c6a2426 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 7 Mar 2014 17:06:57 +0200 Subject: Revert "xhci 1.0: Limit arbitrarily-aligned scatter gather." This reverts commit 247bf557273dd775505fb9240d2d152f4f20d304. This commit, together with commit 3804fad45411b48233b48003e33a78f290d227c8 "USBNET: ax88179_178a: enable tso if usb host supports sg dma" were origially added to get xHCI 1.0 hosts and usb ethernet ax88179_178a devices working together with scatter gather. xHCI 1.0 hosts pose some requirement on how transfer buffers are aligned, setting this requirement for 1.0 hosts caused USB 3.0 mass storage devices to fail more frequently. USB 3.0 mass storage devices used to work before 3.14-rc1. Theoretically, the TD fragment rules could have caused an occasional disk glitch. Now the devices *will* fail, instead of theoretically failing. >From a user perspective, this looks like a regression; the USB device obviously fails on 3.14-rc1, and may sometimes silently fail on prior kernels. The proper soluition is to implement the TD fragment rules required, but for now this patch needs to be reverted to get USB 3.0 mass storage devices working at the level they used to. Signed-off-by: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6fe577d..924a6cc 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4733,6 +4733,9 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) /* Accept arbitrarily long scatter-gather lists */ hcd->self.sg_tablesize = ~0; + /* support to build packet from discontinuous buffers */ + hcd->self.no_sg_constraint = 1; + /* XHCI controllers don't stop the ep queue on short packets :| */ hcd->self.no_stop_on_short = 1; @@ -4757,14 +4760,6 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) /* xHCI private pointer was set in xhci_pci_probe for the second * registered roothub. */ - xhci = hcd_to_xhci(hcd); - /* - * Support arbitrarily aligned sg-list entries on hosts without - * TD fragment rules (which are currently unsupported). - */ - if (xhci->hci_version < 0x100) - hcd->self.no_sg_constraint = 1; - return 0; } @@ -4793,9 +4788,6 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) if (xhci->hci_version > 0x96) xhci->quirks |= XHCI_SPURIOUS_SUCCESS; - if (xhci->hci_version < 0x100) - hcd->self.no_sg_constraint = 1; - /* Make sure the HC is halted. */ retval = xhci_halt(xhci); if (retval) -- cgit v0.10.2 From 469d417b68958a064c09e7875646c97c6e783dfc Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 7 Mar 2014 17:06:58 +0200 Subject: Revert "USBNET: ax88179_178a: enable tso if usb host supports sg dma" This reverts commit 3804fad45411b48233b48003e33a78f290d227c8. This commit, together with commit 247bf557273dd775505fb9240d2d152f4f20d304 "xhci 1.0: Limit arbitrarily-aligned scatter gather." were origially added to get xHCI 1.0 hosts and usb ethernet ax88179_178a devices working together with scatter gather. xHCI 1.0 hosts pose some requirement on how transfer buffers are aligned, setting this requirement for 1.0 hosts caused USB 3.0 mass storage devices to fail more frequently. USB 3.0 mass storage devices used to work before 3.14-rc1. Theoretically, the TD fragment rules could have caused an occasional disk glitch. Now the devices *will* fail, instead of theoretically failing. >From a user perspective, this looks like a regression; the USB device obviously fails on 3.14-rc1, and may sometimes silently fail on prior kernels. The proper soluition is to implement the TD fragment rules for xHCI 1.0 hosts, but for now, revert this patch until scatter gather can be properly supported. Signed-off-by: Mathias Nyman Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 955df81..42085e6 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1029,20 +1029,12 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) dev->mii.phy_id = 0x03; dev->mii.supports_gmii = 1; - if (usb_device_no_sg_constraint(dev->udev)) - dev->can_dma_sg = 1; - dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM; - if (dev->can_dma_sg) { - dev->net->features |= NETIF_F_SG | NETIF_F_TSO; - dev->net->hw_features |= NETIF_F_SG | NETIF_F_TSO; - } - /* Enable checksum offload */ *tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP | AX_RXCOE_TCPV6 | AX_RXCOE_UDPV6; -- cgit v0.10.2 From 006fa2599bf0daf107cbb7a8a99fcfb9a998a169 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Feb 2014 19:40:46 +0000 Subject: ARM: fix noMMU kallsyms symbol filtering With noMMU, CONFIG_PAGE_OFFSET was not being set correctly. As there's no MMU, PAGE_OFFSET should be equal to PHYS_OFFSET in all cases. This commit makes that explicit. Since we do this, we don't need to mess around in asm/memory.h with ifdefs to sort this out, so let's get rid of that, and there's no point offering the "Memory split" option for noMMU as that's meaningless there. Fixes: b9b32bf70f2f ("ARM: use linker magic for vectors and vector stubs") Cc: Signed-off-by: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e254198..2a232ce 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1578,6 +1578,7 @@ config BL_SWITCHER_DUMMY_IF choice prompt "Memory split" + depends on MMU default VMSPLIT_3G help Select the desired split between kernel and user memory. @@ -1595,6 +1596,7 @@ endchoice config PAGE_OFFSET hex + default PHYS_OFFSET if !MMU default 0x40000000 if VMSPLIT_1G default 0x80000000 if VMSPLIT_2G default 0xC0000000 diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 8756e4b..4afb376 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -30,14 +30,15 @@ */ #define UL(x) _AC(x, UL) +/* PAGE_OFFSET - the virtual address of the start of the kernel image */ +#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) + #ifdef CONFIG_MMU /* - * PAGE_OFFSET - the virtual address of the start of the kernel image * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area */ -#define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) #define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M)) #define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M) @@ -104,10 +105,6 @@ #define END_MEM (UL(CONFIG_DRAM_BASE) + CONFIG_DRAM_SIZE) #endif -#ifndef PAGE_OFFSET -#define PAGE_OFFSET PLAT_PHYS_OFFSET -#endif - /* * The module can be at any place in ram in nommu mode. */ -- cgit v0.10.2 From 052450fdc55894a39fbae93d9bbe43947956f663 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Feb 2014 22:41:41 +0100 Subject: ARM: 7991/1: sa1100: fix compile problem on Collie Due to a problem in the MFD Kconfig it was not possible to compile the UCB battery driver for the Collie SA1100 system, in turn making it impossible to compile in the battery driver. (See patch "mfd: include all drivers in subsystem menu".) After fixing the MFD Kconfig (separate patch) a compile error appears in the Collie battery driver due to the implicitly requiring through via prior to commit 40ca061b "ARM: 7841/1: sa1100: remove complex GPIO interface". Fix this up by including the required header into . Cc: stable@vger.kernel.org Cc: Andrea Adami Cc: Dmitry Eremin-Solenikov Signed-off-by: Linus Walleij Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h index f33679d..50e1d85 100644 --- a/arch/arm/mach-sa1100/include/mach/collie.h +++ b/arch/arm/mach-sa1100/include/mach/collie.h @@ -13,6 +13,8 @@ #ifndef __ASM_ARCH_COLLIE_H #define __ASM_ARCH_COLLIE_H +#include "hardware.h" /* Gives GPIO_MAX */ + extern void locomolcd_power(int on); #define COLLIE_SCOOP_GPIO_BASE (GPIO_MAX + 1) -- cgit v0.10.2 From 38e0b088d322e5762ac21fb4df433e83faf128eb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Feb 2014 17:21:26 +0100 Subject: ARM: 7992/1: boot: compressed: ignore bswapsdi2.S Commit 017f161a55b4 (ARM: 7877/1: use built-in byte swap function) added bswapsdi2.{o,S} to arch/arm/boot/compressed/Makefile, but didn't update the .gitignore. Thus after a a build git status shows bswapsdi2.S as a new file, which is a little annoying. This patch updates arch/arm/boot/compressed/.gitignore to ignore bswapsdi2.S, as we already do for ashldi3.S and others. Signed-off-by: Mark Rutland Acked-by: Nicolas Pitre Acked-by: Kim Phillips Cc: David Woodhouse Signed-off-by: Russell King diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index 47279aa..0714e03 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -1,4 +1,5 @@ ashldi3.S +bswapsdi2.S font.c lib1funcs.S hyp-stub.S -- cgit v0.10.2 From 5fa10196bdb5f190f595ebd048490ee52dddea0f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 7 Mar 2014 15:05:20 -0800 Subject: x86: Ignore NMIs that come in during early boot Don Zickus reports: A customer generated an external NMI using their iLO to test kdump worked. Unfortunately, the machine hung. Disabling the nmi_watchdog made things work. I speculated the external NMI fired, caused the machine to panic (as expected) and the perf NMI from the watchdog came in and was latched. My guess was this somehow caused the hang. ---- It appears that the latched NMI stays latched until the early page table generation on 64 bits, which causes exceptions to happen which end in IRET, which re-enable NMI. Therefore, ignore NMIs that come in during early execution, until we have proper exception handling. Reported-and-tested-by: Don Zickus Link: http://lkml.kernel.org/r/1394221143-29713-1-git-send-email-dzickus@redhat.com Signed-off-by: H. Peter Anvin Cc: # v3.5+, older with some backport effort diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 81ba276..d2a2159 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -544,6 +544,10 @@ ENDPROC(early_idt_handlers) /* This is global to keep gas from relaxing the jumps */ ENTRY(early_idt_handler) cld + + cmpl $X86_TRAP_NMI,(%esp) + je is_nmi # Ignore NMI + cmpl $2,%ss:early_recursion_flag je hlt_loop incl %ss:early_recursion_flag @@ -594,8 +598,9 @@ ex_entry: pop %edx pop %ecx pop %eax - addl $8,%esp /* drop vector number and error code */ decl %ss:early_recursion_flag +is_nmi: + addl $8,%esp /* drop vector number and error code */ iret ENDPROC(early_idt_handler) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e1aabdb..33f36c7 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -343,6 +343,9 @@ early_idt_handlers: ENTRY(early_idt_handler) cld + cmpl $X86_TRAP_NMI,(%rsp) + je is_nmi # Ignore NMI + cmpl $2,early_recursion_flag(%rip) jz 1f incl early_recursion_flag(%rip) @@ -405,8 +408,9 @@ ENTRY(early_idt_handler) popq %rdx popq %rcx popq %rax - addq $16,%rsp # drop vector number and error code decl early_recursion_flag(%rip) +is_nmi: + addq $16,%rsp # drop vector number and error code INTERRUPT_RETURN ENDPROC(early_idt_handler) -- cgit v0.10.2 From 2ca310fc4160ed0420da65534a21ae77b24326a8 Mon Sep 17 00:00:00 2001 From: Ditang Chen Date: Fri, 7 Mar 2014 13:27:57 +0800 Subject: SUNRPC: Fix oops when trace sunrpc_task events in nfs client When tracking sunrpc_task events in nfs client, the clnt pointer may be NULL. [ 139.269266] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 139.269915] IP: [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] PGD 1d293067 PUD 1d294067 PMD 0 [ 139.269915] Oops: 0000 [#1] SMP [ 139.269915] Modules linked in: nfsv4 dns_resolver nfs lockd sunrpc fscache sg ppdev e1000 serio_raw pcspkr parport_pc parport i2c_piix4 i2c_core microcode xfs libcrc32c sd_mod sr_mod cdrom ata_generic crc_t10dif crct10dif_common pata_acpi ahci libahci ata_piix libata dm_mirror dm_region_hash dm_log dm_mod [ 139.269915] CPU: 0 PID: 59 Comm: kworker/0:2 Not tainted 3.10.0-84.el7.x86_64 #1 [ 139.269915] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 139.269915] Workqueue: rpciod rpc_async_schedule [sunrpc] [ 139.269915] task: ffff88001b598000 ti: ffff88001b632000 task.ti: ffff88001b632000 [ 139.269915] RIP: 0010:[] [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP: 0018:ffff88001b633d70 EFLAGS: 00010206 [ 139.269915] RAX: ffff88001dfc5338 RBX: ffff88001cc37a00 RCX: ffff88001dfc5334 [ 139.269915] RDX: ffff88001dfc5338 RSI: 0000000000000000 RDI: ffff88001dfc533c [ 139.269915] RBP: ffff88001b633db0 R08: 000000000000002c R09: 000000000000000a [ 139.269915] R10: 0000000000062180 R11: 00000020759fb9dc R12: ffffffffa0292c20 [ 139.269915] R13: ffff88001dfc5334 R14: 0000000000000000 R15: 0000000000000000 [ 139.269915] FS: 0000000000000000(0000) GS:ffff88001fc00000(0000) knlGS:0000000000000000 [ 139.269915] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 139.269915] CR2: 0000000000000004 CR3: 000000001d290000 CR4: 00000000000006f0 [ 139.269915] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 139.269915] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 139.269915] Stack: [ 139.269915] 000000001b633d98 0000000000000246 ffff88001df1dc00 ffff88001cc37a00 [ 139.269915] ffff88001bc35e60 0000000000000000 ffff88001ffa0a48 ffff88001bc35ee0 [ 139.269915] ffff88001b633e08 ffffffffa02704b5 0000000000010000 ffff88001cc37a70 [ 139.269915] Call Trace: [ 139.269915] [] __rpc_execute+0x1d5/0x400 [sunrpc] [ 139.269915] [] rpc_async_schedule+0x26/0x30 [sunrpc] [ 139.269915] [] process_one_work+0x17b/0x460 [ 139.269915] [] worker_thread+0x11b/0x400 [ 139.269915] [] ? rescuer_thread+0x3e0/0x3e0 [ 139.269915] [] kthread+0xc0/0xd0 [ 139.269915] [] ? kthread_create_on_node+0x110/0x110 [ 139.269915] [] ret_from_fork+0x7c/0xb0 [ 139.269915] [] ? kthread_create_on_node+0x110/0x110 [ 139.269915] Code: 4c 8b 45 c8 48 8d 7d d0 89 4d c4 41 89 c9 b9 28 00 00 00 e8 9d b4 e9 e0 48 85 c0 49 89 c5 74 a2 48 89 c7 e8 9d 3f e9 e0 48 89 c2 <41> 8b 46 04 48 8b 7d d0 4c 89 e9 4c 89 e6 89 42 0c 0f b7 83 d4 [ 139.269915] RIP [] ftrace_raw_event_rpc_task_running+0x86/0xf0 [sunrpc] [ 139.269915] RSP [ 139.269915] CR2: 0000000000000004 [ 140.946406] ---[ end trace ba486328b98d7622 ]--- Signed-off-by: Ditang Chen Signed-off-by: Trond Myklebust diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ddc179b..1fef3e6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -83,7 +83,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, ), TP_fast_assign( - __entry->client_id = clnt->cl_clid; + __entry->client_id = clnt ? clnt->cl_clid : -1; __entry->task_id = task->tk_pid; __entry->action = action; __entry->runstate = task->tk_runstate; @@ -91,7 +91,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, __entry->flags = task->tk_flags; ), - TP_printk("task:%u@%u flags=%4.4x state=%4.4lx status=%d action=%pf", + TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf", __entry->task_id, __entry->client_id, __entry->flags, __entry->runstate, -- cgit v0.10.2 From b01d4e68933ec23e43b1046fa35d593cefcf37d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 7 Mar 2014 18:58:40 -0800 Subject: x86: fix compile error due to X86_TRAP_NMI use in asm files It's an enum, not a #define, you can't use it in asm files. Introduced in commit 5fa10196bdb5 ("x86: Ignore NMIs that come in during early boot"), and sadly I didn't compile-test things like I should have before pushing out. My weak excuse is that the x86 tree generally doesn't introduce stupid things like this (and the ARM pull afterwards doesn't cause me to do a compile-test either, since I don't cross-compile). Cc: Don Zickus Cc: H. Peter Anvin Signed-off-by: Linus Torvalds diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d2a2159..f36bd42 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -545,7 +545,7 @@ ENDPROC(early_idt_handlers) ENTRY(early_idt_handler) cld - cmpl $X86_TRAP_NMI,(%esp) + cmpl $2,(%esp) # X86_TRAP_NMI je is_nmi # Ignore NMI cmpl $2,%ss:early_recursion_flag diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 33f36c7..a468c0a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -343,7 +343,7 @@ early_idt_handlers: ENTRY(early_idt_handler) cld - cmpl $X86_TRAP_NMI,(%rsp) + cmpl $2,(%rsp) # X86_TRAP_NMI je is_nmi # Ignore NMI cmpl $2,early_recursion_flag(%rip) -- cgit v0.10.2 From d211f177b28ec070c25b3d0b960aa55f352f731f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 8 Mar 2014 15:31:54 -0800 Subject: audit: Update kdoc for audit_send_reply and audit_list_rules_send The kbuild test robot reported: > tree: git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git for-next > head: 6f285b19d09f72e801525f5eea1bdad22e559bf0 > commit: 6f285b19d09f72e801525f5eea1bdad22e559bf0 [2/2] audit: Send replies in the proper network namespace. > reproduce: make htmldocs > > >> Warning(kernel/audit.c:575): No description found for parameter 'request_skb' > >> Warning(kernel/audit.c:575): Excess function parameter 'portid' description in 'audit_send_reply' > >> Warning(kernel/auditfilter.c:1074): No description found for parameter 'request_skb' > >> Warning(kernel/auditfilter.c:1074): Excess function parameter 'portid' description in 'audit_list_rules_s Which was caused by my failure to update the kdoc annotations when I updated the functions. Fix that small oversight now. Signed-off-by: "Eric W. Biederman" diff --git a/kernel/audit.c b/kernel/audit.c index 32086bf..3392d3e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -559,7 +559,7 @@ static int audit_send_reply_thread(void *arg) } /** * audit_send_reply - send an audit reply message via netlink - * @portid: netlink port to which to send reply + * @request_skb: skb of request we are replying to (used to target the reply) * @seq: sequence number * @type: audit message type * @done: done (last) flag diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index e8d1c7c..92062fd 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1067,7 +1067,7 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data, /** * audit_list_rules_send - list the audit rules - * @portid: target portid for netlink audit messages + * @request_skb: skb of request we are replying to (used to target the reply) * @seq: netlink audit message sequence (serial) number */ int audit_list_rules_send(struct sk_buff *request_skb, int seq) -- cgit v0.10.2 From 4c7b70406e5aadc1c78db4d375d5744df65c5b58 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 9 Mar 2014 18:30:50 +0100 Subject: Revert "ACPI / sleep: pm_power_off needs more sanity checks to be installed" Revert commit 3130497f5bab ("ACPI / sleep: pm_power_off needs more sanity checks to be installed") that breaks power ACPI power off on a lot of systems, because it checks wrong registers. Fixes: 3130497f5bab ("ACPI / sleep: pm_power_off needs more sanity checks to be installed") Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index b0f6c4a..b718806 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -807,12 +807,7 @@ int __init acpi_sleep_init(void) acpi_sleep_hibernate_setup(); status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - /* - * Check both ACPI S5 object and ACPI sleep registers to - * install pm_power_off_prepare/pm_power_off hook - */ - if (ACPI_SUCCESS(status) && acpi_gbl_FADT.sleep_control.address - && acpi_gbl_FADT.sleep_status.address) { + if (ACPI_SUCCESS(status)) { sleep_states[ACPI_STATE_S5] = 1; pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; -- cgit v0.10.2 From a8d9bc2e9f5d1c5a25e33cec096d2a1652d3fd52 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 9 Mar 2014 15:45:32 -0800 Subject: bnx2: Fix shutdown sequence The pci shutdown handler added in: bnx2: Add pci shutdown handler commit 25bfb1dd4ba3b2d9a49ce9d9b0cd7be1840e15ed created a shutdown down sequence without chip reset if the device was never brought up. This can cause the firmware to shutdown the PHY prematurely and cause MMIO read cycles to be unresponsive. On some systems, it may generate NMI in the bnx2's pci shutdown handler. The fix is to tell the firmware not to shutdown the PHY if there was no prior chip reset. Signed-off-by: Michael Chan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index cda25ac..6c9e1c9 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2507,6 +2507,7 @@ bnx2_fw_sync(struct bnx2 *bp, u32 msg_data, int ack, int silent) bp->fw_wr_seq++; msg_data |= bp->fw_wr_seq; + bp->fw_last_msg = msg_data; bnx2_shmem_wr(bp, BNX2_DRV_MB, msg_data); @@ -4000,8 +4001,23 @@ bnx2_setup_wol(struct bnx2 *bp) wol_msg = BNX2_DRV_MSG_CODE_SUSPEND_NO_WOL; } - if (!(bp->flags & BNX2_FLAG_NO_WOL)) - bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT3 | wol_msg, 1, 0); + if (!(bp->flags & BNX2_FLAG_NO_WOL)) { + u32 val; + + wol_msg |= BNX2_DRV_MSG_DATA_WAIT3; + if (bp->fw_last_msg || BNX2_CHIP(bp) != BNX2_CHIP_5709) { + bnx2_fw_sync(bp, wol_msg, 1, 0); + return; + } + /* Tell firmware not to power down the PHY yet, otherwise + * the chip will take a long time to respond to MMIO reads. + */ + val = bnx2_shmem_rd(bp, BNX2_PORT_FEATURE); + bnx2_shmem_wr(bp, BNX2_PORT_FEATURE, + val | BNX2_PORT_FEATURE_ASF_ENABLED); + bnx2_fw_sync(bp, wol_msg, 1, 0); + bnx2_shmem_wr(bp, BNX2_PORT_FEATURE, val); + } } @@ -4033,9 +4049,22 @@ bnx2_set_power_state(struct bnx2 *bp, pci_power_t state) if (bp->wol) pci_set_power_state(bp->pdev, PCI_D3hot); - } else { - pci_set_power_state(bp->pdev, PCI_D3hot); + break; + + } + if (!bp->fw_last_msg && BNX2_CHIP(bp) == BNX2_CHIP_5709) { + u32 val; + + /* Tell firmware not to power down the PHY yet, + * otherwise the other port may not respond to + * MMIO reads. + */ + val = bnx2_shmem_rd(bp, BNX2_BC_STATE_CONDITION); + val &= ~BNX2_CONDITION_PM_STATE_MASK; + val |= BNX2_CONDITION_PM_STATE_UNPREP; + bnx2_shmem_wr(bp, BNX2_BC_STATE_CONDITION, val); } + pci_set_power_state(bp->pdev, PCI_D3hot); /* No more memory access after this point until * device is brought back to D0. diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h index f1cf2c4..e341bc3 100644 --- a/drivers/net/ethernet/broadcom/bnx2.h +++ b/drivers/net/ethernet/broadcom/bnx2.h @@ -6900,6 +6900,7 @@ struct bnx2 { u16 fw_wr_seq; u16 fw_drv_pulse_wr_seq; + u32 fw_last_msg; int rx_max_ring; int rx_ring_size; @@ -7406,6 +7407,10 @@ struct bnx2_rv2p_fw_file { #define BNX2_CONDITION_MFW_RUN_NCSI 0x00006000 #define BNX2_CONDITION_MFW_RUN_NONE 0x0000e000 #define BNX2_CONDITION_MFW_RUN_MASK 0x0000e000 +#define BNX2_CONDITION_PM_STATE_MASK 0x00030000 +#define BNX2_CONDITION_PM_STATE_FULL 0x00030000 +#define BNX2_CONDITION_PM_STATE_PREP 0x00020000 +#define BNX2_CONDITION_PM_STATE_UNPREP 0x00010000 #define BNX2_BC_STATE_DEBUG_CMD 0x1dc #define BNX2_BC_STATE_BC_DBG_CMD_SIGNATURE 0x42440000 -- cgit v0.10.2 From 979e0d74651ba5aa533277f2a6423d0f982fb6f6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 9 Mar 2014 08:21:58 +0000 Subject: KEYS: Make the keyring cycle detector ignore other keyrings of the same name This fixes CVE-2014-0102. The following command sequence produces an oops: keyctl new_session i=`keyctl newring _ses @s` keyctl link @s $i The problem is that search_nested_keyrings() sees two keyrings that have matching type and description, so keyring_compare_object() returns true. s_n_k() then passes the key to the iterator function - keyring_detect_cycle_iterator() - which *should* check to see whether this is the keyring of interest, not just one with the same name. Because assoc_array_find() will return one and only one match, I assumed that the iterator function would only see an exact match or never be called - but the iterator isn't only called from assoc_array_find()... The oops looks something like this: kernel BUG at /data/fs/linux-2.6-fscache/security/keys/keyring.c:1003! invalid opcode: 0000 [#1] SMP ... RIP: keyring_detect_cycle_iterator+0xe/0x1f ... Call Trace: search_nested_keyrings+0x76/0x2aa __key_link_check_live_key+0x50/0x5f key_link+0x4e/0x85 keyctl_keyring_link+0x60/0x81 SyS_keyctl+0x65/0xe4 tracesys+0xdd/0xe2 The fix is to make keyring_detect_cycle_iterator() check that the key it has is the key it was actually looking for rather than calling BUG_ON(). A testcase has been included in the keyutils testsuite for this: http://git.kernel.org/cgit/linux/kernel/git/dhowells/keyutils.git/commit/?id=891f3365d07f1996778ade0e3428f01878a1790b Reported-by: Tommi Rantala Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: Linus Torvalds diff --git a/security/keys/keyring.c b/security/keys/keyring.c index d46cbc5..2fb2576 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1000,7 +1000,11 @@ static int keyring_detect_cycle_iterator(const void *object, kenter("{%d}", key->serial); - BUG_ON(key != ctx->match_data); + /* We might get a keyring with matching index-key that is nonetheless a + * different keyring. */ + if (key != ctx->match_data) + return 0; + ctx->result = ERR_PTR(-EDEADLK); return 1; } -- cgit v0.10.2 From fa389e220254c69ffae0d403eac4146171062d08 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Mar 2014 19:41:57 -0700 Subject: Linux 3.14-rc6 diff --git a/Makefile b/Makefile index 78209ee..1a2628e 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Shuffling Zombie Juror # *DOCUMENTATION* -- cgit v0.10.2 From 58d4d3c976b33784a1443c446a3d7203bf2153f0 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 4 Mar 2014 09:41:53 +0100 Subject: ASoC: si476x: Fix IO setup The si476x is a MFD device and the CODEC driver is using the regmap struct of the parent device, hence automatic IO setup will not work and we need to manually call snd_soc_codec_set_cache_io(). The issue was introduced commit d6173df35f ("ASoC: si476x: Remove custom register I/O implementation") Fixes: d6173df35f ("ASoC: si476x: Remove custom register I/O implementation") Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/si476x.c b/sound/soc/codecs/si476x.c index 52e7cb0..fa2b8e0 100644 --- a/sound/soc/codecs/si476x.c +++ b/sound/soc/codecs/si476x.c @@ -210,7 +210,7 @@ out: static int si476x_codec_probe(struct snd_soc_codec *codec) { codec->control_data = dev_get_regmap(codec->dev->parent, NULL); - return 0; + return snd_soc_codec_set_cache_io(codec, 0, 0, SND_SOC_REGMAP); } static struct snd_soc_dai_ops si476x_dai_ops = { -- cgit v0.10.2 From 8eeb5c15131d7b5061c10423eda3ae4c68db4eaf Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 4 Mar 2014 09:39:24 +0100 Subject: ASoC: 88pm860: Fix IO setup The 88pm860 is a MFD device and the CODEC driver is using the regmap struct of the parent device, hence automatic IO setup will not work and we need to manually call snd_soc_codec_set_cache_io(). The issue was introduced in commit f9ded3b2e7 ("ASoC: 88pm860x: Use regmap for I/O"). Fixes: f9ded3b2e7 ("ASoC: 88pm860x: Use regmap for I/O"). Signed-off-by: Lars-Peter Clausen Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/88pm860x-codec.c b/sound/soc/codecs/88pm860x-codec.c index 75d0ad5..647a72c 100644 --- a/sound/soc/codecs/88pm860x-codec.c +++ b/sound/soc/codecs/88pm860x-codec.c @@ -1328,6 +1328,9 @@ static int pm860x_probe(struct snd_soc_codec *codec) pm860x->codec = codec; codec->control_data = pm860x->regmap; + ret = snd_soc_codec_set_cache_io(codec, 0, 0, SND_SOC_REGMAP); + if (ret) + return ret; for (i = 0; i < 4; i++) { ret = request_threaded_irq(pm860x->irq[i], NULL, -- cgit v0.10.2 From 83493d7e782d2630f1a55def14a79f0e7c4faac3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Mar 2014 11:13:43 -0400 Subject: libata: use wider match for blacklisting Crucial M500 We're now blacklisting "Crucial_CT???M500SSD1" and "Crucial_CT???M500SSD3". Also, "Micron_M500*" is blacklisted which is about the same devices as the crucial branded ones. Let's merge the two Crucial M500 entries and widen the match to "Crucial_CT???M500SSD*" so that we don't have to fiddle with new entries for similar devices. Signed-off-by: Tejun Heo Suggested-by: Linus Torvalds Cc: stable@vger.kernel.org diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 65d3f1b..8cb2522 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4225,8 +4225,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* devices that don't properly handle queued TRIM commands */ { "Micron_M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, - { "Crucial_CT???M500SSD1", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, - { "Crucial_CT???M500SSD3", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, + { "Crucial_CT???M500SSD*", NULL, ATA_HORKAGE_NO_NCQ_TRIM, }, /* * Some WD SATA-I drives spin up and down erratically when the link -- cgit v0.10.2 From 1b56e98990bcdbb20b9fab163654b9315bf158e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 10 Feb 2014 15:18:55 -0500 Subject: ocfs2 syncs the wrong range... Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8450262bc..51632c4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2393,8 +2393,8 @@ out_dio: if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, *ppos, + *ppos + count - 1); if (ret < 0) written = ret; @@ -2407,8 +2407,8 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, *ppos, + *ppos + count - 1); } /* -- cgit v0.10.2 From 9c225f2655e36a470c4f58dbbc99244c5fc7f2d4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Mar 2014 09:36:58 -0800 Subject: vfs: atomic f_pos accesses as per POSIX Our write() system call has always been atomic in the sense that you get the expected thread-safe contiguous write, but we haven't actually guaranteed that concurrent writes are serialized wrt f_pos accesses, so threads (or processes) that share a file descriptor and use "write()" concurrently would quite likely overwrite each others data. This violates POSIX.1-2008/SUSv4 Section XSI 2.9.7 that says: "2.9.7 Thread Interactions with Regular File Operations All of the following functions shall be atomic with respect to each other in the effects specified in POSIX.1-2008 when they operate on regular files or symbolic links: [...]" and one of the effects is the file position update. This unprotected file position behavior is not new behavior, and nobody has ever cared. Until now. Yongzhi Pan reported unexpected behavior to Michael Kerrisk that was due to this. This resolves the issue with a f_pos-specific lock that is taken by read/write/lseek on file descriptors that may be shared across threads or processes. Reported-by: Yongzhi Pan Reported-by: Michael Kerrisk Cc: Al Viro Signed-off-by: Linus Torvalds Signed-off-by: Al Viro diff --git a/fs/file_table.c b/fs/file_table.c index 5fff903..5b24008 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -135,6 +135,7 @@ struct file *get_empty_filp(void) atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); + mutex_init(&f->f_pos_lock); eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/namei.c b/fs/namei.c index 385f781..2f730ef 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1884,7 +1884,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (f.need_put) + if (f.flags & FDPUT_FPUT) *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); rcu_read_lock(); diff --git a/fs/open.c b/fs/open.c index 4b3e1ed..b9ed8b2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -705,6 +705,10 @@ static int do_dentry_open(struct file *f, return 0; } + /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ + if (S_ISREG(inode->i_mode)) + f->f_mode |= FMODE_ATOMIC_POS; + f->f_op = fops_get(inode->i_fop); if (unlikely(WARN_ON(!f->f_op))) { error = -ENODEV; diff --git a/fs/read_write.c b/fs/read_write.c index edc5746..932bb34 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -264,10 +264,36 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); +/* + * We only lock f_pos if we have threads or if the file might be + * shared with another process. In both cases we'll have an elevated + * file count (done either by fdget() or by fork()). + */ +static inline struct fd fdget_pos(int fd) +{ + struct fd f = fdget(fd); + struct file *file = f.file; + + if (file && (file->f_mode & FMODE_ATOMIC_POS)) { + if (file_count(file) > 1) { + f.flags |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); + } + } + return f; +} + +static inline void fdput_pos(struct fd f) +{ + if (f.flags & FDPUT_POS_UNLOCK) + mutex_unlock(&f.file->f_pos_lock); + fdput(f); +} + SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) { off_t retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); if (!f.file) return -EBADF; @@ -278,7 +304,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } - fdput(f); + fdput_pos(f); return retval; } @@ -498,7 +524,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -506,7 +532,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) ret = vfs_read(f.file, buf, count, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } return ret; } @@ -514,7 +540,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -522,7 +548,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, ret = vfs_write(f.file, buf, count, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } return ret; @@ -797,7 +823,7 @@ EXPORT_SYMBOL(vfs_writev); SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -805,7 +831,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, ret = vfs_readv(f.file, vec, vlen, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } if (ret > 0) @@ -817,7 +843,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -825,7 +851,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, ret = vfs_writev(f.file, vec, vlen, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } if (ret > 0) @@ -968,7 +994,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret; loff_t pos; @@ -978,7 +1004,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, ret = compat_readv(f.file, vec, vlen, &pos); if (ret >= 0) f.file->f_pos = pos; - fdput(f); + fdput_pos(f); return ret; } @@ -1035,7 +1061,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, const struct compat_iovec __user *, vec, compat_ulong_t, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret; loff_t pos; @@ -1045,7 +1071,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, ret = compat_writev(f.file, vec, vlen, &pos); if (ret >= 0) f.file->f_pos = pos; - fdput(f); + fdput_pos(f); return ret; } diff --git a/include/linux/file.h b/include/linux/file.h index cbacf4f..f2517fa 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -28,12 +28,14 @@ static inline void fput_light(struct file *file, int fput_needed) struct fd { struct file *file; - int need_put; + unsigned int flags; }; +#define FDPUT_FPUT 1 +#define FDPUT_POS_UNLOCK 2 static inline void fdput(struct fd fd) { - if (fd.need_put) + if (fd.flags & FDPUT_FPUT) fput(fd.file); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 6082956..ebfde04 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -123,6 +123,9 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File is opened with O_PATH; almost nothing can be done with it */ #define FMODE_PATH ((__force fmode_t)0x4000) +/* File needs atomic accesses to f_pos */ +#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) + /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) @@ -780,13 +783,14 @@ struct file { const struct file_operations *f_op; /* - * Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR. + * Protects f_ep_links, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; + struct mutex f_pos_lock; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; -- cgit v0.10.2 From 00e188ef6a7e7bf2883bcffc30d89e98a0bb76b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Mar 2014 23:48:18 -0500 Subject: sockfd_lookup_light(): switch to fdget^W^Waway from fget_light Signed-off-by: Al Viro diff --git a/net/socket.c b/net/socket.c index 879933a..fd8d86e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -450,16 +450,17 @@ EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { - struct file *file; + struct fd f = fdget(fd); struct socket *sock; *err = -EBADF; - file = fget_light(fd, fput_needed); - if (file) { - sock = sock_from_file(file, err); - if (sock) + if (f.file) { + sock = sock_from_file(f.file, err); + if (likely(sock)) { + *fput_needed = f.flags; return sock; - fput_light(file, *fput_needed); + } + fdput(f); } return NULL; } -- cgit v0.10.2 From bd2a31d522344b3ac2fb680bd2366e77a9bd8209 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 4 Mar 2014 14:54:22 -0500 Subject: get rid of fget_light() instead of returning the flags by reference, we can just have the low-level primitive return those in lower bits of unsigned long, with struct file * derived from the rest. Signed-off-by: Al Viro diff --git a/fs/file.c b/fs/file.c index db25c2b..60a45e9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -683,35 +683,65 @@ EXPORT_SYMBOL(fget_raw); * The fput_needed flag returned by fget_light should be passed to the * corresponding fput_light. */ -struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed) +static unsigned long __fget_light(unsigned int fd, fmode_t mask) { struct files_struct *files = current->files; struct file *file; - *fput_needed = 0; if (atomic_read(&files->count) == 1) { file = __fcheck_files(files, fd); - if (file && (file->f_mode & mask)) - file = NULL; + if (!file || unlikely(file->f_mode & mask)) + return 0; + return (unsigned long)file; } else { file = __fget(fd, mask); - if (file) - *fput_needed = 1; + if (!file) + return 0; + return FDPUT_FPUT | (unsigned long)file; } - - return file; } -struct file *fget_light(unsigned int fd, int *fput_needed) +unsigned long __fdget(unsigned int fd) { - return __fget_light(fd, FMODE_PATH, fput_needed); + return __fget_light(fd, FMODE_PATH); } -EXPORT_SYMBOL(fget_light); +EXPORT_SYMBOL(__fdget); -struct file *fget_raw_light(unsigned int fd, int *fput_needed) +unsigned long __fdget_raw(unsigned int fd) { - return __fget_light(fd, 0, fput_needed); + return __fget_light(fd, 0); +} + +unsigned long __fdget_pos(unsigned int fd) +{ + struct files_struct *files = current->files; + struct file *file; + unsigned long v; + + if (atomic_read(&files->count) == 1) { + file = __fcheck_files(files, fd); + v = 0; + } else { + file = __fget(fd, 0); + v = FDPUT_FPUT; + } + if (!file) + return 0; + + if (file->f_mode & FMODE_ATOMIC_POS) { + if (file_count(file) > 1) { + v |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); + } + } + return v | (unsigned long)file; } +/* + * We only lock f_pos if we have threads or if the file might be + * shared with another process. In both cases we'll have an elevated + * file count (done either by fdget() or by fork()). + */ + void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; diff --git a/fs/read_write.c b/fs/read_write.c index 932bb34..54e19b9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -264,23 +264,9 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); -/* - * We only lock f_pos if we have threads or if the file might be - * shared with another process. In both cases we'll have an elevated - * file count (done either by fdget() or by fork()). - */ static inline struct fd fdget_pos(int fd) { - struct fd f = fdget(fd); - struct file *file = f.file; - - if (file && (file->f_mode & FMODE_ATOMIC_POS)) { - if (file_count(file) > 1) { - f.flags |= FDPUT_POS_UNLOCK; - mutex_lock(&file->f_pos_lock); - } - } - return f; + return __to_fd(__fdget_pos(fd)); } static inline void fdput_pos(struct fd f) diff --git a/include/linux/file.h b/include/linux/file.h index f2517fa..4d69123 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -40,23 +40,24 @@ static inline void fdput(struct fd fd) } extern struct file *fget(unsigned int fd); -extern struct file *fget_light(unsigned int fd, int *fput_needed); +extern struct file *fget_raw(unsigned int fd); +extern unsigned long __fdget(unsigned int fd); +extern unsigned long __fdget_raw(unsigned int fd); +extern unsigned long __fdget_pos(unsigned int fd); -static inline struct fd fdget(unsigned int fd) +static inline struct fd __to_fd(unsigned long v) { - int b; - struct file *f = fget_light(fd, &b); - return (struct fd){f,b}; + return (struct fd){(struct file *)(v & ~3),v & 3}; } -extern struct file *fget_raw(unsigned int fd); -extern struct file *fget_raw_light(unsigned int fd, int *fput_needed); +static inline struct fd fdget(unsigned int fd) +{ + return __to_fd(__fdget(fd)); +} static inline struct fd fdget_raw(unsigned int fd) { - int b; - struct file *f = fget_raw_light(fd, &b); - return (struct fd){f,b}; + return __to_fd(__fdget_raw(fd)); } extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); diff --git a/include/linux/fs.h b/include/linux/fs.h index ebfde04..23b2a35 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -812,7 +812,7 @@ struct file { #ifdef CONFIG_DEBUG_WRITECOUNT unsigned long f_mnt_write_state; #endif -}; +} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { __u32 handle_bytes; -- cgit v0.10.2 From 37314363cd65d19c71bea5f222e5108c93dc3c78 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 8 Mar 2014 08:01:19 -0800 Subject: pkt_sched: move the sanity test in qdisc_list_add() The WARN_ON(root == &noop_qdisc)) added in qdisc_list_add() can trigger in normal conditions when devices are not up. It should be done only right before the list_add_tail() call. Fixes: e57a784d8cae4 ("pkt_sched: set root qdisc before change() in attach_default_qdiscs()") Reported-by: Valdis Kletnieks Tested-by: Mirco Tischler Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1313145..a07d55e 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -273,11 +273,12 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_list_add(struct Qdisc *q) { - struct Qdisc *root = qdisc_dev(q)->qdisc; + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + struct Qdisc *root = qdisc_dev(q)->qdisc; - WARN_ON_ONCE(root == &noop_qdisc); - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) + WARN_ON_ONCE(root == &noop_qdisc); list_add_tail(&q->list, &root->list); + } } EXPORT_SYMBOL(qdisc_list_add); -- cgit v0.10.2 From bc48bc806442114ea44f61d6b18e02c2bd6236fd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 9 Mar 2014 04:03:22 +0000 Subject: bna: Replace large udelay() with mdelay() udelay() does not work on some architectures for values above 2000, in particular on ARM: ERROR: "__bad_udelay" [drivers/net/ethernet/brocade/bna/bna.ko] undefined! Reported-by: Vagrant Cascadian Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c index 1803c39..354ae97 100644 --- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c +++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c @@ -1704,7 +1704,7 @@ bfa_flash_sem_get(void __iomem *bar) while (!bfa_raw_sem_get(bar)) { if (--n <= 0) return BFA_STATUS_BADFLASH; - udelay(10000); + mdelay(10); } return BFA_STATUS_OK; } -- cgit v0.10.2 From 2818fa0fa068bcbc87d6bd9064e3c1f72d6fcc2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Mar 2014 22:57:52 -0800 Subject: pkt_sched: fq: do not hold qdisc lock while allocating memory Resizing fq hash table allocates memory while holding qdisc spinlock, with BH disabled. This is definitely not good, as allocation might sleep. We can drop the lock and get it when needed, we hold RTNL so no other changes can happen at the same time. Signed-off-by: Eric Dumazet Fixes: afe4fd062416 ("pkt_sched: fq: Fair Queue packet scheduler") Signed-off-by: David S. Miller diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 08ef7a4..21e2517 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -601,6 +601,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) { struct fq_sched_data *q = qdisc_priv(sch); struct rb_root *array; + void *old_fq_root; u32 idx; if (q->fq_root && log == q->fq_trees_log) @@ -615,13 +616,19 @@ static int fq_resize(struct Qdisc *sch, u32 log) for (idx = 0; idx < (1U << log); idx++) array[idx] = RB_ROOT; - if (q->fq_root) { - fq_rehash(q, q->fq_root, q->fq_trees_log, array, log); - fq_free(q->fq_root); - } + sch_tree_lock(sch); + + old_fq_root = q->fq_root; + if (old_fq_root) + fq_rehash(q, old_fq_root, q->fq_trees_log, array, log); + q->fq_root = array; q->fq_trees_log = log; + sch_tree_unlock(sch); + + fq_free(old_fq_root); + return 0; } @@ -697,9 +704,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } - if (!err) + if (!err) { + sch_tree_unlock(sch); err = fq_resize(sch, fq_log); - + sch_tree_lock(sch); + } while (sch->q.qlen > sch->limit) { struct sk_buff *skb = fq_dequeue(sch); -- cgit v0.10.2 From e97ca8e5b864f88b028c1759ba8536fa827d6d96 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 10 Mar 2014 15:49:43 -0700 Subject: mm: fix GFP_THISNODE callers and clarify GFP_THISNODE is for callers that implement their own clever fallback to remote nodes. It restricts the allocation to the specified node and does not invoke reclaim, assuming that the caller will take care of it when the fallback fails, e.g. through a subsequent allocation request without GFP_THISNODE set. However, many current GFP_THISNODE users only want the node exclusive aspect of the flag, without actually implementing their own fallback or triggering reclaim if necessary. This results in things like page migration failing prematurely even when there is easily reclaimable memory available, unless kswapd happens to be running already or a concurrent allocation attempt triggers the necessary reclaim. Convert all callsites that don't implement their own fallback strategy to __GFP_THISNODE. This restricts the allocation a single node too, but at the same time allows the allocator to enter the slowpath, wake kswapd, and invoke direct reclaim if necessary, to make the allocation happen when memory is full. Signed-off-by: Johannes Weiner Acked-by: Rik van Riel Cc: Jan Stancek Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index a96bcf8..20e8a9b 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) /* attempt to allocate a granule's worth of cached memory pages */ page = alloc_pages_exact_node(nid, - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, IA64_GRANULE_SHIFT-PAGE_SHIFT); if (!page) { mutex_unlock(&uc_pool->add_chunk_mutex); diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 5ec1e47..e865d74 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -123,7 +123,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order) area->nid = nid; area->order = order; - area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE, + area->pages = alloc_pages_exact_node(area->nid, + GFP_KERNEL|__GFP_THISNODE, area->order); if (!area->pages) { diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index b9e2000..95c8944 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -240,7 +240,7 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, nid = cpu_to_node(cpu); page = alloc_pages_exact_node(nid, - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, pg_order); if (page == NULL) { dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0437439..39b81dc 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -123,6 +123,10 @@ struct vm_area_struct; __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ __GFP_NO_KSWAPD) +/* + * GFP_THISNODE does not perform any reclaim, you most likely want to + * use __GFP_THISNODE to allocate from a given node without fallback! + */ #ifdef CONFIG_NUMA #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) #else diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5f2052c..9b61b9b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -590,10 +590,10 @@ static inline bool zone_is_empty(struct zone *zone) /* * The NUMA zonelists are doubled because we need zonelists that restrict the - * allocations to a single node for GFP_THISNODE. + * allocations to a single node for __GFP_THISNODE. * * [0] : Zonelist with fallback - * [1] : No fallback (GFP_THISNODE) + * [1] : No fallback (__GFP_THISNODE) */ #define MAX_ZONELISTS 2 diff --git a/include/linux/slab.h b/include/linux/slab.h index 9260abd..b5b2df6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -410,7 +410,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * * %GFP_NOWAIT - Allocation will not sleep. * - * %GFP_THISNODE - Allocate node-local memory only. + * %__GFP_THISNODE - Allocate node-local memory only. * * %GFP_DMA - Allocation suitable for DMA. * Should only be used for kmalloc() caches. Otherwise, use a diff --git a/kernel/profile.c b/kernel/profile.c index 6631e1e..ebdd9c1 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -549,14 +549,14 @@ static int create_hash_tables(void) struct page *page; page = alloc_pages_exact_node(node, - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 0); if (!page) goto out_cleanup; per_cpu(cpu_profile_hits, cpu)[1] = (struct profile_hit *)page_address(page); page = alloc_pages_exact_node(node, - GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 0); if (!page) goto out_cleanup; diff --git a/mm/migrate.c b/mm/migrate.c index 482a33d..b494fdb 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1158,7 +1158,7 @@ static struct page *new_page_node(struct page *p, unsigned long private, pm->node); else return alloc_pages_exact_node(pm->node, - GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); + GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); } /* @@ -1544,9 +1544,9 @@ static struct page *alloc_misplaced_dst_page(struct page *page, struct page *newpage; newpage = alloc_pages_exact_node(nid, - (GFP_HIGHUSER_MOVABLE | GFP_THISNODE | - __GFP_NOMEMALLOC | __GFP_NORETRY | - __GFP_NOWARN) & + (GFP_HIGHUSER_MOVABLE | + __GFP_THISNODE | __GFP_NOMEMALLOC | + __GFP_NORETRY | __GFP_NOWARN) & ~GFP_IOFS, 0); return newpage; @@ -1747,7 +1747,8 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, goto out_dropref; new_page = alloc_pages_node(node, - (GFP_TRANSHUGE | GFP_THISNODE) & ~__GFP_WAIT, HPAGE_PMD_ORDER); + (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, + HPAGE_PMD_ORDER); if (!new_page) goto out_fail; -- cgit v0.10.2 From 2af120bc040c5ebcda156df6be6a66610ab6957f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 10 Mar 2014 15:49:44 -0700 Subject: mm/compaction: break out of loop on !PageBuddy in isolate_freepages_block We received several reports of bad page state when freeing CMA pages previously allocated with alloc_contig_range: BUG: Bad page state in process Binder_A pfn:63202 page:d21130b0 count:0 mapcount:1 mapping: (null) index:0x7dfbf page flags: 0x40080068(uptodate|lru|active|swapbacked) Based on the page state, it looks like the page was still in use. The page flags do not make sense for the use case though. Further debugging showed that despite alloc_contig_range returning success, at least one page in the range still remained in the buddy allocator. There is an issue with isolate_freepages_block. In strict mode (which CMA uses), if any pages in the range cannot be isolated, isolate_freepages_block should return failure 0. The current check keeps track of the total number of isolated pages and compares against the size of the range: if (strict && nr_strict_required > total_isolated) total_isolated = 0; After taking the zone lock, if one of the pages in the range is not in the buddy allocator, we continue through the loop and do not increment total_isolated. If in the last iteration of the loop we isolate more than one page (e.g. last page needed is a higher order page), the check for total_isolated may pass and we fail to detect that a page was skipped. The fix is to bail out if the loop immediately if we are in strict mode. There's no benfit to continuing anyway since we need all pages to be isolated. Additionally, drop the error checking based on nr_strict_required and just check the pfn ranges. This matches with what isolate_freepages_range does. Signed-off-by: Laura Abbott Acked-by: Minchan Kim Cc: Mel Gorman Acked-by: Vlastimil Babka Cc: Joonsoo Kim Acked-by: Bartlomiej Zolnierkiewicz Acked-by: Michal Nazarewicz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/compaction.c b/mm/compaction.c index b48c525..9185775 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -251,7 +251,6 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, { int nr_scanned = 0, total_isolated = 0; struct page *cursor, *valid_page = NULL; - unsigned long nr_strict_required = end_pfn - blockpfn; unsigned long flags; bool locked = false; @@ -264,11 +263,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, nr_scanned++; if (!pfn_valid_within(blockpfn)) - continue; + goto isolate_fail; + if (!valid_page) valid_page = page; if (!PageBuddy(page)) - continue; + goto isolate_fail; /* * The zone lock must be held to isolate freepages. @@ -289,12 +289,10 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, /* Recheck this is a buddy page under lock */ if (!PageBuddy(page)) - continue; + goto isolate_fail; /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); - if (!isolated && strict) - break; total_isolated += isolated; for (i = 0; i < isolated; i++) { list_add(&page->lru, freelist); @@ -305,7 +303,15 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, if (isolated) { blockpfn += isolated - 1; cursor += isolated - 1; + continue; } + +isolate_fail: + if (strict) + break; + else + continue; + } trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated); @@ -315,7 +321,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, * pages requested were isolated. If there were any failures, 0 is * returned and CMA will fail. */ - if (strict && nr_strict_required > total_isolated) + if (strict && blockpfn < end_pfn) total_isolated = 0; if (locked) -- cgit v0.10.2 From 70335abb2689c8cd5df91bf2d95a65649addf50b Mon Sep 17 00:00:00 2001 From: Artem Fetishev Date: Mon, 10 Mar 2014 15:49:45 -0700 Subject: fs/proc/base.c: fix GPF in /proc/$PID/map_files The expected logic of proc_map_files_get_link() is either to return 0 and initialize 'path' or return an error and leave 'path' uninitialized. By the time dname_to_vma_addr() returns 0 the corresponding vma may have already be gone. In this case the path is not initialized but the return value is still 0. This results in 'general protection fault' inside d_path(). Steps to reproduce: CONFIG_CHECKPOINT_RESTORE=y fd = open(...); while (1) { mmap(fd, ...); munmap(fd, ...); } ls -la /proc/$PID/map_files Addresses https://bugzilla.kernel.org/show_bug.cgi?id=68991 Signed-off-by: Artem Fetishev Signed-off-by: Aleksandr Terekhov Reported-by: Acked-by: Pavel Emelyanov Acked-by: Cyrill Gorcunov Reviewed-by: "Eric W. Biederman" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/proc/base.c b/fs/proc/base.c index 5150706..b976062 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1824,6 +1824,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = -ENOENT; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { -- cgit v0.10.2 From 2216ee853017f9c9371106c5c02d4fe42f61cbfa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 10 Mar 2014 15:49:46 -0700 Subject: mm/Kconfig: fix URL for zsmalloc benchmark The help text for CONFIG_PGTABLE_MAPPING has an incorrect URL. While we're at it, remove the unnecessary footnote notation. Signed-off-by: Ben Hutchings Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/Kconfig b/mm/Kconfig index 2d9f150..2888024 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -575,5 +575,5 @@ config PGTABLE_MAPPING then you should select this. This causes zsmalloc to use page table mapping rather than copying for object mapping. - You can check speed with zsmalloc benchmark[1]. - [1] https://github.com/spartacus06/zsmalloc + You can check speed with zsmalloc benchmark: + https://github.com/spartacus06/zsmapbench -- cgit v0.10.2 From 2930ffc7593b64fe00fd7c5a0a7f543078d73ed9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 10 Mar 2014 15:49:48 -0700 Subject: revert "kallsyms: fix absolute addresses for kASLR" Revert the recently applied 0f55159d091c ("kallsyms: fix absolute addresses for kASLR"). Kees said : This got NAKed, please don't apply -- this patch works for x86 and : ARM, but may cause problems for others: : : https://lkml.org/lkml/2014/2/24/718 It appears that Kees will be fixing all this up for 3.15. Cc: Andy Honig Cc: Kees Cook Cc: Michal Marek Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 276e84b..10085de 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -330,7 +330,8 @@ static void write_src(void) printf("\tPTR\t_text + %#llx\n", table[i].addr - _text); else - printf("\tPTR\t%#llx\n", table[i].addr); + printf("\tPTR\t_text - %#llx\n", + _text - table[i].addr); } else { printf("\tPTR\t%#llx\n", table[i].addr); } -- cgit v0.10.2 From 1443176fd6857744d6772e6a607d7d08f0f6afd9 Mon Sep 17 00:00:00 2001 From: Michael Opdenacker Date: Mon, 10 Mar 2014 15:49:49 -0700 Subject: MAINTAINERS: blackfin: add git repository Add the git repository currently in use for blackfin architecture development. This information was obtained from Steven Miao. Signed-off-by: Michael Opdenacker Cc: Steven Miao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index b7befe7..1ecfde1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1738,6 +1738,7 @@ F: include/uapi/linux/bfs_fs.h BLACKFIN ARCHITECTURE M: Steven Miao L: adi-buildroot-devel@lists.sourceforge.net +T: git git://git.code.sf.net/p/adi-linux/code W: http://blackfin.uclinux.org S: Supported F: arch/blackfin/ -- cgit v0.10.2 From 53942232369ec317f77dc8c2d5a0637bb85a6e84 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 10 Mar 2014 15:49:50 -0700 Subject: tools/testing/selftests/ipc/msgque.c: handle msgget failure return correctly A failed msgget causes the test to return an uninitialised value in ret. Assign ret to -errno on error exit. Signed-off-by: Colin Ian King Acked-by: Davidlohr Bueso Cc: Stanislav Kinsbursky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index d664182..aa290c0 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -201,6 +201,7 @@ int main(int argc, char **argv) msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666); if (msgque.msq_id == -1) { + err = -errno; printf("Can't create queue\n"); goto err_out; } -- cgit v0.10.2 From d7d673a591701f131e53d4fd4e2b9352f1316642 Mon Sep 17 00:00:00 2001 From: Sergei Antonov Date: Mon, 10 Mar 2014 15:49:51 -0700 Subject: hfsplus: add HFSX subfolder count support Adds support for HFSX 'HasFolderCount' flag and a corresponding 'folderCount' field in folder records. (For reference see HFS_FOLDERCOUNT and kHFSHasFolderCountBit/kHFSHasFolderCountMask in Apple's source code.) Ignoring subfolder count leads to fs errors found by Mac: ... Checking catalog hierarchy. HasFolderCount flag needs to be set (id = 105) (It should be 0x10 instead of 0) Incorrect folder count in a directory (id = 2) (It should be 7 instead of 6) ... Steps to reproduce: Format with "newfs_hfs -s /dev/diskXXX". Mount in Linux. Create a new directory in root. Unmount. Run "fsck_hfs /dev/diskXXX". The patch handles directory creation, deletion, and rename. Signed-off-by: Sergei Antonov Reviewed-by: Vyacheslav Dubeyko Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 968ce41..32602c6 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -103,6 +103,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, folder = &entry->folder; memset(folder, 0, sizeof(*folder)); folder->type = cpu_to_be16(HFSPLUS_FOLDER); + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) + folder->flags |= cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT); folder->id = cpu_to_be32(inode->i_ino); HFSPLUS_I(inode)->create_date = folder->create_date = @@ -203,6 +205,36 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, return hfs_brec_find(fd, hfs_find_rec_by_key); } +static void hfsplus_subfolders_inc(struct inode *dir) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { + /* + * Increment subfolder count. Note, the value is only meaningful + * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. + */ + HFSPLUS_I(dir)->subfolders++; + } +} + +static void hfsplus_subfolders_dec(struct inode *dir) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { + /* + * Decrement subfolder count. Note, the value is only meaningful + * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. + * + * Check for zero. Some subfolders may have been created + * by an implementation ignorant of this counter. + */ + if (HFSPLUS_I(dir)->subfolders) + HFSPLUS_I(dir)->subfolders--; + } +} + int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) { @@ -247,6 +279,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, goto err1; dir->i_size++; + if (S_ISDIR(inode->i_mode)) + hfsplus_subfolders_inc(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); @@ -336,6 +370,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) goto out; dir->i_size--; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_dec(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); @@ -380,6 +416,7 @@ int hfsplus_rename_cat(u32 cnid, hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, src_fd.entrylength); + type = be16_to_cpu(entry.type); /* create new dir entry with the data from the old entry */ hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); @@ -394,6 +431,8 @@ int hfsplus_rename_cat(u32 cnid, if (err) goto out; dst_dir->i_size++; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_inc(dst_dir); dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; /* finally remove the old entry */ @@ -405,6 +444,8 @@ int hfsplus_rename_cat(u32 cnid, if (err) goto out; src_dir->i_size--; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_dec(src_dir); src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; /* remove old thread entry */ diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 08846425b..62d571e 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -242,6 +242,7 @@ struct hfsplus_inode_info { */ sector_t fs_blocks; u8 userflags; /* BSD user file flags */ + u32 subfolders; /* Subfolder count (HFSX only) */ struct list_head open_dir_list; loff_t phys_size; diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 8ffb3a8..5a12682 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -261,7 +261,7 @@ struct hfsplus_cat_folder { struct DInfo user_info; struct DXInfo finder_info; __be32 text_encoding; - u32 reserved; + __be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */ } __packed; /* HFS file info (stolen from hfs.h) */ @@ -301,11 +301,13 @@ struct hfsplus_cat_file { struct hfsplus_fork_raw rsrc_fork; } __packed; -/* File attribute bits */ +/* File and folder flag bits */ #define HFSPLUS_FILE_LOCKED 0x0001 #define HFSPLUS_FILE_THREAD_EXISTS 0x0002 #define HFSPLUS_XATTR_EXISTS 0x0004 #define HFSPLUS_ACL_EXISTS 0x0008 +#define HFSPLUS_HAS_FOLDER_COUNT 0x0010 /* Folder has subfolder count + * (HFSX only) */ /* HFS+ catalog thread (part of a cat_entry) */ struct hfsplus_cat_thread { diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index fa929f3..a4f45bd 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -375,6 +375,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) hip->extent_state = 0; hip->flags = 0; hip->userflags = 0; + hip->subfolders = 0; memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->alloc_blocks = 0; @@ -494,6 +495,10 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); HFSPLUS_I(inode)->create_date = folder->create_date; HFSPLUS_I(inode)->fs_blocks = 0; + if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { + HFSPLUS_I(inode)->subfolders = + be32_to_cpu(folder->subfolders); + } inode->i_op = &hfsplus_dir_inode_operations; inode->i_fop = &hfsplus_dir_operations; } else if (type == HFSPLUS_FILE) { @@ -566,6 +571,10 @@ int hfsplus_cat_write_inode(struct inode *inode) folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); folder->valence = cpu_to_be32(inode->i_size - 2); + if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { + folder->subfolders = + cpu_to_be32(HFSPLUS_I(inode)->subfolders); + } hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); } else if (HFSPLUS_IS_RSRC(inode)) { -- cgit v0.10.2 From 0eb808eb753cd44e740da8e64f5c97a2a8e07578 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 10 Mar 2014 15:49:52 -0700 Subject: cris: convert ffs from an object-like macro to a function-like macro This avoids bad interactions with code using identifiers called "ffs": drivers/usb/gadget/f_fs.c: In function 'ffsmod_init': drivers/usb/gadget/f_fs.c:2693:494: error: 'ffsusb_func' undeclared (first use in this function) drivers/usb/gadget/f_fs.c:2693:494: note: each undeclared identifier is reported only once for each function it appears in drivers/usb/gadget/f_fs.c: In function 'ffsmod_exit': drivers/usb/gadget/f_fs.c:2693:677: error: 'ffsusb_func' undeclared (first use in this function) drivers/usb/gadget/f_fs.c: At top level: drivers/usb/gadget/f_fs.c:2693:35: warning: 'kernel_ffsusb_func' defined but not used [-Wunused-variable] drivers/usb/gadget/f_fs.c: In function 'ffsmod_init': drivers/usb/gadget/f_fs.c:2693:15: warning: control reaches end of non-void function [-Wreturn-type] See http://kisskb.ellerman.id.au/kisskb/buildresult/10715817/ Signed-off-by: Geert Uytterhoeven Cc: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h index 184066c..053c17b 100644 --- a/arch/cris/include/asm/bitops.h +++ b/arch/cris/include/asm/bitops.h @@ -144,7 +144,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) * definition, which doesn't have the same semantics. We don't want to * use -fno-builtin, so just hide the name ffs. */ -#define ffs kernel_ffs +#define ffs(x) kernel_ffs(x) #include #include -- cgit v0.10.2 From 5bd076708664313f2bdbbc1cf71093313b7774a1 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 10 Mar 2014 22:58:34 +0800 Subject: Xen-netback: Fix issue caused by using gso_type wrongly Current netback uses gso_type to check whether the skb contains gso offload, and this is wrong. Gso_size is the right one to check gso existence, and gso_type is only used to check gso type. Some skbs contains nonzero gso_type and zero gso_size, current netback would treat these skbs as gso and create wrong response for this. This also causes ssh failure to domu from other server. V2: use skb_is_gso function as Paul Durrant suggested Signed-off-by: Annie Li Acked-by: Wei Liu Reviewed-by: Paul Durrant Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index e5284bc..438d0c0 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -240,7 +240,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb, struct gnttab_copy *copy_gop; struct xenvif_rx_meta *meta; unsigned long bytes; - int gso_type; + int gso_type = XEN_NETIF_GSO_TYPE_NONE; /* Data must not cross a page boundary. */ BUG_ON(size + offset > PAGE_SIZE<gso_type & SKB_GSO_TCPV4) - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - else - gso_type = XEN_NETIF_GSO_TYPE_NONE; + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + } if (*head && ((1 << gso_type) & vif->gso_mask)) vif->rx.req_cons++; @@ -338,19 +338,15 @@ static int xenvif_gop_skb(struct sk_buff *skb, int head = 1; int old_meta_prod; int gso_type; - int gso_size; old_meta_prod = npo->meta_prod; - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - gso_size = skb_shinfo(skb)->gso_size; - } else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - gso_size = skb_shinfo(skb)->gso_size; - } else { - gso_type = XEN_NETIF_GSO_TYPE_NONE; - gso_size = 0; + gso_type = XEN_NETIF_GSO_TYPE_NONE; + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; } /* Set up a GSO prefix descriptor, if necessary */ @@ -358,7 +354,7 @@ static int xenvif_gop_skb(struct sk_buff *skb, req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; meta->gso_type = gso_type; - meta->gso_size = gso_size; + meta->gso_size = skb_shinfo(skb)->gso_size; meta->size = 0; meta->id = req->id; } @@ -368,7 +364,7 @@ static int xenvif_gop_skb(struct sk_buff *skb, if ((1 << gso_type) & vif->gso_mask) { meta->gso_type = gso_type; - meta->gso_size = gso_size; + meta->gso_size = skb_shinfo(skb)->gso_size; } else { meta->gso_type = XEN_NETIF_GSO_TYPE_NONE; meta->gso_size = 0; @@ -500,8 +496,9 @@ static void xenvif_rx_action(struct xenvif *vif) size = skb_frag_size(&skb_shinfo(skb)->frags[i]); max_slots_needed += DIV_ROUND_UP(size, PAGE_SIZE); } - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + if (skb_is_gso(skb) && + (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || + skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) max_slots_needed++; /* If the skb may not fit then bail out now */ -- cgit v0.10.2 From dd38743b4cc2f86be250eaf156cf113ba3dd531a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Bostr=C3=B6m?= Date: Mon, 10 Mar 2014 16:17:15 +0100 Subject: vlan: Set correct source MAC address with TX VLAN offload enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With TX VLAN offload enabled the source MAC address for frames sent using the VLAN interface is currently set to the address of the real interface. This is wrong since the VLAN interface may be configured with a different address. The bug was introduced in commit 2205369a314e12fcec4781cc73ac9c08fc2b47de ("vlan: Fix header ops passthru when doing TX VLAN offload."). This patch sets the source address before calling the create function of the real interface. Signed-off-by: Peter Boström Signed-off-by: David S. Miller diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index de51c48..4b65aa4 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -538,6 +538,9 @@ static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; + if (saddr == NULL) + saddr = dev->dev_addr; + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); } -- cgit v0.10.2 From 09df7c4c8097ca4a11393b1edd4997d786daad52 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 10 Mar 2014 19:32:22 -0400 Subject: x86: Remove CONFIG_X86_OOSTORE This was an optimization that made memcpy type benchmarks a little faster on ancient (Circa 1998) IDT Winchip CPUs. In real-life workloads, it wasn't even noticable, and I doubt anyone is running benchmarks on 16 year old silicon any more. Given this code has likely seen very little use over the last decade, let's just remove it. Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index c026cca..f3aaf23 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -341,10 +341,6 @@ config X86_USE_3DNOW def_bool y depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML -config X86_OOSTORE - def_bool y - depends on (MWINCHIP3D || MWINCHIPC6) && MTRR - # # P6_NOPs are a relatively minor optimization that require a family >= # 6 processor, except that it is broken on certain VIA chips. diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 04a4890..69bbb48 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -85,11 +85,7 @@ #else # define smp_rmb() barrier() #endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif +#define smp_wmb() barrier() #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ @@ -100,7 +96,7 @@ #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* SMP */ -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) +#if defined(CONFIG_X86_PPRO_FENCE) /* * For either of these options x86 doesn't have a strong TSO memory diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 34f69cb..91d9c69 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -237,7 +237,7 @@ memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) static inline void flush_write_buffers(void) { -#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) +#if defined(CONFIG_X86_PPRO_FENCE) asm volatile("lock; addl $0,0(%%esp)": : :"memory"); #endif } diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index bf156de..0f62f54 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -26,10 +26,9 @@ # define LOCK_PTR_REG "D" #endif -#if defined(CONFIG_X86_32) && \ - (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) +#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE)) /* - * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock + * On PPro SMP, we use a locked operation to unlock * (PPro errata 66, 92) */ # define UNLOCK_LOCK_PREFIX LOCK_PREFIX diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 8779eda..d8fba5c 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -8,236 +8,6 @@ #include "cpu.h" -#ifdef CONFIG_X86_OOSTORE - -static u32 power2(u32 x) -{ - u32 s = 1; - - while (s <= x) - s <<= 1; - - return s >>= 1; -} - - -/* - * Set up an actual MCR - */ -static void centaur_mcr_insert(int reg, u32 base, u32 size, int key) -{ - u32 lo, hi; - - hi = base & ~0xFFF; - lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ - lo &= ~0xFFF; /* Remove the ctrl value bits */ - lo |= key; /* Attribute we wish to set */ - wrmsr(reg+MSR_IDT_MCR0, lo, hi); - mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ -} - -/* - * Figure what we can cover with MCR's - * - * Shortcut: We know you can't put 4Gig of RAM on a winchip - */ -static u32 ramtop(void) -{ - u32 clip = 0xFFFFFFFFUL; - u32 top = 0; - int i; - - for (i = 0; i < e820.nr_map; i++) { - unsigned long start, end; - - if (e820.map[i].addr > 0xFFFFFFFFUL) - continue; - /* - * Don't MCR over reserved space. Ignore the ISA hole - * we frob around that catastrophe already - */ - if (e820.map[i].type == E820_RESERVED) { - if (e820.map[i].addr >= 0x100000UL && - e820.map[i].addr < clip) - clip = e820.map[i].addr; - continue; - } - start = e820.map[i].addr; - end = e820.map[i].addr + e820.map[i].size; - if (start >= end) - continue; - if (end > top) - top = end; - } - /* - * Everything below 'top' should be RAM except for the ISA hole. - * Because of the limited MCR's we want to map NV/ACPI into our - * MCR range for gunk in RAM - * - * Clip might cause us to MCR insufficient RAM but that is an - * acceptable failure mode and should only bite obscure boxes with - * a VESA hole at 15Mb - * - * The second case Clip sometimes kicks in is when the EBDA is marked - * as reserved. Again we fail safe with reasonable results - */ - if (top > clip) - top = clip; - - return top; -} - -/* - * Compute a set of MCR's to give maximum coverage - */ -static int centaur_mcr_compute(int nr, int key) -{ - u32 mem = ramtop(); - u32 root = power2(mem); - u32 base = root; - u32 top = root; - u32 floor = 0; - int ct = 0; - - while (ct < nr) { - u32 fspace = 0; - u32 high; - u32 low; - - /* - * Find the largest block we will fill going upwards - */ - high = power2(mem-top); - - /* - * Find the largest block we will fill going downwards - */ - low = base/2; - - /* - * Don't fill below 1Mb going downwards as there - * is an ISA hole in the way. - */ - if (base <= 1024*1024) - low = 0; - - /* - * See how much space we could cover by filling below - * the ISA hole - */ - - if (floor == 0) - fspace = 512*1024; - else if (floor == 512*1024) - fspace = 128*1024; - - /* And forget ROM space */ - - /* - * Now install the largest coverage we get - */ - if (fspace > high && fspace > low) { - centaur_mcr_insert(ct, floor, fspace, key); - floor += fspace; - } else if (high > low) { - centaur_mcr_insert(ct, top, high, key); - top += high; - } else if (low > 0) { - base -= low; - centaur_mcr_insert(ct, base, low, key); - } else - break; - ct++; - } - /* - * We loaded ct values. We now need to set the mask. The caller - * must do this bit. - */ - return ct; -} - -static void centaur_create_optimal_mcr(void) -{ - int used; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining and weak write ordered. - * - * To experiment with: Linux never uses stack operations for - * mmio spaces so we could globally enable stack operation wc - * - * Load the registers with type 31 - full write combining, all - * writes weakly ordered. - */ - used = centaur_mcr_compute(6, 31); - - /* - * Wipe unused MCRs - */ - for (i = used; i < 8; i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -static void winchip2_create_optimal_mcr(void) -{ - u32 lo, hi; - int used; - int i; - - /* - * Allocate up to 6 mcrs to mark as much of ram as possible - * as write combining, weak store ordered. - * - * Load the registers with type 25 - * 8 - weak write ordering - * 16 - weak read ordering - * 1 - write combining - */ - used = centaur_mcr_compute(6, 25); - - /* - * Mark the registers we are using. - */ - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - for (i = 0; i < used; i++) - lo |= 1<<(9+i); - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - - /* - * Wipe unused MCRs - */ - - for (i = used; i < 8; i++) - wrmsr(MSR_IDT_MCR0+i, 0, 0); -} - -/* - * Handle the MCR key on the Winchip 2. - */ -static void winchip2_unprotect_mcr(void) -{ - u32 lo, hi; - u32 key; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo &= ~0x1C0; /* blank bits 8-6 */ - key = (lo>>17) & 7; - lo |= key<<6; /* replace with unlock key */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} - -static void winchip2_protect_mcr(void) -{ - u32 lo, hi; - - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - lo &= ~0x1C0; /* blank bits 8-6 */ - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); -} -#endif /* CONFIG_X86_OOSTORE */ - #define ACE_PRESENT (1 << 6) #define ACE_ENABLED (1 << 7) #define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ @@ -362,20 +132,6 @@ static void init_centaur(struct cpuinfo_x86 *c) fcr_clr = DPDC; printk(KERN_NOTICE "Disabling bugged TSC.\n"); clear_cpu_cap(c, X86_FEATURE_TSC); -#ifdef CONFIG_X86_OOSTORE - centaur_create_optimal_mcr(); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - * - * The C6 original lacks weak read order - * - * Note 0x120 is write only on Winchip 1 - */ - wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); -#endif break; case 8: switch (c->x86_mask) { @@ -392,40 +148,12 @@ static void init_centaur(struct cpuinfo_x86 *c) fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| E2MMX|EAMD3D; fcr_clr = DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - */ - lo |= 31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif break; case 9: name = "3"; fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| E2MMX|EAMD3D; fcr_clr = DPDC; -#ifdef CONFIG_X86_OOSTORE - winchip2_unprotect_mcr(); - winchip2_create_optimal_mcr(); - rdmsr(MSR_IDT_MCR_CTRL, lo, hi); - /* - * Enable: - * write combining on non-stack, non-string - * write combining on string, all types - * weak write ordering - */ - lo |= 31; - wrmsr(MSR_IDT_MCR_CTRL, lo, hi); - winchip2_protect_mcr(); -#endif break; default: name = "??"; diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 7d01b8c..cc04e67 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h @@ -40,11 +40,7 @@ #define smp_rmb() barrier() #endif /* CONFIG_X86_PPRO_FENCE */ -#ifdef CONFIG_X86_OOSTORE -#define smp_wmb() wmb() -#else /* CONFIG_X86_OOSTORE */ #define smp_wmb() barrier() -#endif /* CONFIG_X86_OOSTORE */ #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -- cgit v0.10.2 From 731bd6a93a6e9172094a2322bd0ee964bb1f4d63 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sun, 2 Feb 2014 22:56:23 -0800 Subject: x86, fpu: Check tsk_used_math() in kernel_fpu_end() for eager FPU For non-eager fpu mode, thread's fpu state is allocated during the first fpu usage (in the context of device not available exception). This (math_state_restore()) can be a blocking call and hence we enable interrupts (which were originally disabled when the exception happened), allocate memory and disable interrupts etc. But the eager-fpu mode, call's the same math_state_restore() from kernel_fpu_end(). The assumption being that tsk_used_math() is always set for the eager-fpu mode and thus avoid the code path of enabling interrupts, allocating fpu state using blocking call and disable interrupts etc. But the below issue was noticed by Maarten Baert, Nate Eldredge and few others: If a user process dumps core on an ecrypt fs while aesni-intel is loaded, we get a BUG() in __find_get_block() complaining that it was called with interrupts disabled; then all further accesses to our ecrypt fs hang and we have to reboot. The aesni-intel code (encrypting the core file that we are writing) needs the FPU and quite properly wraps its code in kernel_fpu_{begin,end}(), the latter of which calls math_state_restore(). So after kernel_fpu_end(), interrupts may be disabled, which nobody seems to expect, and they stay that way until we eventually get to __find_get_block() which barfs. For eager fpu, most the time, tsk_used_math() is true. At few instances during thread exit, signal return handling etc, tsk_used_math() might be false. In kernel_fpu_end(), for eager-fpu, call math_state_restore() only if tsk_used_math() is set. Otherwise, don't bother. Kernel code path which cleared tsk_used_math() knows what needs to be done with the fpu state. Reported-by: Maarten Baert Reported-by: Nate Eldredge Suggested-by: Linus Torvalds Signed-off-by: Suresh Siddha Link: http://lkml.kernel.org/r/1391410583.3801.6.camel@europa Cc: George Spelvin Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e8368c6..d5dd808 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -86,10 +86,19 @@ EXPORT_SYMBOL(__kernel_fpu_begin); void __kernel_fpu_end(void) { - if (use_eager_fpu()) - math_state_restore(); - else + if (use_eager_fpu()) { + /* + * For eager fpu, most the time, tsk_used_math() is true. + * Restore the user math as we are done with the kernel usage. + * At few instances during thread exit, signal handling etc, + * tsk_used_math() is false. Those few places will take proper + * actions, so we don't need to restore the math here. + */ + if (likely(tsk_used_math(current))) + math_state_restore(); + } else { stts(); + } } EXPORT_SYMBOL(__kernel_fpu_end); -- cgit v0.10.2 From d25f06ea466ea521b563b76661180b4e44714ae6 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 10 Mar 2014 06:55:55 -0400 Subject: vmxnet3: fix netpoll race condition vmxnet3's netpoll driver is incorrectly coded. It directly calls vmxnet3_do_poll, which is the driver internal napi poll routine. As the netpoll controller method doesn't block real napi polls in any way, there is a potential for race conditions in which the netpoll controller method and the napi poll method run concurrently. The result is data corruption causing panics such as this one recently observed: PID: 1371 TASK: ffff88023762caa0 CPU: 1 COMMAND: "rs:main Q:Reg" #0 [ffff88023abd5780] machine_kexec at ffffffff81038f3b #1 [ffff88023abd57e0] crash_kexec at ffffffff810c5d92 #2 [ffff88023abd58b0] oops_end at ffffffff8152b570 #3 [ffff88023abd58e0] die at ffffffff81010e0b #4 [ffff88023abd5910] do_trap at ffffffff8152add4 #5 [ffff88023abd5970] do_invalid_op at ffffffff8100cf95 #6 [ffff88023abd5a10] invalid_op at ffffffff8100bf9b [exception RIP: vmxnet3_rq_rx_complete+1968] RIP: ffffffffa00f1e80 RSP: ffff88023abd5ac8 RFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff88023b5dcee0 RCX: 00000000000000c0 RDX: 0000000000000000 RSI: 00000000000005f2 RDI: ffff88023b5dcee0 RBP: ffff88023abd5b48 R8: 0000000000000000 R9: ffff88023a3b6048 R10: 0000000000000000 R11: 0000000000000002 R12: ffff8802398d4cd8 R13: ffff88023af35140 R14: ffff88023b60c890 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff88023abd5b50] vmxnet3_do_poll at ffffffffa00f204a [vmxnet3] #8 [ffff88023abd5b80] vmxnet3_netpoll at ffffffffa00f209c [vmxnet3] #9 [ffff88023abd5ba0] netpoll_poll_dev at ffffffff81472bb7 The fix is to do as other drivers do, and have the poll controller call the top half interrupt handler, which schedules a napi poll properly to recieve frames Tested by myself, successfully. Signed-off-by: Neil Horman CC: Shreyas Bhatewara CC: "VMware, Inc." CC: "David S. Miller" CC: stable@vger.kernel.org Reviewed-by: Shreyas N Bhatewara Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 3be786f..b7daa02 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1761,12 +1761,18 @@ static void vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); + int i; - if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE) - vmxnet3_disable_all_intrs(adapter); - - vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size); - vmxnet3_enable_all_intrs(adapter); + switch (adapter->intr.type) { + case VMXNET3_IT_MSIX: + for (i = 0; i < adapter->num_rx_queues; i++) + vmxnet3_msix_rx(0, &adapter->rx_queue[i]); + break; + case VMXNET3_IT_MSI: + default: + vmxnet3_intr(0, adapter->netdev); + break; + } } #endif /* CONFIG_NET_POLL_CONTROLLER */ -- cgit v0.10.2 From 83bf79b6bb64e686ccf0b66b6828b7bfe9f70ae9 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:31 +0100 Subject: stmmac: disable at run-time the EEE if not supported This patch is to disable the EEE (so HW and timers) for example when the phy communicates that the EEE can be supported anymore. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 078ad0e..b418f94 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -286,10 +286,25 @@ bool stmmac_eee_init(struct stmmac_priv *priv) /* MAC core supports the EEE feature. */ if (priv->dma_cap.eee) { + int tx_lpi_timer = priv->tx_lpi_timer; + /* Check if the PHY supports EEE */ - if (phy_init_eee(priv->phydev, 1)) + if (phy_init_eee(priv->phydev, 1)) { + /* To manage at run-time if the EEE cannot be supported + * anymore (for example because the lp caps have been + * changed). + * In that case the driver disable own timers. + */ + if (priv->eee_active) { + pr_debug("stmmac: disable EEE\n"); + del_timer_sync(&priv->eee_ctrl_timer); + priv->hw->mac->set_eee_timer(priv->ioaddr, 0, + tx_lpi_timer); + } + priv->eee_active = 0; goto out; - + } + /* Activate the EEE and start timers */ if (!priv->eee_active) { priv->eee_active = 1; init_timer(&priv->eee_ctrl_timer); @@ -300,13 +315,13 @@ bool stmmac_eee_init(struct stmmac_priv *priv) priv->hw->mac->set_eee_timer(priv->ioaddr, STMMAC_DEFAULT_LIT_LS, - priv->tx_lpi_timer); + tx_lpi_timer); } else /* Set HW EEE according to the speed */ priv->hw->mac->set_eee_pls(priv->ioaddr, priv->phydev->link); - pr_info("stmmac: Energy-Efficient Ethernet initialized\n"); + pr_debug("stmmac: Energy-Efficient Ethernet initialized\n"); ret = true; } -- cgit v0.10.2 From d916701c670701e1ab4d0ed3d4e64d6023bccec9 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:32 +0100 Subject: stmmac: fix and better tune the default buffer sizes This patch is to fix and tune the default buffer sizes. It reduces the default bufsize used by the driver from 4KiB to 1536 bytes. Patch has been tested on both ARM and SH4 platform based. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b418f94..5365836 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -92,8 +92,8 @@ static int tc = TC_DEFAULT; module_param(tc, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(tc, "DMA threshold control value"); -#define DMA_BUFFER_SIZE BUF_SIZE_4KiB -static int buf_sz = DMA_BUFFER_SIZE; +#define DEFAULT_BUFSIZE 1536 +static int buf_sz = DEFAULT_BUFSIZE; module_param(buf_sz, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(buf_sz, "DMA buffer size"); @@ -136,8 +136,8 @@ static void stmmac_verify_args(void) dma_rxsize = DMA_RX_SIZE; if (unlikely(dma_txsize < 0)) dma_txsize = DMA_TX_SIZE; - if (unlikely((buf_sz < DMA_BUFFER_SIZE) || (buf_sz > BUF_SIZE_16KiB))) - buf_sz = DMA_BUFFER_SIZE; + if (unlikely((buf_sz < DEFAULT_BUFSIZE) || (buf_sz > BUF_SIZE_16KiB))) + buf_sz = DEFAULT_BUFSIZE; if (unlikely(flow_ctrl > 1)) flow_ctrl = FLOW_AUTO; else if (likely(flow_ctrl < 0)) @@ -901,10 +901,10 @@ static int stmmac_set_bfsize(int mtu, int bufsize) ret = BUF_SIZE_8KiB; else if (mtu >= BUF_SIZE_2KiB) ret = BUF_SIZE_4KiB; - else if (mtu >= DMA_BUFFER_SIZE) + else if (mtu > DEFAULT_BUFSIZE) ret = BUF_SIZE_2KiB; else - ret = DMA_BUFFER_SIZE; + ret = DEFAULT_BUFSIZE; return ret; } -- cgit v0.10.2 From 29896a674c8ef3d75134dacb7b9cbb3f5b894b6d Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:33 +0100 Subject: stmmac: fix chained mode This patch is to fix the chain mode that was broken and generated a panic. This patch reviews the chain/ring modes now shaing the same structure and taking care about the pointers and callbacks. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c index 72d282b..c553f6b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c @@ -151,7 +151,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p) sizeof(struct dma_desc))); } -const struct stmmac_chain_mode_ops chain_mode_ops = { +const struct stmmac_mode_ops chain_mode_ops = { .init = stmmac_init_dma_chain, .is_jumbo_frm = stmmac_is_jumbo_frm, .jumbo_frm = stmmac_jumbo_frm, diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 7834a39..74610f3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -419,20 +419,13 @@ struct mii_regs { unsigned int data; /* MII Data */ }; -struct stmmac_ring_mode_ops { - unsigned int (*is_jumbo_frm) (int len, int ehn_desc); - unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum); - void (*refill_desc3) (void *priv, struct dma_desc *p); - void (*init_desc3) (struct dma_desc *p); - void (*clean_desc3) (void *priv, struct dma_desc *p); - int (*set_16kib_bfsize) (int mtu); -}; - -struct stmmac_chain_mode_ops { +struct stmmac_mode_ops { void (*init) (void *des, dma_addr_t phy_addr, unsigned int size, unsigned int extend_desc); unsigned int (*is_jumbo_frm) (int len, int ehn_desc); unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum); + int (*set_16kib_bfsize)(int mtu); + void (*init_desc3)(struct dma_desc *p); void (*refill_desc3) (void *priv, struct dma_desc *p); void (*clean_desc3) (void *priv, struct dma_desc *p); }; @@ -441,8 +434,7 @@ struct mac_device_info { const struct stmmac_ops *mac; const struct stmmac_desc_ops *desc; const struct stmmac_dma_ops *dma; - const struct stmmac_ring_mode_ops *ring; - const struct stmmac_chain_mode_ops *chain; + const struct stmmac_mode_ops *mode; const struct stmmac_hwtimestamp *ptp; struct mii_regs mii; /* MII register Addresses */ struct mac_link link; @@ -460,7 +452,7 @@ void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr, void stmmac_set_mac(void __iomem *ioaddr, bool enable); void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr); -extern const struct stmmac_ring_mode_ops ring_mode_ops; -extern const struct stmmac_chain_mode_ops chain_mode_ops; +extern const struct stmmac_mode_ops ring_mode_ops; +extern const struct stmmac_mode_ops chain_mode_ops; #endif /* __COMMON_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index a96c7c2..650a4be 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -100,10 +100,9 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p) { struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; - if (unlikely(priv->plat->has_gmac)) - /* Fill DES3 in case of RING mode */ - if (priv->dma_buf_sz >= BUF_SIZE_8KiB) - p->des3 = p->des2 + BUF_SIZE_8KiB; + /* Fill DES3 in case of RING mode */ + if (priv->dma_buf_sz >= BUF_SIZE_8KiB) + p->des3 = p->des2 + BUF_SIZE_8KiB; } /* In ring mode we need to fill the desc3 because it is used as buffer */ @@ -126,7 +125,7 @@ static int stmmac_set_16kib_bfsize(int mtu) return ret; } -const struct stmmac_ring_mode_ops ring_mode_ops = { +const struct stmmac_mode_ops ring_mode_ops = { .is_jumbo_frm = stmmac_is_jumbo_frm, .jumbo_frm = stmmac_jumbo_frm, .refill_desc3 = stmmac_refill_desc3, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5365836..8543e1c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -966,9 +966,9 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, p->des2 = priv->rx_skbuff_dma[i]; - if ((priv->mode == STMMAC_RING_MODE) && + if ((priv->hw->mode->init_desc3) && (priv->dma_buf_sz == BUF_SIZE_16KiB)) - priv->hw->ring->init_desc3(p); + priv->hw->mode->init_desc3(p); return 0; } @@ -999,11 +999,8 @@ static int init_dma_desc_rings(struct net_device *dev) unsigned int bfsize = 0; int ret = -ENOMEM; - /* Set the max buffer size according to the DESC mode - * and the MTU. Note that RING mode allows 16KiB bsize. - */ - if (priv->mode == STMMAC_RING_MODE) - bfsize = priv->hw->ring->set_16kib_bfsize(dev->mtu); + if (priv->hw->mode->set_16kib_bfsize) + bfsize = priv->hw->mode->set_16kib_bfsize(dev->mtu); if (bfsize < BUF_SIZE_16KiB) bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz); @@ -1044,15 +1041,15 @@ static int init_dma_desc_rings(struct net_device *dev) /* Setup the chained descriptor addresses */ if (priv->mode == STMMAC_CHAIN_MODE) { if (priv->extend_desc) { - priv->hw->chain->init(priv->dma_erx, priv->dma_rx_phy, - rxsize, 1); - priv->hw->chain->init(priv->dma_etx, priv->dma_tx_phy, - txsize, 1); + priv->hw->mode->init(priv->dma_erx, priv->dma_rx_phy, + rxsize, 1); + priv->hw->mode->init(priv->dma_etx, priv->dma_tx_phy, + txsize, 1); } else { - priv->hw->chain->init(priv->dma_rx, priv->dma_rx_phy, - rxsize, 0); - priv->hw->chain->init(priv->dma_tx, priv->dma_tx_phy, - txsize, 0); + priv->hw->mode->init(priv->dma_rx, priv->dma_rx_phy, + rxsize, 0); + priv->hw->mode->init(priv->dma_tx, priv->dma_tx_phy, + txsize, 0); } } @@ -1303,7 +1300,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv) DMA_TO_DEVICE); priv->tx_skbuff_dma[entry] = 0; } - priv->hw->ring->clean_desc3(priv, p); + priv->hw->mode->clean_desc3(priv, p); if (likely(skb != NULL)) { dev_kfree_skb(skb); @@ -1859,6 +1856,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int nfrags = skb_shinfo(skb)->nr_frags; struct dma_desc *desc, *first; unsigned int nopaged_len = skb_headlen(skb); + unsigned int enh_desc = priv->plat->enh_desc; if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) { if (!netif_queue_stopped(dev)) { @@ -1886,27 +1884,19 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) first = desc; /* To program the descriptors according to the size of the frame */ - if (priv->mode == STMMAC_RING_MODE) { - is_jumbo = priv->hw->ring->is_jumbo_frm(skb->len, - priv->plat->enh_desc); - if (unlikely(is_jumbo)) - entry = priv->hw->ring->jumbo_frm(priv, skb, - csum_insertion); - } else { - is_jumbo = priv->hw->chain->is_jumbo_frm(skb->len, - priv->plat->enh_desc); - if (unlikely(is_jumbo)) - entry = priv->hw->chain->jumbo_frm(priv, skb, - csum_insertion); - } + if (enh_desc) + is_jumbo = priv->hw->mode->is_jumbo_frm(skb->len, enh_desc); + if (likely(!is_jumbo)) { desc->des2 = dma_map_single(priv->device, skb->data, nopaged_len, DMA_TO_DEVICE); priv->tx_skbuff_dma[entry] = desc->des2; priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum_insertion, priv->mode); - } else + } else { desc = first; + entry = priv->hw->mode->jumbo_frm(priv, skb, csum_insertion); + } for (i = 0; i < nfrags; i++) { const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -2044,7 +2034,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) p->des2 = priv->rx_skbuff_dma[entry]; - priv->hw->ring->refill_desc3(priv, p); + priv->hw->mode->refill_desc3(priv, p); if (netif_msg_rx_status(priv)) pr_debug("\trefill entry #%d\n", entry); @@ -2648,11 +2638,11 @@ static int stmmac_hw_init(struct stmmac_priv *priv) /* To use the chained or ring mode */ if (chain_mode) { - priv->hw->chain = &chain_mode_ops; + priv->hw->mode = &chain_mode_ops; pr_info(" Chain mode enabled\n"); priv->mode = STMMAC_CHAIN_MODE; } else { - priv->hw->ring = &ring_mode_ops; + priv->hw->mode = &ring_mode_ops; pr_info(" Ring mode enabled\n"); priv->mode = STMMAC_RING_MODE; } -- cgit v0.10.2 From c5e9103dc397114523c65bd24ff0548bcdca54b4 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Mon, 10 Mar 2014 13:40:34 +0100 Subject: stmmac: dwmac-sti: fix broken STiD127 compatibility This is to fix the compatibility to the STiD127 SoC. Signed-off-by: Giuseppe Cavallaro Cc: Srinivas Kandagatla Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index c61bc72b..8fb32a8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -36,7 +36,7 @@ static const struct of_device_id stmmac_dt_ids[] = { #ifdef CONFIG_DWMAC_STI { .compatible = "st,stih415-dwmac", .data = &sti_gmac_data}, { .compatible = "st,stih416-dwmac", .data = &sti_gmac_data}, - { .compatible = "st,stih127-dwmac", .data = &sti_gmac_data}, + { .compatible = "st,stid127-dwmac", .data = &sti_gmac_data}, #endif /* SoC specific glue layers should come before generic bindings */ { .compatible = "st,spear600-gmac"}, -- cgit v0.10.2 From 89935315f192abf7068d0044cefc84f162c3c81f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 11 Mar 2014 22:40:27 +0800 Subject: PNP / ACPI: proper handling of ACPI IO/Memory resource parsing failures Before commit b355cee88e3b (ACPI / resources: ignore invalid ACPI device resources), if acpi_dev_resource_memory()/acpi_dev_resource_io() returns false, it means the the resource is not a memeory/IO resource. But after commit b355cee88e3b, those functions return false if the given memory/IO resource entry is invalid (the length of the resource is zero). This breaks pnpacpi_allocated_resource(), because it now recognizes the invalid memory/io resources as resources of unknown type. Thus users see confusing warning messages on machines with zero length ACPI memory/IO resources. Fix the problem by rearranging pnpacpi_allocated_resource() so that it calls acpi_dev_resource_memory() for memory type and IO type resources only, respectively. Fixes: b355cee88e3b (ACPI / resources: ignore invalid ACPI device resources) Signed-off-by: Zhang Rui Reported-and-tested-by: Markus Trippelsdorf Reported-and-tested-by: Julian Wollrath Reported-and-tested-by: Paul Bolle [rjw: Changelog] Signed-off-by: Rafael J. Wysocki diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 167f3d0..66977eb 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -183,9 +183,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, struct resource r = {0}; int i, flags; - if (acpi_dev_resource_memory(res, &r) - || acpi_dev_resource_io(res, &r) - || acpi_dev_resource_address_space(res, &r) + if (acpi_dev_resource_address_space(res, &r) || acpi_dev_resource_ext_address_space(res, &r)) { pnp_add_resource(dev, &r); return AE_OK; @@ -217,6 +215,17 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, } switch (res->type) { + case ACPI_RESOURCE_TYPE_MEMORY24: + case ACPI_RESOURCE_TYPE_MEMORY32: + case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: + if (acpi_dev_resource_memory(res, &r)) + pnp_add_resource(dev, &r); + break; + case ACPI_RESOURCE_TYPE_IO: + case ACPI_RESOURCE_TYPE_FIXED_IO: + if (acpi_dev_resource_io(res, &r)) + pnp_add_resource(dev, &r); + break; case ACPI_RESOURCE_TYPE_DMA: dma = &res->data.dma; if (dma->channel_count > 0 && dma->channels[0] != (u8) -1) -- cgit v0.10.2 From 8cb19905e9287a93ce7c2cbbdf742a060b00e219 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:04 +0200 Subject: skbuff: skb_segment: s/frag/nskb_frag/ frag points at nskb, so name it appropriately Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5d6236d..60e8cd7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2876,7 +2876,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) do { struct sk_buff *nskb; - skb_frag_t *frag; + skb_frag_t *nskb_frag; int hsize; int size; @@ -2969,7 +2969,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) continue; } - frag = skb_shinfo(nskb)->frags; + nskb_frag = skb_shinfo(nskb)->frags; skb_copy_from_linear_data_offset(skb, offset, skb_put(nskb, hsize), hsize); @@ -2997,13 +2997,13 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) goto err; } - *frag = *skb_frag; - __skb_frag_ref(frag); - size = skb_frag_size(frag); + *nskb_frag = *skb_frag; + __skb_frag_ref(nskb_frag); + size = skb_frag_size(nskb_frag); if (pos < offset) { - frag->page_offset += offset - pos; - skb_frag_size_sub(frag, offset - pos); + nskb_frag->page_offset += offset - pos; + skb_frag_size_sub(nskb_frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; @@ -3013,11 +3013,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) skb_frag++; pos += size; } else { - skb_frag_size_sub(frag, pos + size - (offset + len)); + skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); goto skip_fraglist; } - frag++; + nskb_frag++; } skip_fraglist: -- cgit v0.10.2 From 4e1beba12d094c6c761ba5c49032b9b9e46380e8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:14 +0200 Subject: skbuff: skb_segment: s/skb_frag/frag/ skb_frag can in fact point at either skb or fskb so rename it generally "frag". Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 60e8cd7..d788a98 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2850,7 +2850,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - skb_frag_t *skb_frag = skb_shinfo(skb)->frags; + skb_frag_t *frag = skb_shinfo(skb)->frags; unsigned int mss = skb_shinfo(skb)->gso_size; unsigned int doffset = skb->data - skb_mac_header(skb); unsigned int offset = doffset; @@ -2896,19 +2896,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) i = 0; nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; + frag = skb_shinfo(fskb)->frags; pos += skb_headlen(fskb); while (pos < offset + len) { BUG_ON(i >= nfrags); - size = skb_frag_size(skb_frag); + size = skb_frag_size(frag); if (pos + size > offset + len) break; i++; pos += size; - skb_frag++; + frag++; } nskb = skb_clone(fskb, GFP_ATOMIC); @@ -2982,7 +2982,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) i = 0; nfrags = skb_shinfo(fskb)->nr_frags; - skb_frag = skb_shinfo(fskb)->frags; + frag = skb_shinfo(fskb)->frags; BUG_ON(!nfrags); @@ -2997,7 +2997,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) goto err; } - *nskb_frag = *skb_frag; + *nskb_frag = *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); @@ -3010,7 +3010,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (pos + size <= offset + len) { i++; - skb_frag++; + frag++; pos += size; } else { skb_frag_size_sub(nskb_frag, pos + size - (offset + len)); -- cgit v0.10.2 From df5771ffefb13f8af5392bd54fd7e2b596a3a357 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 18:29:19 +0200 Subject: skbuff: skb_segment: s/skb/head_skb/ rename local variable to make it easier to tell at a glance that we are dealing with a head skb. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d788a98..fdc065d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2838,41 +2838,42 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum); /** * skb_segment - Perform protocol segmentation on skb. - * @skb: buffer to segment + * @head_skb: buffer to segment * @features: features for the output path (see dev->features) * * This function performs segmentation on the given skb. It returns * a pointer to the first in a list of new skbs for the segments. * In case of error it returns ERR_PTR(err). */ -struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) +struct sk_buff *skb_segment(struct sk_buff *head_skb, + netdev_features_t features) { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(skb)->frag_list; - skb_frag_t *frag = skb_shinfo(skb)->frags; - unsigned int mss = skb_shinfo(skb)->gso_size; - unsigned int doffset = skb->data - skb_mac_header(skb); + struct sk_buff *fskb = skb_shinfo(head_skb)->frag_list; + skb_frag_t *frag = skb_shinfo(head_skb)->frags; + unsigned int mss = skb_shinfo(head_skb)->gso_size; + unsigned int doffset = head_skb->data - skb_mac_header(head_skb); unsigned int offset = doffset; - unsigned int tnl_hlen = skb_tnl_header_len(skb); + unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int headroom; unsigned int len; __be16 proto; bool csum; int sg = !!(features & NETIF_F_SG); - int nfrags = skb_shinfo(skb)->nr_frags; + int nfrags = skb_shinfo(head_skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; - proto = skb_network_protocol(skb); + proto = skb_network_protocol(head_skb); if (unlikely(!proto)) return ERR_PTR(-EINVAL); csum = !!can_checksum_protocol(features, proto); - __skb_push(skb, doffset); - headroom = skb_headroom(skb); - pos = skb_headlen(skb); + __skb_push(head_skb, doffset); + headroom = skb_headroom(head_skb); + pos = skb_headlen(head_skb); do { struct sk_buff *nskb; @@ -2880,11 +2881,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) int hsize; int size; - len = skb->len - offset; + len = head_skb->len - offset; if (len > mss) len = mss; - hsize = skb_headlen(skb) - offset; + hsize = skb_headlen(head_skb) - offset; if (hsize < 0) hsize = 0; if (hsize > len || !sg) @@ -2933,7 +2934,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) __skb_push(nskb, doffset); } else { nskb = __alloc_skb(hsize + doffset + headroom, - GFP_ATOMIC, skb_alloc_rx_flag(skb), + GFP_ATOMIC, skb_alloc_rx_flag(head_skb), NUMA_NO_NODE); if (unlikely(!nskb)) @@ -2949,12 +2950,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) segs = nskb; tail = nskb; - __copy_skb_header(nskb, skb); - nskb->mac_len = skb->mac_len; + __copy_skb_header(nskb, head_skb); + nskb->mac_len = head_skb->mac_len; skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom); - skb_copy_from_linear_data_offset(skb, -tnl_hlen, + skb_copy_from_linear_data_offset(head_skb, -tnl_hlen, nskb->data - tnl_hlen, doffset + tnl_hlen); @@ -2963,7 +2964,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) if (!sg) { nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(skb, offset, + nskb->csum = skb_copy_and_csum_bits(head_skb, offset, skb_put(nskb, len), len, 0); continue; @@ -2971,10 +2972,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) nskb_frag = skb_shinfo(nskb)->frags; - skb_copy_from_linear_data_offset(skb, offset, + skb_copy_from_linear_data_offset(head_skb, offset, skb_put(nskb, hsize), hsize); - skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags & + SKBTX_SHARED_FRAG; while (pos < offset + len) { if (i >= nfrags) { @@ -3031,7 +3033,7 @@ perform_csum_check: nskb->len - doffset, 0); nskb->ip_summed = CHECKSUM_NONE; } - } while ((offset += len) < skb->len); + } while ((offset += len) < head_skb->len); return segs; -- cgit v0.10.2 From 1a4cedaf65491e66e1e55b8428c89209da729209 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 19:27:59 +0200 Subject: skbuff: skb_segment: s/fskb/list_skb/ fskb is unrelated to frag: it's coming from frag_list. Rename it list_skb to avoid confusion. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fdc065d..dc4f768 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2850,7 +2850,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, { struct sk_buff *segs = NULL; struct sk_buff *tail = NULL; - struct sk_buff *fskb = skb_shinfo(head_skb)->frag_list; + struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list; skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); @@ -2891,14 +2891,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, if (hsize > len || !sg) hsize = len; - if (!hsize && i >= nfrags && skb_headlen(fskb) && - (skb_headlen(fskb) == len || sg)) { - BUG_ON(skb_headlen(fskb) > len); + if (!hsize && i >= nfrags && skb_headlen(list_skb) && + (skb_headlen(list_skb) == len || sg)) { + BUG_ON(skb_headlen(list_skb) > len); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - frag = skb_shinfo(fskb)->frags; - pos += skb_headlen(fskb); + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; + pos += skb_headlen(list_skb); while (pos < offset + len) { BUG_ON(i >= nfrags); @@ -2912,8 +2912,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, frag++; } - nskb = skb_clone(fskb, GFP_ATOMIC); - fskb = fskb->next; + nskb = skb_clone(list_skb, GFP_ATOMIC); + list_skb = list_skb->next; if (unlikely(!nskb)) goto err; @@ -2980,15 +2980,15 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, while (pos < offset + len) { if (i >= nfrags) { - BUG_ON(skb_headlen(fskb)); + BUG_ON(skb_headlen(list_skb)); i = 0; - nfrags = skb_shinfo(fskb)->nr_frags; - frag = skb_shinfo(fskb)->frags; + nfrags = skb_shinfo(list_skb)->nr_frags; + frag = skb_shinfo(list_skb)->frags; BUG_ON(!nfrags); - fskb = fskb->next; + list_skb = list_skb->next; } if (unlikely(skb_shinfo(nskb)->nr_frags >= -- cgit v0.10.2 From 1fd819ecb90cc9b822cd84d3056ddba315d3340f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 10 Mar 2014 19:28:08 +0200 Subject: skbuff: skb_segment: orphan frags before copying skb_segment copies frags around, so we need to copy them carefully to avoid accessing user memory after reporting completion to userspace through a callback. skb_segment doesn't normally happen on datapath: TSO needs to be disabled - so disabling zero copy in this case does not look like a big deal. Signed-off-by: Michael S. Tsirkin Acked-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index dc4f768..869c7af 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2854,6 +2854,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); + struct sk_buff *frag_skb = head_skb; unsigned int offset = doffset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int headroom; @@ -2898,6 +2899,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; pos += skb_headlen(list_skb); while (pos < offset + len) { @@ -2985,6 +2987,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, i = 0; nfrags = skb_shinfo(list_skb)->nr_frags; frag = skb_shinfo(list_skb)->frags; + frag_skb = list_skb; BUG_ON(!nfrags); @@ -2999,6 +3002,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, goto err; } + if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC))) + goto err; + *nskb_frag = *frag; __skb_frag_ref(nskb_frag); size = skb_frag_size(nskb_frag); -- cgit v0.10.2 From c3f9b01849ef3bc69024990092b9f42e20df7797 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Mar 2014 09:50:11 -0700 Subject: tcp: tcp_release_cb() should release socket ownership Lars Persson reported following deadlock : -000 |M:0x0:0x802B6AF8(asm) <-- arch_spin_lock -001 |tcp_v4_rcv(skb = 0x8BD527A0) <-- sk = 0x8BE6B2A0 -002 |ip_local_deliver_finish(skb = 0x8BD527A0) -003 |__netif_receive_skb_core(skb = 0x8BD527A0, ?) -004 |netif_receive_skb(skb = 0x8BD527A0) -005 |elk_poll(napi = 0x8C770500, budget = 64) -006 |net_rx_action(?) -007 |__do_softirq() -008 |do_softirq() -009 |local_bh_enable() -010 |tcp_rcv_established(sk = 0x8BE6B2A0, skb = 0x87D3A9E0, th = 0x814EBE14, ?) -011 |tcp_v4_do_rcv(sk = 0x8BE6B2A0, skb = 0x87D3A9E0) -012 |tcp_delack_timer_handler(sk = 0x8BE6B2A0) -013 |tcp_release_cb(sk = 0x8BE6B2A0) -014 |release_sock(sk = 0x8BE6B2A0) -015 |tcp_sendmsg(?, sk = 0x8BE6B2A0, ?, ?) -016 |sock_sendmsg(sock = 0x8518C4C0, msg = 0x87D8DAA8, size = 4096) -017 |kernel_sendmsg(?, ?, ?, ?, size = 4096) -018 |smb_send_kvec() -019 |smb_send_rqst(server = 0x87C4D400, rqst = 0x87D8DBA0) -020 |cifs_call_async() -021 |cifs_async_writev(wdata = 0x87FD6580) -022 |cifs_writepages(mapping = 0x852096E4, wbc = 0x87D8DC88) -023 |__writeback_single_inode(inode = 0x852095D0, wbc = 0x87D8DC88) -024 |writeback_sb_inodes(sb = 0x87D6D800, wb = 0x87E4A9C0, work = 0x87D8DD88) -025 |__writeback_inodes_wb(wb = 0x87E4A9C0, work = 0x87D8DD88) -026 |wb_writeback(wb = 0x87E4A9C0, work = 0x87D8DD88) -027 |wb_do_writeback(wb = 0x87E4A9C0, force_wait = 0) -028 |bdi_writeback_workfn(work = 0x87E4A9CC) -029 |process_one_work(worker = 0x8B045880, work = 0x87E4A9CC) -030 |worker_thread(__worker = 0x8B045880) -031 |kthread(_create = 0x87CADD90) -032 |ret_from_kernel_thread(asm) Bug occurs because __tcp_checksum_complete_user() enables BH, assuming it is running from softirq context. Lars trace involved a NIC without RX checksum support but other points are problematic as well, like the prequeue stuff. Problem is triggered by a timer, that found socket being owned by user. tcp_release_cb() should call tcp_write_timer_handler() or tcp_delack_timer_handler() in the appropriate context : BH disabled and socket lock held, but 'owned' field cleared, as if they were running from timer handlers. Fixes: 6f458dfb4092 ("tcp: improve latencies of timer triggered events") Reported-by: Lars Persson Tested-by: Lars Persson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/net/sock.h b/include/net/sock.h index 7c4167b..b9586a1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1488,6 +1488,11 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) */ #define sock_owned_by_user(sk) ((sk)->sk_lock.owned) +static inline void sock_release_ownership(struct sock *sk) +{ + sk->sk_lock.owned = 0; +} + /* * Macro so as to not evaluate some arguments when * lockdep is not enabled. diff --git a/net/core/sock.c b/net/core/sock.c index 5b6a943..c0fc6bd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2357,10 +2357,13 @@ void release_sock(struct sock *sk) if (sk->sk_backlog.tail) __release_sock(sk); + /* Warning : release_cb() might need to release sk ownership, + * ie call sock_release_ownership(sk) before us. + */ if (sk->sk_prot->release_cb) sk->sk_prot->release_cb(sk); - sk->sk_lock.owned = 0; + sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f0eb4e3..17a11e6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -767,6 +767,17 @@ void tcp_release_cb(struct sock *sk) if (flags & (1UL << TCP_TSQ_DEFERRED)) tcp_tsq_handler(sk); + /* Here begins the tricky part : + * We are called from release_sock() with : + * 1) BH disabled + * 2) sk_lock.slock spinlock held + * 3) socket owned by us (sk->sk_lock.owned == 1) + * + * But following code is meant to be called from BH handlers, + * so we should keep BH disabled, but early release socket ownership + */ + sock_release_ownership(sk); + if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { tcp_write_timer_handler(sk); __sock_put(sk); -- cgit v0.10.2 From 9ed973cc40c588abeaa58aea0683ea665132d11d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 10 Mar 2014 22:25:24 +0100 Subject: bridge: multicast: add sanity check for general query destination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit General IGMP and MLD queries are supposed to have the multicast link-local all-nodes address as their destination according to RFC2236 section 9, RFC3376 section 4.1.12/9.1, RFC2710 section 8 and RFC3810 section 5.1.15. Without this check, such malformed IGMP/MLD queries can result in a denial of service: The queries are ignored by most IGMP/MLD listeners therefore they will not respond with an IGMP/MLD report. However, without this patch these malformed MLD queries would enable the snooping part in the bridge code, potentially shutting down the according ports towards these hosts for multicast traffic as the bridge did not learn about these listeners. Reported-by: Jan Stancek Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index fb0e36f..e56bae4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1181,6 +1181,14 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } + /* RFC2236+RFC3376 (IGMPv2+IGMPv3) require the multicast link layer + * all-systems destination addresses (224.0.0.1) for general queries + */ + if (!group && iph->daddr != htonl(INADDR_ALLHOSTS_GROUP)) { + err = -EINVAL; + goto out; + } + br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, max_delay); @@ -1228,6 +1236,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long max_delay; unsigned long now = jiffies; const struct in6_addr *group = NULL; + bool is_general_query; int err = 0; spin_lock(&br->multicast_lock); @@ -1262,6 +1271,16 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = max(msecs_to_jiffies(mldv2_mrc(mld2q)), 1UL); } + is_general_query = group && ipv6_addr_any(group); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (is_general_query && !ipv6_addr_is_ll_all_nodes(&ip6h->daddr)) { + err = -EINVAL; + goto out; + } + br_multicast_query_received(br, port, &br->ip6_querier, !ipv6_addr_any(&ip6h->saddr), max_delay); -- cgit v0.10.2 From 20a599bec95a52fa72432b2376a2ce47c5bb68fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Mon, 10 Mar 2014 22:25:25 +0100 Subject: bridge: multicast: enable snooping on general queries only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this check someone could easily create a denial of service by injecting multicast-specific queries to enable the bridge snooping part if no real querier issuing periodic general queries is present on the link which would result in the bridge wrongly shutting down ports for multicast traffic as the bridge did not learn about these listeners. With this patch the snooping code is enabled upon receiving valid, general queries only. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index e56bae4..93067ec 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1127,9 +1127,10 @@ static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, struct bridge_mcast_querier *querier, int saddr, + bool is_general_query, unsigned long max_delay) { - if (saddr) + if (saddr && is_general_query) br_multicast_update_querier_timer(br, querier, max_delay); else if (timer_pending(&querier->timer)) return; @@ -1190,7 +1191,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, } br_multicast_query_received(br, port, &br->ip4_querier, !!iph->saddr, - max_delay); + !group, max_delay); if (!group) goto out; @@ -1282,7 +1283,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, } br_multicast_query_received(br, port, &br->ip6_querier, - !ipv6_addr_any(&ip6h->saddr), max_delay); + !ipv6_addr_any(&ip6h->saddr), + is_general_query, max_delay); if (!group) goto out; -- cgit v0.10.2 From fdfaf64e75397567257e1051931f9a3377360665 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 10 Mar 2014 15:56:51 -0700 Subject: x86: bpf_jit: support negative offsets Commit a998d4342337 claimed to introduce negative offset support to x86 jit, but it couldn't be working, since at the time of the execution of LD+ABS or LD+IND instructions via call into bpf_internal_load_pointer_neg_helper() the %edx (3rd argument of this func) had junk value instead of access size in bytes (1 or 2 or 4). Store size into %edx instead of %ecx (what original commit intended to do) Fixes: a998d4342337 ("bpf jit: Let the x86 jit handle negative offsets") Signed-off-by: Alexei Starovoitov Cc: Jan Seiffert Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 877b9a1..0149575 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -140,7 +140,7 @@ bpf_slow_path_byte_msh: push %r9; \ push SKBDATA; \ /* rsi already has offset */ \ - mov $SIZE,%ecx; /* size */ \ + mov $SIZE,%edx; /* size */ \ call bpf_internal_load_pointer_neg_helper; \ test %rax,%rax; \ pop SKBDATA; \ -- cgit v0.10.2 From 406764622fb71e5f4efb39ff111ef87713815f6d Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Tue, 11 Mar 2014 08:49:39 +0000 Subject: tools/net/Makefile: Define PACKAGE to fix build problems Fixes the following build problem with binutils-2.24 gcc -Wall -O2 -c -o bpf_jit_disasm.o bpf_jit_disasm.c In file included from bpf_jit_disasm.c:25:0: /usr/include/bfd.h:35:2: error: #error config.h must be included before this header #error config.h must be included before this header This is similar to commit 3ce711a6abc27abce1554e1d671a8762b7187690 "perf tools: bfd.h/libbfd detection fails with recent binutils" See: https://sourceware.org/bugzilla/show_bug.cgi?id=14243 CC: David S. Miller CC: Daniel Borkmann CC: netdev@vger.kernel.org Acked-by: Daniel Borkmann Signed-off-by: Markos Chandras Signed-off-by: David S. Miller diff --git a/tools/net/Makefile b/tools/net/Makefile index 004cd74..ee577ea 100644 --- a/tools/net/Makefile +++ b/tools/net/Makefile @@ -12,7 +12,7 @@ YACC = bison all : bpf_jit_disasm bpf_dbg bpf_asm -bpf_jit_disasm : CFLAGS = -Wall -O2 +bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm' bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl bpf_jit_disasm : bpf_jit_disasm.o -- cgit v0.10.2 From f75761b6b5bf6277296505941d2dd8e11f9b5c35 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 11 Mar 2014 15:11:59 +0800 Subject: r8169: fix the incorrect tx descriptor version The tx descriptor version of RTL8111B belong to RTL_TD_0. Signed-off-by: Hayes Wang Acked-by: Francois Romieu Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e977965..3ff7bc3 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -209,7 +209,7 @@ static const struct { [RTL_GIGA_MAC_VER_16] = _R("RTL8101e", RTL_TD_0, NULL, JUMBO_1K, true), [RTL_GIGA_MAC_VER_17] = - _R("RTL8168b/8111b", RTL_TD_1, NULL, JUMBO_4K, false), + _R("RTL8168b/8111b", RTL_TD_0, NULL, JUMBO_4K, false), [RTL_GIGA_MAC_VER_18] = _R("RTL8168cp/8111cp", RTL_TD_1, NULL, JUMBO_6K, false), [RTL_GIGA_MAC_VER_19] = -- cgit v0.10.2 From 0e6d6ec02f867fe8d1f785312b7343b21052322f Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 12 Mar 2014 10:41:32 +0100 Subject: drm/ttm: Work around performance regression with VM_PFNMAP A performance regression was introduced in TTM in linux 3.13 when we started using VM_PFNMAP for shared mappings. In theory this should've been faster due to less page book-keeping but it appears like VM_PFNMAP + x86 PAT + write-combine is a particularly cpu-hungry combination, as seen by largely increased cpu-usage on r200 GL video playback. Until we've sorted out why, revert to always use VM_MIXEDMAP. Reference: freedesktop.org bugzilla bug #75719 Reported-and-tested-by: Acked-by: Alex Deucher Signed-off-by: Thomas Hellstrom Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 801231c..0ce48e5 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -339,11 +339,13 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, vma->vm_private_data = bo; /* - * PFNMAP is faster than MIXEDMAP due to reduced page - * administration. So use MIXEDMAP only if private VMA, where - * we need to support COW. + * We'd like to use VM_PFNMAP on shared mappings, where + * (vma->vm_flags & VM_SHARED) != 0, for performance reasons, + * but for some reason VM_PFNMAP + x86 PAT + write-combine is very + * bad for performance. Until that has been sorted out, use + * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719 */ - vma->vm_flags |= (vma->vm_flags & VM_SHARED) ? VM_PFNMAP : VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; return 0; out_unref: @@ -359,7 +361,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) vma->vm_ops = &ttm_bo_vm_ops; vma->vm_private_data = ttm_bo_reference(bo); - vma->vm_flags |= (vma->vm_flags & VM_SHARED) ? VM_PFNMAP : VM_MIXEDMAP; + vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND; return 0; } -- cgit v0.10.2 From 3cdeb713dc66057b50682048c151eae07b186c42 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 11 Mar 2014 14:22:19 -0600 Subject: PCI: Enable INTx in pci_reenable_device() only when MSI/MSI-X not enabled Andreas reported that after 1f42db786b14 ("PCI: Enable INTx if BIOS left them disabled"), pciehp surprise removal stopped working. This happens because pci_reenable_device() on the hotplug bridge (used in the pciehp_configure_device() path) clears the Interrupt Disable bit, which apparently breaks the bridge's MSI hotplug event reporting. Previously we cleared the Interrupt Disable bit in do_pci_enable_device(), which is used by both pci_enable_device() and pci_reenable_device(). But we use pci_reenable_device() after the driver may have enabled MSI or MSI-X, and we *set* Interrupt Disable as part of enabling MSI/MSI-X. This patch clears Interrupt Disable only when MSI/MSI-X has not been enabled. Fixes: 1f42db786b14 PCI: Enable INTx if BIOS left them disabled Link: https://bugzilla.kernel.org/show_bug.cgi?id=71691 Reported-and-tested-by: Andreas Noever Signed-off-by: Bjorn Helgaas CC: stable@vger.kernel.org CC: Sarah Sharp diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 6b05f61..fdbc294 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1192,6 +1192,9 @@ static int do_pci_enable_device(struct pci_dev *dev, int bars) return err; pci_fixup_device(pci_fixup_enable, dev); + if (dev->msi_enabled || dev->msix_enabled) + return 0; + pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (pin) { pci_read_config_word(dev, PCI_COMMAND, &cmd); -- cgit v0.10.2 From ac93ac7403493f8707b7734de9f40d5cb5db9045 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 11 Mar 2014 14:23:12 -0600 Subject: PCI: Don't check resource_size() in pci_bus_alloc_resource() Paul reported that after f75b99d5a77d ("PCI: Enforce bus address limits in resource allocation") on a 32-bit kernel (CONFIG_PHYS_ADDR_T_64BIT not set), intel-gtt complained "can't ioremap flush page - no chipset flushing". In addition, other PCI resource allocations, e.g., for bridge windows, failed. This happens because we incorrectly skip bus resources of [mem 0x00000000-0xffffffff] because we think they are of size zero. When resource_size_t is 32 bits wide, resource_size() on [mem 0x00000000-0xffffffff] returns 0 because (r->end - r->start + 1) overflows. Therefore, we can't use "resource_size() == 0" to decide that allocation from this resource will fail. allocate_resource() should fail anyway if it can't satisfy the address constraints, so we should just depend on that. A [mem 0x00000000-0xffffffff] bus resource is obviously not really valid, but we do fall back to it as a default when we don't have information about host bridge apertures. Link: https://bugzilla.kernel.org/show_bug.cgi?id=71611 Fixes: f75b99d5a77d PCI: Enforce bus address limits in resource allocation Reported-and-tested-by: Paul Bolle Signed-off-by: Bjorn Helgaas diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 00660cc..3890166 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -162,8 +162,6 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, avail = *r; pci_clip_resource_to_region(bus, &avail, region); - if (!resource_size(&avail)) - continue; /* * "min" is typically PCIBIOS_MIN_IO or PCIBIOS_MIN_MEM to -- cgit v0.10.2 From 596f3142d2b7be307a1652d59e7b93adab918437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Tue, 11 Mar 2014 19:11:18 +0100 Subject: KVM: SVM: fix cr8 intercept window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We always disable cr8 intercept in its handler, but only re-enable it if handling KVM_REQ_EVENT, so there can be a window where we do not intercept cr8 writes, which allows an interrupt to disrupt a higher priority task. Fix this by disabling intercepts in the same function that re-enables them when needed. This fixes BSOD in Windows 2008. Cc: Signed-off-by: Radim Krčmář Reviewed-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index e81df8f..2de1bc0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3002,10 +3002,8 @@ static int cr8_write_interception(struct vcpu_svm *svm) u8 cr8_prev = kvm_get_cr8(&svm->vcpu); /* instruction emulation calls kvm_set_cr8() */ r = cr_interception(svm); - if (irqchip_in_kernel(svm->vcpu.kvm)) { - clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irqchip_in_kernel(svm->vcpu.kvm)) return r; - } if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return r; kvm_run->exit_reason = KVM_EXIT_SET_TPR; @@ -3567,6 +3565,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) return; + clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); + if (irr == -1) return; -- cgit v0.10.2 From 8b9d96666529a979acf4825391efcc7c8a3e9f12 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 12 Mar 2014 00:40:05 +0100 Subject: dm cache: fix truncation bug when copying a block to/from >2TB fast device During demotion or promotion to a cache's >2TB fast device we must not truncate the cache block's associated sector to 32bits. The 32bit temporary result of from_cblock() caused a 32bit multiplication when calculating the sector of the fast device in issue_copy_real(). Use an intermediate 64bit type to store the 32bit from_cblock() to allow for proper 64bit multiplication. Here is an example of how this bug manifests on an ext4 filesystem: EXT4-fs error (device dm-0): ext4_mb_generate_buddy:756: group 17136, 32768 clusters in bitmap, 30688 in gd; block bitmap corrupt. JBD2: Spotted dirty metadata buffer (dev = dm-0, blocknr = 0). There's a risk of filesystem corruption in case of system crash. Signed-off-by: Heinz Mauelshagen Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1af7014..354bbc1 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -979,12 +979,13 @@ static void issue_copy_real(struct dm_cache_migration *mg) int r; struct dm_io_region o_region, c_region; struct cache *cache = mg->cache; + sector_t cblock = from_cblock(mg->cblock); o_region.bdev = cache->origin_dev->bdev; o_region.count = cache->sectors_per_block; c_region.bdev = cache->cache_dev->bdev; - c_region.sector = from_cblock(mg->cblock) * cache->sectors_per_block; + c_region.sector = cblock * cache->sectors_per_block; c_region.count = cache->sectors_per_block; if (mg->writeback || mg->demote) { -- cgit v0.10.2 From e893fba90c09f9b57fb97daae204ea9cc2c52fa5 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Wed, 12 Mar 2014 16:13:39 +0100 Subject: dm cache: fix access beyond end of origin device In order to avoid wasting cache space a partial block at the end of the origin device is not cached. Unfortunately, the check for such a partial block at the end of the origin device was flawed. Fix accesses beyond the end of the origin device that occured due to attempted promotion of an undetected partial block by: - initializing the per bio data struct to allow cache_end_io to work properly - recognizing access to the partial block at the end of the origin device - avoiding out of bounds access to the discard bitset Otherwise, users can experience errors like the following: attempt to access beyond end of device dm-5: rw=0, want=20971520, limit=20971456 ... device-mapper: cache: promotion failed; couldn't copy block Signed-off-by: Heinz Mauelshagen Acked-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 354bbc1..074b9c8 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2465,20 +2465,18 @@ static int cache_map(struct dm_target *ti, struct bio *bio) bool discarded_block; struct dm_bio_prison_cell *cell; struct policy_result lookup_result; - struct per_bio_data *pb; + struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); - if (from_oblock(block) > from_oblock(cache->origin_blocks)) { + if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) { /* * This can only occur if the io goes to a partial block at * the end of the origin device. We don't cache these. * Just remap to the origin and carry on. */ - remap_to_origin_clear_discard(cache, bio, block); + remap_to_origin(cache, bio); return DM_MAPIO_REMAPPED; } - pb = init_per_bio_data(bio, pb_data_size); - if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) { defer_bio(cache, bio); return DM_MAPIO_SUBMITTED; -- cgit v0.10.2 From 9ef7506f7eff3fc42724269f62e30164c141661f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 12 Mar 2014 10:59:37 -0400 Subject: drm/ttm: don't oops if no invalidate_caches() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A few of the simpler TTM drivers (cirrus, ast, mgag200) do not implement this function. Yet can end up somehow with an evicted bo: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [< (null)>] (null) PGD 16e761067 PUD 16e6cf067 PMD 0 Oops: 0010 [#1] SMP Modules linked in: bnep bluetooth rfkill fuse ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables sg btrfs zlib_deflate raid6_pq xor dm_queue_length iTCO_wdt iTCO_vendor_support coretemp kvm dcdbas dm_service_time microcode serio_raw pcspkr lpc_ich mfd_core i7core_edac edac_core ses enclosure ipmi_si ipmi_msghandler shpchp acpi_power_meter mperf nfsd auth_rpcgss nfs_acl lockd uinput sunrpc dm_multipath xfs libcrc32c ata_generic pata_acpi sr_mod cdrom sd_mod usb_storage mgag200 syscopyarea sysfillrect sysimgblt i2c_algo_bit lpfc drm_kms_helper ttm crc32c_intel ata_piix bfa drm ixgbe libata i2c_core mdio crc_t10dif ptp crct10dif_common pps_core scsi_transport_fc dca scsi_tgt megaraid_sas bnx2 dm_mirror dm_region_hash dm_log dm_mod CPU: 16 PID: 2572 Comm: X Not tainted 3.10.0-86.el7.x86_64 #1 Hardware name: Dell Inc. PowerEdge R810/0H235N, BIOS 0.3.0 11/14/2009 task: ffff8801799dabc0 ti: ffff88016c884000 task.ti: ffff88016c884000 RIP: 0010:[<0000000000000000>] [< (null)>] (null) RSP: 0018:ffff88016c885ad8 EFLAGS: 00010202 RAX: ffffffffa04e94c0 RBX: ffff880178937a20 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000240004 RDI: ffff880178937a00 RBP: ffff88016c885b60 R08: 00000000000171a0 R09: ffff88007cf171a0 R10: ffffea0005842540 R11: ffffffff810487b9 R12: ffff880178937b30 R13: ffff880178937a00 R14: ffff88016c885b78 R15: ffff880179929400 FS: 00007f81ba2ef980(0000) GS:ffff88007cf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000016e763000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Stack: ffffffffa0306fae ffff8801799295c0 0000000000260004 0000000000000001 ffff88016c885b60 ffffffffa0307669 00ff88007cf17738 ffff88017cf17700 ffff880178937a00 ffff880100000000 ffff880100000000 0000000079929400 Call Trace: [] ? ttm_bo_handle_move_mem+0x54e/0x5b0 [ttm] [] ? ttm_bo_mem_space+0x169/0x340 [ttm] [] ttm_bo_move_buffer+0x117/0x130 [ttm] [] ? perf_event_init_context+0x141/0x220 [] ttm_bo_validate+0xc1/0x130 [ttm] [] mgag200_bo_pin+0x87/0xc0 [mgag200] [] mga_crtc_cursor_set+0x474/0xbb0 [mgag200] [] ? __mem_cgroup_commit_charge+0x152/0x3b0 [] ? mutex_lock+0x12/0x2f [] drm_mode_cursor_common+0x123/0x170 [drm] [] drm_mode_cursor_ioctl+0x41/0x50 [drm] [] drm_ioctl+0x502/0x630 [drm] [] ? __do_page_fault+0x1f4/0x510 [] ? __restore_xstate_sig+0x218/0x4f0 [] do_vfs_ioctl+0x2e5/0x4d0 [] ? file_has_perm+0x8e/0xa0 [] SyS_ioctl+0x81/0xa0 [] system_call_fastpath+0x16/0x1b Code: Bad RIP value. RIP [< (null)>] (null) RSP CR2: 0000000000000000 Signed-off-by: Rob Clark Reviewed-by: Jérôme Glisse Reviewed-by: Thomas Hellstrom Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a066513..214b799 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -351,9 +351,11 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, moved: if (bo->evicted) { - ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement); - if (ret) - pr_err("Can not flush read caches\n"); + if (bdev->driver->invalidate_caches) { + ret = bdev->driver->invalidate_caches(bdev, bo->mem.placement); + if (ret) + pr_err("Can not flush read caches\n"); + } bo->evicted = false; } -- cgit v0.10.2 From 836fbaf459f9c041826864021688e9bd131e722c Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Tue, 11 Mar 2014 12:45:32 +0000 Subject: xen-netback: use skb_is_gso in xenvif_start_xmit In 5bd076708 ("Xen-netback: Fix issue caused by using gso_type wrongly") we use skb_is_gso to determine if we need an extra slot to accommodate the SKB. There's similar error in interface.c. Change that to use skb_is_gso as well. Signed-off-by: Wei Liu Cc: Annie Li Cc: Ian Campbell Cc: Paul Durrant Acked-by: Ian Campbell Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 7669d49..301cc03 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -132,8 +132,7 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) /* If the skb is GSO then we'll also need an extra slot for the * metadata. */ - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 || - skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + if (skb_is_gso(skb)) min_slots_needed++; /* If the skb can't possibly fit in the remaining slots -- cgit v0.10.2 From 7848865914c6a63ead674f0f5604b77df7d3874f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 11 Mar 2014 15:02:30 -0400 Subject: drm/radeon: fix runpm disabling on non-PX harder Make sure runtime pm is disabled on non-PX hardware. Should fix powerdown problems without displays attached. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 2aecd6d..66ed3ea 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -33,6 +33,13 @@ #include #include #include + +#if defined(CONFIG_VGA_SWITCHEROO) +bool radeon_is_px(void); +#else +static inline bool radeon_is_px(void) { return false; } +#endif + /** * radeon_driver_unload_kms - Main unload function for KMS. * @@ -130,7 +137,8 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } - if (radeon_runtime_pm != 0) { + if ((radeon_runtime_pm == 1) || + ((radeon_runtime_pm == -1) && radeon_is_px())) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); pm_runtime_set_active(dev->dev); -- cgit v0.10.2 From 7b1bbe883b3ed962ca2be4daf321f318f5091340 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 15:15:58 -0400 Subject: drm/radeon/cik: properly set sdma ring status on disable When we disable the rings, set the status properly. If not other code pathes may try and use the rings which are not functional at this point. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 1ecb3f1..5d7a5fa 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -264,6 +264,8 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev) WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl); WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0); } + rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; } /** -- cgit v0.10.2 From 07ae78c9798b79bad3d3adf983c94ba23fde54d4 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 15:26:34 -0400 Subject: drm/radeon/cik: stop the sdma engines in the enable() function We always stop the rings when disabling the engines so just call the stop functions directly from the sdma enable function. This way the rings' status is set correctly on suspend so there are no problems on resume. Fixes resume failures that result in acceleration getting disabled. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 5d7a5fa..94626ea 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -293,6 +293,11 @@ void cik_sdma_enable(struct radeon_device *rdev, bool enable) u32 me_cntl, reg_offset; int i; + if (enable == false) { + cik_sdma_gfx_stop(rdev); + cik_sdma_rlc_stop(rdev); + } + for (i = 0; i < 2; i++) { if (i == 0) reg_offset = SDMA0_REGISTER_OFFSET; @@ -422,10 +427,6 @@ static int cik_sdma_load_microcode(struct radeon_device *rdev) if (!rdev->sdma_fw) return -EINVAL; - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); - /* halt the MEs */ cik_sdma_enable(rdev, false); @@ -494,9 +495,6 @@ int cik_sdma_resume(struct radeon_device *rdev) */ void cik_sdma_fini(struct radeon_device *rdev) { - /* stop the gfx rings and rlc compute queues */ - cik_sdma_gfx_stop(rdev); - cik_sdma_rlc_stop(rdev); /* halt the MEs */ cik_sdma_enable(rdev, false); radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); -- cgit v0.10.2 From 56cb456746a15c1025a178466492ca4c373b1a63 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:30 +0200 Subject: net/mlx4_core: Fix wrong dump of the vxlan offloads device capability Fix the value used to dump the vxlan offloads device capability to align with the MLX4_DEV_CAP_FLAG2_yyy definition. While on that, add dump to the IPoIB flow-steering device capability and fix small typo. The vxlan cap value wasn't fully handled when a conflict was resolved between MLX4_DEV_CAP_FLAG2_DMFS_IPOIB coming from the IB tree to MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS coming from net-next. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 8726e34..7e2995e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -129,13 +129,14 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [0] = "RSS support", [1] = "RSS Toeplitz Hash Function support", [2] = "RSS XOR Hash Function support", - [3] = "Device manage flow steering support", + [3] = "Device managed flow steering support", [4] = "Automatic MAC reassignment support", [5] = "Time stamping support", [6] = "VST (control vlan insertion/stripping) support", [7] = "FSM (MAC anti-spoofing) support", [8] = "Dynamic QP updates support", - [9] = "TCP/IP offloads/flow-steering for VXLAN support" + [9] = "Device managed flow steering IPoIB support", + [10] = "TCP/IP offloads/flow-steering for VXLAN support" }; int i; -- cgit v0.10.2 From 2a2083f7f3568c0192daa6ac0e6fa35d953f47bd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:31 +0200 Subject: net/mlx4_en: Handle vxlan steering rules for mac address changes When the device mac address is changed, we must deregister the vxlan steering rule associated with the previous mac, and register a new steering rule using the new mac. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fad4531..ba212611 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -742,6 +742,14 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, err = mlx4_en_uc_steer_add(priv, new_mac, &qpn, &entry->reg_id); + if (err) + return err; + if (priv->tunnel_reg_id) { + mlx4_flow_detach(priv->mdev->dev, priv->tunnel_reg_id); + priv->tunnel_reg_id = 0; + } + err = mlx4_en_tunnel_steer_add(priv, new_mac, qpn, + &priv->tunnel_reg_id); return err; } } -- cgit v0.10.2 From 7855bff42ea9938a0853321256f4c8ce3628aa73 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 12 Mar 2014 17:16:32 +0200 Subject: net/mlx4_core: Load the IB driver when the device supports IBoE When checking what protocol drivers to load, the IB driver should be requested also over Ethernet ports, if the device supports IBoE (RoCE). Fixes: b046ffe 'net/mlx4_core: Load higher level modules according to ports type' Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 30a08a6..936c153 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -751,7 +751,7 @@ static void mlx4_request_modules(struct mlx4_dev *dev) has_eth_port = true; } - if (has_ib_port) + if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)) request_module_nowait(IB_DRV_NAME); if (has_eth_port) request_module_nowait(EN_DRV_NAME); -- cgit v0.10.2 From a93c12560498066765c58b032530798a26bd8f70 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 11 Mar 2014 11:23:42 +0100 Subject: packet: doc: Spelling s/than/that/ Signed-off-by: Geert Uytterhoeven Cc: David S. Miller Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 1404674..6fea79e 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -453,7 +453,7 @@ TP_STATUS_COPY : This flag indicates that the frame (and associated enabled previously with setsockopt() and the PACKET_COPY_THRESH option. - The number of frames than can be buffered to + The number of frames that can be buffered to be read with recvfrom is limited like a normal socket. See the SO_RCVBUF option in the socket (7) man page. -- cgit v0.10.2 From 7e814a6c50a1669118ffa4961568fea3aa955faf Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 11 Mar 2014 12:06:24 +0100 Subject: MAINTAINERS: Add tools/net to NETWORKING [GENERAL] Make sure patches for these tools go to the netdev list as well. References: https://marc.info/?l=linux-kernel&m=139450284501328&w=2 Cc: David S. Miller Cc: Daniel Borkmann Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index e1297ff..e1be77c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6002,6 +6002,7 @@ F: include/linux/netdevice.h F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h +F: tools/net/ NETWORKING [IPv4/IPv6] M: "David S. Miller" -- cgit v0.10.2 From b2b3d8d952e4f8d6ac2ce80be96b937f29f6e42e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 12 Mar 2014 16:20:44 -0400 Subject: drm/radeon/cik: properly set compute ring status on disable When we disable the rings, set the status properly. If not other code pathes may try and use the rings which are not functional at this point. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e22be84..bbb1784 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4134,8 +4134,11 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable) { if (enable) WREG32(CP_MEC_CNTL, 0); - else + else { WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT)); + rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; + rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false; + } udelay(50); } -- cgit v0.10.2 From dbb490b96584d4e958533fb637f08b557f505657 Mon Sep 17 00:00:00 2001 From: Matthew Leach Date: Tue, 11 Mar 2014 11:58:27 +0000 Subject: net: socket: error on a negative msg_namelen When copying in a struct msghdr from the user, if the user has set the msg_namelen parameter to a negative value it gets clamped to a valid size due to a comparison between signed and unsigned values. Ensure the syscall errors when the user passes in a negative value. Signed-off-by: Matthew Leach Signed-off-by: David S. Miller diff --git a/net/socket.c b/net/socket.c index 879933a..32df584 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1985,6 +1985,10 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, { if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; -- cgit v0.10.2 From 2ed99e39cb9392312c100d9da591c20641c64d12 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Mar 2014 21:49:33 +0100 Subject: cpufreq: Skip current frequency initialization for ->setpolicy drivers After commit da60ce9f2fac (cpufreq: call cpufreq_driver->get() after calling ->init()) __cpufreq_add_dev() sometimes fails for CPUs handled by intel_pstate, because that driver may return 0 from its ->get() callback if it has not run long enough to collect enough samples on the given CPU. That didn't happen before commit da60ce9f2fac which added policy->cur initialization to __cpufreq_add_dev() to help reduce code duplication in other cpufreq drivers. However, the code added by commit da60ce9f2fac need not be executed for cpufreq drivers having the ->setpolicy callback defined, because the subsequent invocation of cpufreq_set_policy() will use that callback to initialize the policy anyway and it doesn't need policy->cur to be initialized upfront. The analogous code in cpufreq_update_policy() is also unnecessary for cpufreq drivers having ->setpolicy set and may be skipped for them as well. Since intel_pstate provides ->setpolicy, skipping the upfront policy->cur initialization for cpufreq drivers with that callback set will cover intel_pstate and the problem it's been having after commit da60ce9f2fac will be addressed. Fixes: da60ce9f2fac (cpufreq: call cpufreq_driver->get() after calling ->init()) References: https://bugzilla.kernel.org/show_bug.cgi?id=71931 Reported-and-tested-by: Patrik Lundquist Acked-by: Dirk Brandewie Cc: 3.13+ # 3.13+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index cf485d9..199b52b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1129,7 +1129,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif, per_cpu(cpufreq_cpu_data, j) = policy; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (cpufreq_driver->get) { + if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { policy->cur = cpufreq_driver->get(policy->cpu); if (!policy->cur) { pr_err("%s: ->get() failed\n", __func__); @@ -2143,7 +2143,7 @@ int cpufreq_update_policy(unsigned int cpu) * BIOS might change freq behind our back * -> ask driver for current freq and notify governors about a change */ - if (cpufreq_driver->get) { + if (cpufreq_driver->get && !cpufreq_driver->setpolicy) { new_policy.cur = cpufreq_driver->get(cpu); if (!policy->cur) { pr_debug("Driver did not initialize current freq"); -- cgit v0.10.2 From c4e1acbb35e4a3838cdfc0e7f8237e844aff00b6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 13 Mar 2014 00:22:58 +0100 Subject: ACPI / init: Invoke early ACPI initialization later Commit 73f7d1ca3263 (ACPI / init: Run acpi_early_init() before timekeeping_init()) optimistically moved the early ACPI initialization before timekeeping_init(), but that didn't work, because it broke fast TSC calibration for Julian Wollrath on Thinkpad x121e (and most likely for others too). The reason is that acpi_early_init() enables the SCI and that interferes with the fast TSC calibration mechanism. Thus follow the original idea to execute acpi_early_init() before efi_enter_virtual_mode() to help the EFI people for now and we can revisit the other problem that commit 73f7d1ca3263 attempted to address in the future (if really necessary). Fixes: 73f7d1ca3263 (ACPI / init: Run acpi_early_init() before timekeeping_init()) Reported-by: Julian Wollrath Reviewed-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki diff --git a/init/main.c b/init/main.c index eb03090..9c7fd4c 100644 --- a/init/main.c +++ b/init/main.c @@ -561,7 +561,6 @@ asmlinkage void __init start_kernel(void) init_timers(); hrtimers_init(); softirq_init(); - acpi_early_init(); timekeeping_init(); time_init(); sched_clock_postinit(); @@ -613,6 +612,7 @@ asmlinkage void __init start_kernel(void) calibrate_delay(); pidmap_init(); anon_vma_init(); + acpi_early_init(); #ifdef CONFIG_X86 if (efi_enabled(EFI_RUNTIME_SERVICES)) efi_enter_virtual_mode(); -- cgit v0.10.2 From 4b0c82529b92cda4723cf1d09e1e2ee9b9ae96f1 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 12 Mar 2014 20:42:31 +0100 Subject: drm/vmwgfx: Fix a surface reference corner-case in legacy emulation mode If running on a gb-object capable device with a non-gb capable surface exporter (X server) and a gb capable surface referencing client (GL driver), the referencing client expects to find a shareable backing buffer attached to the surface at reference time. This may not be the case if the surface has not yet been validated. This would cause the surface reference IOCTL to return an error. Signed-off-by: Thomas Hellstrom Reviewed-by: Jakob Bornecrantz diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 82468d9..e7af580 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -830,6 +830,24 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, if (unlikely(ret != 0)) goto out_unlock; + /* + * A gb-aware client referencing a shared surface will + * expect a backup buffer to be present. + */ + if (dev_priv->has_mob && req->shareable) { + uint32_t backup_handle; + + ret = vmw_user_dmabuf_alloc(dev_priv, tfile, + res->backup_size, + true, + &backup_handle, + &res->backup); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); + goto out_unlock; + } + } + tmp = vmw_resource_reference(&srf->res); ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime, req->shareable, VMW_RES_SURFACE, -- cgit v0.10.2 From f4e53f9a4f7d01cef9f096b1eb3705329a7ce9c2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Mar 2014 10:18:53 +0100 Subject: MAINTAINERS: add networking selftests to NETWORKING Add it to NETWORKING [GENERAL] to make sure patches for selftests go to the netdev list as well. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index e1be77c..b359eb0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6003,6 +6003,7 @@ F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h F: tools/net/ +F: tools/testing/selftests/net/ NETWORKING [IPv4/IPv6] M: "David S. Miller" -- cgit v0.10.2 From 0a8d8c446b5429d15ff2d48f46e00d8a08552303 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 13 Mar 2014 10:44:34 +0100 Subject: vmxnet3: fix building without CONFIG_PCI_MSI Since commit d25f06ea466e "vmxnet3: fix netpoll race condition", the vmxnet3 driver fails to build when CONFIG_PCI_MSI is disabled, because it unconditionally references the vmxnet3_msix_rx() function. To fix this, use the same #ifdef in the caller that exists around the function definition. Signed-off-by: Arnd Bergmann Cc: Neil Horman Cc: Shreyas Bhatewara Cc: "VMware, Inc." Cc: "David S. Miller" Cc: stable@vger.kernel.org Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b7daa02..0fa3b44 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1761,13 +1761,16 @@ static void vmxnet3_netpoll(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); - int i; switch (adapter->intr.type) { - case VMXNET3_IT_MSIX: +#ifdef CONFIG_PCI_MSI + case VMXNET3_IT_MSIX: { + int i; for (i = 0; i < adapter->num_rx_queues; i++) vmxnet3_msix_rx(0, &adapter->rx_queue[i]); break; + } +#endif case VMXNET3_IT_MSI: default: vmxnet3_intr(0, adapter->netdev); -- cgit v0.10.2 From de123268300fd33b7f7668fda3264059daffa6ef Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 13 Mar 2014 14:52:15 +0200 Subject: net/mlx4_en: Deregister multicast vxlan steering rules when going down When mlx4_en_stop_port() is called, we need to deregister also the tunnel steering rules that relate to multicast. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index ba212611..84a96f7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1800,6 +1800,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, mclist->reg_id); + if (mclist->tunnel_reg_id) + mlx4_flow_detach(mdev->dev, mclist->tunnel_reg_id); } mlx4_en_clear_list(dev); list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) { -- cgit v0.10.2 From 6e07a1e0b53d77dbcb08f29d1cca07c6d33a926f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 12 Mar 2014 08:21:24 +0100 Subject: at86rf230: fix lockdep splats This patch fix a lockdep in the at86rf230 driver, otherwise we get: [ 30.206517] ================================= [ 30.211078] [ INFO: inconsistent lock state ] [ 30.215647] 3.14.0-20140108-1-00994-g32e9426 #163 Not tainted [ 30.221660] --------------------------------- [ 30.226222] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 30.232514] systemd-udevd/157 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 30.238439] (&(&lp->lock)->rlock){?.+...}, at: [] at86rf230_isr+0x18/0x44 [ 30.246621] {HARDIRQ-ON-W} state was registered at: [ 30.251728] [] __lock_acquire+0x7a4/0x18d8 [ 30.257135] [] lock_acquire+0x68/0x7c [ 30.262071] [] _raw_spin_lock+0x28/0x38 [ 30.267203] [] at86rf230_xmit+0x1c/0x144 [ 30.272412] [] mac802154_xmit_worker+0x88/0x148 [ 30.278271] [] process_one_work+0x274/0x404 [ 30.283761] [] worker_thread+0x228/0x374 [ 30.288971] [] kthread+0xd0/0xe4 [ 30.293455] [] ret_from_fork+0x14/0x2c [ 30.298493] irq event stamp: 8948 [ 30.301963] hardirqs last enabled at (8947): [] __kmalloc+0xb4/0x110 [ 30.309636] hardirqs last disabled at (8948): [] __irq_svc+0x34/0x5c [ 30.317215] softirqs last enabled at (8452): [] __do_softirq+0x1dc/0x264 [ 30.325243] softirqs last disabled at (8439): [] irq_exit+0x80/0xf4 We use the lp->lock inside the isr of at86rf230, that's why we need the irqsave spinlock calls. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index ab31544..a30258a 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -546,12 +546,12 @@ at86rf230_xmit(struct ieee802154_dev *dev, struct sk_buff *skb) int rc; unsigned long flags; - spin_lock(&lp->lock); + spin_lock_irqsave(&lp->lock, flags); if (lp->irq_busy) { - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); return -EBUSY; } - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); might_sleep(); @@ -725,10 +725,11 @@ static void at86rf230_irqwork_level(struct work_struct *work) static irqreturn_t at86rf230_isr(int irq, void *data) { struct at86rf230_local *lp = data; + unsigned long flags; - spin_lock(&lp->lock); + spin_lock_irqsave(&lp->lock, flags); lp->irq_busy = 1; - spin_unlock(&lp->lock); + spin_unlock_irqrestore(&lp->lock, flags); schedule_work(&lp->irqwork); -- cgit v0.10.2 From fb00bc2e6cd2046282ba4b03f4fe682aee70b2f8 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 12 Mar 2014 17:31:59 +0800 Subject: bonding: set correct vlan id for alb xmit path The commit d3ab3ffd1d728d7ee77340e7e7e2c7cfe6a4013e (bonding: use rlb_client_info->vlan_id instead of ->tag) remove the rlb_client_info->tag, but occur some issues, The vlan_get_tag() will return 0 for success and -EINVAL for error, so the client_info->vlan_id always be set to 0 if the vlan_get_tag return 0 for success, so the client_info would never get a correct vlan id. We should only set the vlan id to 0 when the vlan_get_tag return error. Fixes: d3ab3ffd1d7 (bonding: use rlb_client_info->vlan_id instead of ->tag) CC: Ding Tianhong CC: Jay Vosburgh CC: Andy Gospodarek Signed-off-by: Ding Tianhong Acked-by: Veaceslav Falico Signed-off-by: David S. Miller diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index a2c4747..e8f133e 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -730,7 +730,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon client_info->ntt = 0; } - if (!vlan_get_tag(skb, &client_info->vlan_id)) + if (vlan_get_tag(skb, &client_info->vlan_id)) client_info->vlan_id = 0; if (!client_info->assigned) { -- cgit v0.10.2 From 84fe61821e4ebab6322eeae3f3c27f77f0031978 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Wed, 12 Mar 2014 11:28:19 +0100 Subject: eth: fec: Fix lost promiscuous mode after reconnecting cable If the Freescale fec is in promiscuous mode and network cable is reconnected then the promiscuous mode get lost. The problem is caused by a too soon call of set_multicast_list to re-enable promisc mode. The FEC_R_CNTRL register changes are overwritten by fec_restart. This patch fixes this by moving the call behind the init of FEC_R_CNTRL register in fec_restart. Successful tested on a i.MX28 board. Signed-off-by: Stefan Wahren Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 479a7cb..03a3513 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -528,13 +528,6 @@ fec_restart(struct net_device *ndev, int duplex) /* Clear any outstanding interrupt. */ writel(0xffc00000, fep->hwp + FEC_IEVENT); - /* Setup multicast filter. */ - set_multicast_list(ndev); -#ifndef CONFIG_M5272 - writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); - writel(0, fep->hwp + FEC_HASH_TABLE_LOW); -#endif - /* Set maximum receive buffer size. */ writel(PKT_MAXBLR_SIZE, fep->hwp + FEC_R_BUFF_SIZE); @@ -655,6 +648,13 @@ fec_restart(struct net_device *ndev, int duplex) writel(rcntl, fep->hwp + FEC_R_CNTRL); + /* Setup multicast filter. */ + set_multicast_list(ndev); +#ifndef CONFIG_M5272 + writel(0, fep->hwp + FEC_HASH_TABLE_HIGH); + writel(0, fep->hwp + FEC_HASH_TABLE_LOW); +#endif + if (id_entry->driver_data & FEC_QUIRK_ENET_MAC) { /* enable ENET endian swap */ ecntl |= (1 << 8); -- cgit v0.10.2 From ecab67015ef6e3f3635551dcc9971cf363cc1cd5 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 12 Mar 2014 22:13:19 +0100 Subject: ipv6: Avoid unnecessary temporary addresses being generated tmp_prefered_lft is an offset to ifp->tstamp, not now. Therefore age needs to be added to the condition. Age calculation in ipv6_create_tempaddr is different from the one in addrconf_verify and doesn't consider ADDRCONF_TIMER_FUZZ_MINUS. This can cause age in ipv6_create_tempaddr to be less than the one in addrconf_verify and therefore unnecessary temporary address to be generated. Use age calculation as in addrconf_modify to avoid this. Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fdbfeca..344e972 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1103,8 +1103,11 @@ retry: * Lifetime is greater than REGEN_ADVANCE time units. In particular, * an implementation must not create a temporary address with a zero * Preferred Lifetime. + * Use age calculation as in addrconf_verify to avoid unnecessary + * temporary addresses being generated. */ - if (tmp_prefered_lft <= regen_advance) { + age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + if (tmp_prefered_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; -- cgit v0.10.2 From a4e90bed511220ff601d064c9e5d583e91308f65 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 13 Mar 2014 22:11:39 +0100 Subject: ACPI / sleep: Add extra checks for HW Reduced ACPI mode sleep states If the HW Reduced ACPI mode bit is set in the FADT, ACPICA uses the optional sleep control and sleep status registers for making the system enter sleep states (including S5), so it is not possible to use system sleep states or power it off using ACPI if the HW Reduced ACPI mode bit is set and those registers are not available. For this reason, add a new function, acpi_sleep_state_supported(), checking if the HW Reduced ACPI mode bit is set and whether or not system sleep states are usable in that case in addition to checking the return value of acpi_get_sleep_type_data() and make the ACPI sleep setup routines use that function to check the availability of system sleep states. Among other things, this prevents the kernel from attempting to use ACPI for powering off HW Reduced ACPI systems without the sleep control and sleep status registers, because ACPI power off doesn't have a chance to work on them. That allows alternative power off mechanisms that may actually work to be used on those systems. The affected machines include Dell Venue 8 Pro, Asus T100TA, Haswell Desktop SDP and Ivy Bridge EP Demo depot. References: https://bugzilla.kernel.org/show_bug.cgi?id=70931 Reported-by: Adam Williamson Tested-by: Aubrey Li Cc: 3.4+ # 3.4+ Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index b718806..c40fb2e 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -71,6 +71,17 @@ static int acpi_sleep_prepare(u32 acpi_state) return 0; } +static bool acpi_sleep_state_supported(u8 sleep_state) +{ + acpi_status status; + u8 type_a, type_b; + + status = acpi_get_sleep_type_data(sleep_state, &type_a, &type_b); + return ACPI_SUCCESS(status) && (!acpi_gbl_reduced_hardware + || (acpi_gbl_FADT.sleep_control.address + && acpi_gbl_FADT.sleep_status.address)); +} + #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; @@ -604,15 +615,9 @@ static void acpi_sleep_suspend_setup(void) { int i; - for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) { - acpi_status status; - u8 type_a, type_b; - - status = acpi_get_sleep_type_data(i, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { + for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) + if (acpi_sleep_state_supported(i)) sleep_states[i] = 1; - } - } suspend_set_ops(old_suspend_ordering ? &acpi_suspend_ops_old : &acpi_suspend_ops); @@ -740,11 +745,7 @@ static const struct platform_hibernation_ops acpi_hibernation_ops_old = { static void acpi_sleep_hibernate_setup(void) { - acpi_status status; - u8 type_a, type_b; - - status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b); - if (ACPI_FAILURE(status)) + if (!acpi_sleep_state_supported(ACPI_STATE_S4)) return; hibernation_set_ops(old_suspend_ordering ? @@ -793,8 +794,6 @@ static void acpi_power_off(void) int __init acpi_sleep_init(void) { - acpi_status status; - u8 type_a, type_b; char supported[ACPI_S_STATE_COUNT * 3 + 1]; char *pos = supported; int i; @@ -806,8 +805,7 @@ int __init acpi_sleep_init(void) acpi_sleep_suspend_setup(); acpi_sleep_hibernate_setup(); - status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { + if (acpi_sleep_state_supported(ACPI_STATE_S5)) { sleep_states[ACPI_STATE_S5] = 1; pm_power_off_prepare = acpi_power_off_prepare; pm_power_off = acpi_power_off; -- cgit v0.10.2 From 62c19c9d29e65086e5ae76df371ed2e6b23f00cd Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 9 Feb 2014 19:47:40 +0100 Subject: i2c: Remove usage of orphaned symbol OF_I2C The symbol is an orphan, don't depend on it anymore. Signed-off-by: Richard Weinberger [wsa: enhanced commit message] Signed-off-by: Wolfram Sang Fixes: 687b81d083c0 (i2c: move OF helpers into the core) Cc: stable@kernel.org diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index f5ed031..de17c55 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -387,7 +387,7 @@ config I2C_CBUS_GPIO config I2C_CPM tristate "Freescale CPM1 or CPM2 (MPC8xx/826x)" - depends on (CPM1 || CPM2) && OF_I2C + depends on CPM1 || CPM2 help This supports the use of the I2C interface on Freescale processors with CPM1 or CPM2. -- cgit v0.10.2 From 847d7970defb45540735b3fb4e88471c27cacd85 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Thu, 13 Mar 2014 19:43:01 +0800 Subject: x86/amd/numa: Fix northbridge quirk to assign correct NUMA node For systems with multiple servers and routed fabric, all northbridges get assigned to the first server. Fix this by also using the node reported from the PCI bus. For single-fabric systems, the northbriges are on PCI bus 0 by definition, which are on NUMA node 0 by definition, so this is invarient on most systems. Tested on fam10h and fam15h single and multi-fabric systems and candidate for stable. Signed-off-by: Daniel J Blueman Acked-by: Steffen Persvold Acked-by: Borislav Petkov Cc: Link: http://lkml.kernel.org/r/1394710981-3596-1-git-send-email-daniel@numascale.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 7c6acd4..ff898bb 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -529,7 +529,7 @@ static void quirk_amd_nb_node(struct pci_dev *dev) return; pci_read_config_dword(nb_ht, 0x60, &val); - node = val & 7; + node = pcibus_to_node(dev->bus) | (val & 7); /* * Some hardware may return an invalid node ID, * so check it first: -- cgit v0.10.2