From 049d75f72d9b1cce0f5ca66ea324f0b5318046eb Mon Sep 17 00:00:00 2001 From: Mike Dunn Date: Wed, 18 Sep 2013 08:07:41 -0700 Subject: Input: pxa27x_keypad - fix NULL pointer dereference A NULL pointer dereference exception occurs in the driver probe function when device tree is used. The pdata pointer will be NULL in this case, but the code dereferences it in all cases. When device tree is used, a platform data structure is allocated and initialized, and in all cases this pointer is copied to the driver's private data, so the variable being tested should be accessed through the driver's private data structure. Signed-off-by: Mike Dunn Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/keyboard/pxa27x_keypad.c b/drivers/input/keyboard/pxa27x_keypad.c index 134c3b4..a2e758d 100644 --- a/drivers/input/keyboard/pxa27x_keypad.c +++ b/drivers/input/keyboard/pxa27x_keypad.c @@ -786,10 +786,17 @@ static int pxa27x_keypad_probe(struct platform_device *pdev) input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); input_set_capability(input_dev, EV_MSC, MSC_SCAN); - if (pdata) + if (pdata) { error = pxa27x_keypad_build_keycode(keypad); - else + } else { error = pxa27x_keypad_build_keycode_from_dt(keypad); + /* + * Data that we get from DT resides in dynamically + * allocated memory so we need to update our pdata + * pointer. + */ + pdata = keypad->pdata; + } if (error) { dev_err(&pdev->dev, "failed to build keycode\n"); goto failed_put_clk; -- cgit v0.10.2 From 2f0d2604134880f739642fd7c3ae55db33c838e7 Mon Sep 17 00:00:00 2001 From: Andrey Moiseev Date: Mon, 16 Sep 2013 15:17:31 -0700 Subject: Input: i8042 - i8042_flush fix for a full 8042 buffer When 8042 internal data buffer is full, the driver erroneously decides that the controller is not present. i8042_flush returns the number of flushed bytes, which is in 0 - I8042_BUFFER_SIZE range inclusive. Therefore, i8042_flush has no way to indicate an error. 
Moreover i8042_controller_check takes initially full buffer (i8042_flush returned I8042_BUFFER_SIZE) as a sign of absence of the controller. Let's change i8042 to return success/error instead and make sure we do not return error prematurely. Signed-off-by: Andrey Moiseev Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index 78e4de4..52c9ebf 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -223,21 +223,26 @@ static int i8042_flush(void) { unsigned long flags; unsigned char data, str; - int i = 0; + int count = 0; + int retval = 0; spin_lock_irqsave(&i8042_lock, flags); - while (((str = i8042_read_status()) & I8042_STR_OBF) && (i < I8042_BUFFER_SIZE)) { - udelay(50); - data = i8042_read_data(); - i++; - dbg("%02x <- i8042 (flush, %s)\n", - data, str & I8042_STR_AUXDATA ? "aux" : "kbd"); + while ((str = i8042_read_status()) & I8042_STR_OBF) { + if (count++ < I8042_BUFFER_SIZE) { + udelay(50); + data = i8042_read_data(); + dbg("%02x <- i8042 (flush, %s)\n", + data, str & I8042_STR_AUXDATA ? "aux" : "kbd"); + } else { + retval = -EIO; + break; + } } spin_unlock_irqrestore(&i8042_lock, flags); - return i; + return retval; } /* @@ -849,7 +854,7 @@ static int __init i8042_check_aux(void) static int i8042_controller_check(void) { - if (i8042_flush() == I8042_BUFFER_SIZE) { + if (i8042_flush()) { pr_err("No controller found\n"); return -ENODEV; } -- cgit v0.10.2 From b9b5ab11ea221a9f2d5af41da639e0898675c34c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 13 Sep 2013 21:45:51 +0200 Subject: clk: nomadik: set all timers to use 2.4 MHz TIMCLK This fixes a regression for the Nomadik on the main system timers. The Nomadik seemed a bit slow and its heartbeat wasn't looking healthy. And it was not strange, because it has been connected to the 32768 Hz clock at boot, while being told by the clock driver that it was 2.4MHz. 
Actually connect the TIMCLK to 2.4MHz by default as this is what we want for nice scheduling, clocksource and clock event. Cc: stable@vger.kernel.org Signed-off-by: Linus Walleij Signed-off-by: Mike Turquette diff --git a/drivers/clk/clk-nomadik.c b/drivers/clk/clk-nomadik.c index 51410c2..4d978a3 100644 --- a/drivers/clk/clk-nomadik.c +++ b/drivers/clk/clk-nomadik.c @@ -27,6 +27,14 @@ */ #define SRC_CR 0x00U +#define SRC_CR_T0_ENSEL BIT(15) +#define SRC_CR_T1_ENSEL BIT(17) +#define SRC_CR_T2_ENSEL BIT(19) +#define SRC_CR_T3_ENSEL BIT(21) +#define SRC_CR_T4_ENSEL BIT(23) +#define SRC_CR_T5_ENSEL BIT(25) +#define SRC_CR_T6_ENSEL BIT(27) +#define SRC_CR_T7_ENSEL BIT(29) #define SRC_XTALCR 0x0CU #define SRC_XTALCR_XTALTIMEN BIT(20) #define SRC_XTALCR_SXTALDIS BIT(19) @@ -543,6 +551,19 @@ void __init nomadik_clk_init(void) __func__, np->name); return; } + + /* Set all timers to use the 2.4 MHz TIMCLK */ + val = readl(src_base + SRC_CR); + val |= SRC_CR_T0_ENSEL; + val |= SRC_CR_T1_ENSEL; + val |= SRC_CR_T2_ENSEL; + val |= SRC_CR_T3_ENSEL; + val |= SRC_CR_T4_ENSEL; + val |= SRC_CR_T5_ENSEL; + val |= SRC_CR_T6_ENSEL; + val |= SRC_CR_T7_ENSEL; + writel(val, src_base + SRC_CR); + val = readl(src_base + SRC_XTALCR); pr_info("SXTALO is %s\n", (val & SRC_XTALCR_SXTALDIS) ? "disabled" : "enabled"); -- cgit v0.10.2 From ca0a10672dad94aa1f89645f89eb6047b7bf2a19 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:05:07 +0200 Subject: netfilter: ebt_ulog: fix info leaks The ulog messages leak heap bytes by the means of padding bytes and incompletely filled string arrays. Fix those by memset(0)'ing the whole struct before filling it. 
Signed-off-by: Mathias Krause Signed-off-by: Pablo Neira Ayuso diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 5180938..7c470c3 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -181,6 +181,7 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* Fill in the ulog data */ pm->version = EBT_ULOG_VERSION; @@ -193,8 +194,6 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, pm->hook = hooknr; if (uloginfo->prefix != NULL) strcpy(pm->prefix, uloginfo->prefix); - else - *(pm->prefix) = '\0'; if (in) { strcpy(pm->physindev, in->name); @@ -204,16 +203,14 @@ static void ebt_ulog_packet(struct net *net, unsigned int hooknr, strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name); else strcpy(pm->indev, in->name); - } else - pm->indev[0] = pm->physindev[0] = '\0'; + } if (out) { /* If out exists, then out is a bridge port */ strcpy(pm->physoutdev, out->name); /* rcu_read_lock()ed by nf_hook_slow */ strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name); - } else - pm->outdev[0] = pm->physoutdev[0] = '\0'; + } if (skb_copy_bits(skb, -ETH_HLEN, pm->data, copy_len) < 0) BUG(); -- cgit v0.10.2 From 278f2b3e2af5f32ea1afe34fa12a2518153e6e49 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 30 Sep 2013 22:05:08 +0200 Subject: netfilter: ipt_ULOG: fix info leaks The ulog messages leak heap bytes by the means of padding bytes and incompletely filled string arrays. Fix those by memset(0)'ing the whole struct before filling it. 
Signed-off-by: Mathias Krause Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index cbc2215..9cb993c 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -220,6 +220,7 @@ static void ipt_ulog_packet(struct net *net, ub->qlen++; pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* We might not have a timestamp, get one */ if (skb->tstamp.tv64 == 0) @@ -238,8 +239,6 @@ static void ipt_ulog_packet(struct net *net, } else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); - else - *(pm->prefix) = '\0'; if (in && in->hard_header_len > 0 && skb->mac_header != skb->network_header && @@ -251,13 +250,9 @@ static void ipt_ulog_packet(struct net *net, if (in) strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); - else - pm->indev_name[0] = '\0'; if (out) strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); - else - pm->outdev_name[0] = '\0'; /* copy_len <= skb->len, so can't fail. */ if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) -- cgit v0.10.2 From a60a71b035e4d2f4920ef091265b1474a14ab313 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Sun, 6 Oct 2013 01:15:08 -0700 Subject: Input: move name/timer init to input_alloc_dev() We want to allow drivers to call input_event() at any time after the device got allocated. This means input_event() and input_register_device() must be allowed to run in parallel. The only conflicting calls in input_register_device() are init_timer() and dev_set_name(). Both can safely be moved to device allocation and we're good to go. 
Signed-off-by: David Herrmann Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/input.c b/drivers/input/input.c index c044699..e75d015 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -1734,6 +1734,7 @@ EXPORT_SYMBOL_GPL(input_class); */ struct input_dev *input_allocate_device(void) { + static atomic_t input_no = ATOMIC_INIT(0); struct input_dev *dev; dev = kzalloc(sizeof(struct input_dev), GFP_KERNEL); @@ -1743,9 +1744,13 @@ struct input_dev *input_allocate_device(void) device_initialize(&dev->dev); mutex_init(&dev->mutex); spin_lock_init(&dev->event_lock); + init_timer(&dev->timer); INIT_LIST_HEAD(&dev->h_list); INIT_LIST_HEAD(&dev->node); + dev_set_name(&dev->dev, "input%ld", + (unsigned long) atomic_inc_return(&input_no) - 1); + __module_get(THIS_MODULE); } @@ -2019,7 +2024,6 @@ static void devm_input_device_unregister(struct device *dev, void *res) */ int input_register_device(struct input_dev *dev) { - static atomic_t input_no = ATOMIC_INIT(0); struct input_devres *devres = NULL; struct input_handler *handler; unsigned int packet_size; @@ -2059,7 +2063,6 @@ int input_register_device(struct input_dev *dev) * If delay and period are pre-set by the driver, then autorepeating * is handled by the driver itself and we don't do it in input.c. 
*/ - init_timer(&dev->timer); if (!dev->rep[REP_DELAY] && !dev->rep[REP_PERIOD]) { dev->timer.data = (long) dev; dev->timer.function = input_repeat_key; @@ -2073,9 +2076,6 @@ int input_register_device(struct input_dev *dev) if (!dev->setkeycode) dev->setkeycode = input_default_setkeycode; - dev_set_name(&dev->dev, "input%ld", - (unsigned long) atomic_inc_return(&input_no) - 1); - error = device_add(&dev->dev); if (error) goto err_free_vals; -- cgit v0.10.2 From 0a6ad06c43315ce0e5115d5ce51b04464245dde8 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Thu, 19 Sep 2013 14:24:04 -0700 Subject: Input: cm109 - convert high volume dev_err() to dev_err_ratelimited() BugLink: http://bugs.launchpad.net/bugs/1222850 This input device can get into a state that produces a high volume of device status errors. Attempt to throttle these error messages such that the kernel log is not flooded. Signed-off-by: Tim Gardner Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/misc/cm109.c b/drivers/input/misc/cm109.c index 082684e..9365535 100644 --- a/drivers/input/misc/cm109.c +++ b/drivers/input/misc/cm109.c @@ -351,7 +351,9 @@ static void cm109_urb_irq_callback(struct urb *urb) if (status) { if (status == -ESHUTDOWN) return; - dev_err(&dev->intf->dev, "%s: urb status %d\n", __func__, status); + dev_err_ratelimited(&dev->intf->dev, "%s: urb status %d\n", + __func__, status); + goto out; } /* Special keys */ @@ -418,8 +420,12 @@ static void cm109_urb_ctl_callback(struct urb *urb) dev->ctl_data->byte[2], dev->ctl_data->byte[3]); - if (status) - dev_err(&dev->intf->dev, "%s: urb status %d\n", __func__, status); + if (status) { + if (status == -ESHUTDOWN) + return; + dev_err_ratelimited(&dev->intf->dev, "%s: urb status %d\n", + __func__, status); + } spin_lock(&dev->ctl_submit_lock); @@ -427,7 +433,7 @@ static void cm109_urb_ctl_callback(struct urb *urb) if (likely(!dev->shutdown)) { - if (dev->buzzer_pending) { + if (dev->buzzer_pending || status) { dev->buzzer_pending = 0; 
dev->ctl_urb_pending = 1; cm109_submit_buzz_toggle(dev); -- cgit v0.10.2 From 1022c75f5abd3a3b25e679bc8793d21bedd009b4 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Thu, 3 Oct 2013 12:05:02 +0200 Subject: clk: armada-370: fix tclk frequencies This patch fixes the tclk frequency array for the Armada-370 SoC. This bug has been introduced by commit 6b72333d ("clk: mvebu: add Armada 370 SoC-centric clock init"). A wrong tclk frequency affects the following drivers: mvsdio, mvneta, i2c-mv64xxx and mvebu-devbus. This list may be incomplete. About the mvneta Ethernet driver, note that the tclk frequency is used to compute the Rx time coalescence. Then, this bug harms the coalescence configuration and also degrades the networking performances with the default values. Signed-off-by: Simon Guinot Cc: Andrew Lunn Cc: Gregory CLEMENT Cc: Sebastian Hesselbarth Acked-by: Jason Cooper Cc: stable@vger.kernel.org Signed-off-by: Michael Turquette diff --git a/drivers/clk/mvebu/armada-370.c b/drivers/clk/mvebu/armada-370.c index fc777bd..81a202d 100644 --- a/drivers/clk/mvebu/armada-370.c +++ b/drivers/clk/mvebu/armada-370.c @@ -39,8 +39,8 @@ static const struct coreclk_ratio a370_coreclk_ratios[] __initconst = { }; static const u32 a370_tclk_freqs[] __initconst = { - 16600000, - 20000000, + 166000000, + 200000000, }; static u32 __init a370_get_tclk_freq(void __iomem *sar) -- cgit v0.10.2 From 79a2e998895ae3e75d6d3d9fdeec2be94bfcf6c8 Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Tue, 17 Sep 2013 11:23:05 -0500 Subject: clk: socfpga: Fix incorrect sdmmc clock name The SD/MMC clock is named "sdmmc_clk", and NOT "mmc_clk". Because of this, the SD driver was getting the incorrect clock value. This prevented the SD driver from initializing correctly. 
Signed-off-by: Dinh Nguyen CC: Arnd Bergmann CC: Olof Johansson Reviewed-by: Pavel Machek Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Mike Turquette diff --git a/drivers/clk/socfpga/clk.c b/drivers/clk/socfpga/clk.c index 5bb848c..81dd31a 100644 --- a/drivers/clk/socfpga/clk.c +++ b/drivers/clk/socfpga/clk.c @@ -49,7 +49,7 @@ #define SOCFPGA_L4_SP_CLK "l4_sp_clk" #define SOCFPGA_NAND_CLK "nand_clk" #define SOCFPGA_NAND_X_CLK "nand_x_clk" -#define SOCFPGA_MMC_CLK "mmc_clk" +#define SOCFPGA_MMC_CLK "sdmmc_clk" #define SOCFPGA_DB_CLK "gpio_db_clk" #define div_mask(width) ((1 << (width)) - 1) -- cgit v0.10.2 From 2f9f64bc5aa31836810cd25301aa4772ad73ebab Mon Sep 17 00:00:00 2001 From: Jonathan Austin Date: Tue, 23 Jul 2013 16:42:18 +0100 Subject: clk: fixup argument order when setting VCO parameters The order of arguments in the call to vco_set() for the ICST clocks appears to have been switched in error, which results in the VCO not being initialised correctly. This in turn stops the integrated LCD on things like Integrator/CP from working correctly. This patch fixes the order and restores the expected functionality. 
Reviewed-by: Linus Walleij Signed-off-by: Jonathan Austin Signed-off-by: Mike Turquette Cc: stable@vger.kernel.org diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c index 67ccf4a..f5e4c21 100644 --- a/drivers/clk/versatile/clk-icst.c +++ b/drivers/clk/versatile/clk-icst.c @@ -107,7 +107,7 @@ static int icst_set_rate(struct clk_hw *hw, unsigned long rate, vco = icst_hz_to_vco(icst->params, rate); icst->rate = icst_hz(icst->params, vco); - vco_set(icst->vcoreg, icst->lockreg, vco); + vco_set(icst->lockreg, icst->vcoreg, vco); return 0; } -- cgit v0.10.2 From 934a138e970e5af73d6f79469a1eb363572abbaa Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 7 Oct 2013 15:20:07 -0700 Subject: target: Make target_do_xcopy failures return INVALID_PARAMETER_LIST This patch changes target_do_xcopy() to properly return TCM_INVALID_PARAMETER_LIST instead of TCM_INVALID_CDB_FIELD for failures related to the EXTENDED_COPY parameter list parsing. Also, move struct xcopy_op allocation ahead of kmapping to handle the special TCM_OUT_OF_RESOURCES case. 
Reported-by: Thomas Glanzmann Reported-by: Douglas Gilbert Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 3da4fd1..6b9774c 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -896,9 +896,17 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) return TCM_UNSUPPORTED_SCSI_OPCODE; } + xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL); + if (!xop) { + pr_err("Unable to allocate xcopy_op\n"); + return TCM_OUT_OF_RESOURCES; + } + xop->xop_se_cmd = se_cmd; + p = transport_kmap_data_sg(se_cmd); if (!p) { pr_err("transport_kmap_data_sg() failed in target_do_xcopy\n"); + kfree(xop); return TCM_OUT_OF_RESOURCES; } @@ -920,13 +928,6 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) goto out; } - xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL); - if (!xop) { - pr_err("Unable to allocate xcopy_op\n"); - goto out; - } - xop->xop_se_cmd = se_cmd; - pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x" " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage, tdll, sdll, inline_dl); @@ -957,7 +958,7 @@ out: if (p) transport_kunmap_data_sg(se_cmd); kfree(xop); - return TCM_INVALID_CDB_FIELD; + return TCM_INVALID_PARAMETER_LIST; } static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd) -- cgit v0.10.2 From 3f7a46c67440a7fa422919ab2059cbab838ca558 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 7 Oct 2013 15:22:15 -0700 Subject: target: Allow non zero ListID in EXTENDED_COPY parameter list This patch changes target_do_xcopy() to allow processing of non-zero ListIDs in EXTENDED_COPY parameter list data, instead of returning CHECK_CONDITION status. As the copy offload implementation reports SNLID=1 (Supports No ListID) in OPERATING PARAMETERS, any ListID value presented by the client is currently ignored. Also, properly extract list_id_usage for informational purposes. 
Reported-by: Thomas Glanzmann Reported-by: Douglas Gilbert Cc: Hannes Reinecke Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 6b9774c..fe98555 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -911,11 +911,8 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) } list_id = p[0]; - if (list_id != 0x00) { - pr_err("XCOPY with non zero list_id: 0x%02x\n", list_id); - goto out; - } - list_id_usage = (p[1] & 0x18); + list_id_usage = (p[1] & 0x18) >> 3; + /* * Determine TARGET DESCRIPTOR LIST LENGTH + SEGMENT DESCRIPTOR LIST LENGTH */ -- cgit v0.10.2 From acb3f2600eb8cdd35643a8b9237719ce61c98d41 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 7 Oct 2013 18:05:14 -0700 Subject: target: Reject EXTENDED_COPY when emulate_3pc is disabled This patch rejects EXTENDED_COPY when the emulate_3pc attribute has been explicitly disabled for the receiving device. It also adds a similar check in target_xcopy_locate_se_dev_e4() to ignore these devices when doing a search based upon the identifier WWN provided by EXTENDED_COPY parameter list target descriptors. 
Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index fe98555..eeeaf99 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -82,6 +82,9 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op mutex_lock(&g_device_mutex); list_for_each_entry(se_dev, &g_device_list, g_dev_node) { + if (!se_dev->dev_attrib.emulate_3pc) + continue; + memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN); target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]); @@ -884,12 +887,18 @@ out: sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) { + struct se_device *dev = se_cmd->se_dev; struct xcopy_op *xop = NULL; unsigned char *p = NULL, *seg_desc; unsigned int list_id, list_id_usage, sdll, inline_dl, sa; int rc; unsigned short tdll; + if (!dev->dev_attrib.emulate_3pc) { + pr_err("EXTENDED_COPY operation explicitly disabled\n"); + return TCM_UNSUPPORTED_SCSI_OPCODE; + } + sa = se_cmd->t_task_cdb[1] & 0x1f; if (sa != 0x00) { pr_err("EXTENDED_COPY(LID4) not supported\n"); -- cgit v0.10.2 From 2053a1db41193c2b5e1f47a91aaba0fd63ba7102 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 8 Oct 2013 09:47:22 -0700 Subject: target: Fix assignment of LUN in tracepoints The unpacked_lun field in the SCSI target tracepoints should be initialized with cmd->orig_fe_lun rather than cmd->se_lun->unpacked_lun for two reasons: - most importantly, if we are in the cmd_complete tracepoint returning a check condition due to no LUN found, cmd->se_lun will be NULL and we'll crash trying to dereference it. - also, in any case, cmd->se_lun->unpacked_lun is an internal index into the target's internal set of LUNs; cmd->orig_fe_lun is much more useful and interesting, since it's the value the initiator actually sent. 
Signed-off-by: Roland Dreier Cc: # 3.11+ Signed-off-by: Nicholas Bellinger diff --git a/include/trace/events/target.h b/include/trace/events/target.h index aef8fc3..da9cc0f 100644 --- a/include/trace/events/target.h +++ b/include/trace/events/target.h @@ -144,7 +144,7 @@ TRACE_EVENT(target_sequencer_start, ), TP_fast_assign( - __entry->unpacked_lun = cmd->se_lun->unpacked_lun; + __entry->unpacked_lun = cmd->orig_fe_lun; __entry->opcode = cmd->t_task_cdb[0]; __entry->data_length = cmd->data_length; __entry->task_attribute = cmd->sam_task_attr; @@ -182,7 +182,7 @@ TRACE_EVENT(target_cmd_complete, ), TP_fast_assign( - __entry->unpacked_lun = cmd->se_lun->unpacked_lun; + __entry->unpacked_lun = cmd->orig_fe_lun; __entry->opcode = cmd->t_task_cdb[0]; __entry->data_length = cmd->data_length; __entry->task_attribute = cmd->sam_task_attr; -- cgit v0.10.2 From 29114fd7db2fc82a34da8340d29b8fa413e03dca Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 7 Oct 2013 15:19:53 +0200 Subject: ARM: integrator: deactivate timer0 on the Integrator/CP This fixes a long-standing Integrator/CP regression from commit 870e2928cf3368ca9b06bc925d0027b0a56bcd8e "ARM: integrator-cp: convert use CLKSRC_OF for timer init" When this code was introduced, both aliases pointing the system to use timer1 as primary (clocksource) and timer2 as secondary (clockevent) were ignored, and the system would simply use the first two timers found as clocksource and clockevent. However this made the system timeline accelerate by a factor x25, as it turns out that the way the clocking actually works (totally undocumented and found after some trial-and-error) is that timer0 runs @ 25MHz and timer1 and timer2 run @ 1MHz. Presumably this divider setting is a boot-on default and configurable albeit the way to configure it is not documented. So as a quick fix to the problem, let's mark timer0 as disabled, so the code will choose timer1 and timer2 as it used to.
This also deletes the two aliases for the primary and secondary timer as they have been superseded by the auto-selection Cc: stable@vger.kernel.org Cc: Rob Herring Cc: Russell King Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson diff --git a/arch/arm/boot/dts/integratorcp.dts b/arch/arm/boot/dts/integratorcp.dts index ff1aea0..72693a6 100644 --- a/arch/arm/boot/dts/integratorcp.dts +++ b/arch/arm/boot/dts/integratorcp.dts @@ -9,11 +9,6 @@ model = "ARM Integrator/CP"; compatible = "arm,integrator-cp"; - aliases { - arm,timer-primary = &timer2; - arm,timer-secondary = &timer1; - }; - chosen { bootargs = "root=/dev/ram0 console=ttyAMA0,38400n8 earlyprintk"; }; @@ -24,14 +19,18 @@ }; timer0: timer@13000000 { + /* TIMER0 runs @ 25MHz */ compatible = "arm,integrator-cp-timer"; + status = "disabled"; }; timer1: timer@13000100 { + /* TIMER1 runs @ 1MHz */ compatible = "arm,integrator-cp-timer"; }; timer2: timer@13000200 { + /* TIMER2 runs @ 1MHz */ compatible = "arm,integrator-cp-timer"; }; -- cgit v0.10.2 From dcb7150368ef7c4cd93181979bf90632532f53bb Mon Sep 17 00:00:00 2001 From: Jason Cooper Date: Mon, 14 Oct 2013 18:32:21 +0000 Subject: MAINTAINERS: ARM: mvebu: add Sebastian Hesselbarth Sebastian is a hobbyist who has done a lot of heavy lifting converting mach-dove to devicetree, and assisting others with patches pertaining to mvebu. It is hoped that he will continue this work, and also assist the current mvebu maintainers with patch wrangling and pull request submissions.
Signed-off-by: Jason Cooper Acked-by: Sebastian Hesselbarth Signed-off-by: Kevin Hilman diff --git a/MAINTAINERS b/MAINTAINERS index 8a0cbf3..7a45039 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1009,6 +1009,7 @@ ARM/Marvell Armada 370 and Armada XP SOC support M: Jason Cooper M: Andrew Lunn M: Gregory Clement +M: Sebastian Hesselbarth L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-mvebu/ @@ -1016,6 +1017,7 @@ F: arch/arm/mach-mvebu/ ARM/Marvell Dove/Kirkwood/MV78xx0/Orion SOC support M: Jason Cooper M: Andrew Lunn +M: Sebastian Hesselbarth L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-dove/ -- cgit v0.10.2 From 1b286bdd5b7684c681b63d5e75cada037064c315 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 11 Oct 2013 12:23:17 +0000 Subject: perf probe: Fix to initialize fname always before use it Fix perf probe --list to initialize fname local var always before use it. This may cause a SEGV if there is a probe which is in the function body but not in any inline function. 
Problem introduced in: commit e08cfd4bda76 Author: Masami Hiramatsu Date: Mon Sep 30 18:21:44 2013 +0900 perf probe: Fix to find line information for probe list Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20131011122317.9662.29736.stgit@kbuild-fedora.novalocal Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c09e0a9..f069273 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1357,10 +1357,10 @@ int debuginfo__find_probe_point(struct debuginfo *self, unsigned long addr, goto post; } + fname = dwarf_decl_file(&spdie); if (addr == (unsigned long)baseaddr) { /* Function entry - Relative line number is 0 */ lineno = baseline; - fname = dwarf_decl_file(&spdie); goto post; } -- cgit v0.10.2 From 244066f4be8ce2dc2d2d60f01bceeba55c71a783 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 15 Oct 2013 02:22:42 +0400 Subject: xtensa: fix fast_syscall_spill_registers_fixup fast_syscall_spill_registers_fixup was not correctly updated by the 'keep a3 and excsave1 on entry to exception handlers' patch: it doesn't preserve a3 that it gets on entry, breaking _spill_registers in case of page fault on stack during register spilling, leading to unhandled exception in kernel mode. Preserve a3 by saving it in the original _spill_registers stack frame's a3 during exception handling and restoring it afterwards. Also fix comments and function bounds annotations. 
Reported-by: Baruch Siach Signed-off-by: Max Filippov Tested-by: Baruch Siach Signed-off-by: Chris Zankel diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index de1dfa1..21dbe6b 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -1122,7 +1122,7 @@ ENDPROC(fast_syscall_spill_registers) * a3: exctable, original value in excsave1 */ -fast_syscall_spill_registers_fixup: +ENTRY(fast_syscall_spill_registers_fixup) rsr a2, windowbase # get current windowbase (a2 is saved) xsr a0, depc # restore depc and a0 @@ -1134,22 +1134,26 @@ fast_syscall_spill_registers_fixup: */ xsr a3, excsave1 # get spill-mask - slli a2, a3, 1 # shift left by one + slli a3, a3, 1 # shift left by one - slli a3, a2, 32-WSBITS - src a2, a2, a3 # a1 = xxwww1yyxxxwww1yy...... + slli a2, a3, 32-WSBITS + src a2, a3, a2 # a2 = xxwww1yyxxxwww1yy...... wsr a2, windowstart # set corrected windowstart - rsr a3, excsave1 - l32i a2, a3, EXC_TABLE_DOUBLE_SAVE # restore a2 - l32i a3, a3, EXC_TABLE_PARAM # original WB (in user task) + srli a3, a3, 1 + rsr a2, excsave1 + l32i a2, a2, EXC_TABLE_DOUBLE_SAVE # restore a2 + xsr a2, excsave1 + s32i a3, a2, EXC_TABLE_DOUBLE_SAVE # save a3 + l32i a3, a2, EXC_TABLE_PARAM # original WB (in user task) + xsr a2, excsave1 /* Return to the original (user task) WINDOWBASE. * We leave the following frame behind: * a0, a1, a2 same - * a3: trashed (saved in excsave_1) + * a3: trashed (saved in EXC_TABLE_DOUBLE_SAVE) * depc: depc (we have to return to that address) - * excsave_1: a3 + * excsave_1: exctable */ wsr a3, windowbase @@ -1159,9 +1163,9 @@ fast_syscall_spill_registers_fixup: * a0: return address * a1: used, stack pointer * a2: kernel stack pointer - * a3: available, saved in EXCSAVE_1 + * a3: available * depc: exception address - * excsave: a3 + * excsave: exctable * Note: This frame might be the same as above. 
*/ @@ -1181,9 +1185,12 @@ fast_syscall_spill_registers_fixup: rsr a0, exccause addx4 a0, a0, a3 # find entry in table l32i a0, a0, EXC_TABLE_FAST_USER # load handler + l32i a3, a3, EXC_TABLE_DOUBLE_SAVE jx a0 -fast_syscall_spill_registers_fixup_return: +ENDPROC(fast_syscall_spill_registers_fixup) + +ENTRY(fast_syscall_spill_registers_fixup_return) /* When we return here, all registers have been restored (a2: DEPC) */ @@ -1191,13 +1198,13 @@ fast_syscall_spill_registers_fixup_return: /* Restore fixup handler. */ - xsr a3, excsave1 - movi a2, fast_syscall_spill_registers_fixup - s32i a2, a3, EXC_TABLE_FIXUP - s32i a0, a3, EXC_TABLE_DOUBLE_SAVE - rsr a2, windowbase - s32i a2, a3, EXC_TABLE_PARAM - l32i a2, a3, EXC_TABLE_KSTK + rsr a2, excsave1 + s32i a3, a2, EXC_TABLE_DOUBLE_SAVE + movi a3, fast_syscall_spill_registers_fixup + s32i a3, a2, EXC_TABLE_FIXUP + rsr a3, windowbase + s32i a3, a2, EXC_TABLE_PARAM + l32i a2, a2, EXC_TABLE_KSTK /* Load WB at the time the exception occurred. */ @@ -1206,8 +1213,12 @@ fast_syscall_spill_registers_fixup_return: wsr a3, windowbase rsync + rsr a3, excsave1 + l32i a3, a3, EXC_TABLE_DOUBLE_SAVE + rfde +ENDPROC(fast_syscall_spill_registers_fixup_return) /* * spill all registers. -- cgit v0.10.2 From cba9a90053e3b7973eff4f1946f33032e98eeed5 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 15 Oct 2013 02:22:43 +0400 Subject: xtensa: don't use alternate signal stack on threads According to create_thread(3): "The new thread does not inherit the creating thread's alternate signal stack". Since commit f9a3879a (Fix sigaltstack corruption among cloned threads), current->sas_ss_size is set to 0 for cloned processes sharing VM with their parent. Don't use the (nonexistent) alternate signal stack in this case. This has been broken since commit 29c4dfd9 ([XTENSA] Remove non-rt signal handling). Fixes the SA_ONSTACK part of the nptl/tst-cancel20 test from uClibc. 
Cc: Signed-off-by: Baruch Siach Signed-off-by: Max Filippov Signed-off-by: Chris Zankel diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index 718eca1..98b67d5 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -341,7 +341,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sp = regs->areg[1]; - if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! on_sig_stack(sp)) { + if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && sas_ss_flags(sp) == 0) { sp = current->sas_ss_sp + current->sas_ss_size; } -- cgit v0.10.2 From 6e2a6e8063e7b5d53ef1687286468791dd143f96 Mon Sep 17 00:00:00 2001 From: Bastien Nocera Date: Tue, 15 Oct 2013 23:33:00 -0700 Subject: Input: wacom - export battery scope This will stop UPower from detecting the tablet as a power supply, and using its battery status to hibernate or switch off the machine. https://bugs.freedesktop.org/show_bug.cgi?id=70321 Signed-off-by: Bastien Nocera Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index 79b69ea..e53416a 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -1031,6 +1031,7 @@ static void wacom_destroy_leds(struct wacom *wacom) } static enum power_supply_property wacom_battery_props[] = { + POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_CAPACITY }; @@ -1042,6 +1043,9 @@ static int wacom_battery_get_property(struct power_supply *psy, int ret = 0; switch (psp) { + case POWER_SUPPLY_PROP_SCOPE: + val->intval = POWER_SUPPLY_SCOPE_DEVICE; + break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = wacom->wacom_wac.battery_capacity * 100 / 31; -- cgit v0.10.2 From 6541932ea2f7de0b0c5203decf666b143ad5fa33 Mon Sep 17 00:00:00 2001 From: Khalid Aziz Date: Wed, 25 Sep 2013 11:45:11 -0600 Subject: [SCSI] BusLogic: Fix an oops when initializing multimaster adapter This fixes an oops caused by buslogic driver when initializing a BusLogic MultiMaster adapter.
Initialization code used scope of a variable incorrectly which created a NULL pointer. Oops message is below: BUG: unable to handle kernel NULL pointer dereference at 0000000c IP: [] blogic_init_mm_probeinfo.isra.17+0x20a/0x583 *pde = 00000000 Oops: 002 [#1] PREEMPT SMP Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 3.11.1.puz1 #1 Hardware name: /Canterwood, BIOS 6.00 PG 05/16/2003 task: f7050000 ti: f7054000 task.ti: f7054000 EIP: 0060:[] EFLAGS: 00010246 CPU:1 EIP is at blogic_init_mm_probeinfo.isra.17+0x20a/0x583 EAX: 00000013 EBX: 00000000 ECX: 00000000 EDX: f8001000 ESI: f71cb800 EDI: f7388000 EBP: 00007800 ESP: f7055c84 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 CR0: 8005003b CR2: 0000000c CR3: 0154f000 CR4: 000007d0 Stack: 0000001c 00000000 c11a59f6 f7055c98 00008130 ffffffff ffffffff 00000000 00000003 00000000 00000000 00000000 00000013 f8001000 00000001 000003d0 00000000 00000000 00000000 c14e3f84 f78803c8 00000000 f738c000 000000e9 Call Trace: [] ? pci_get_subsys+0x33/0x38 [] ? blogic_init_probeinfo_list+0x4b/0x19e [] ? __alloc_pages_nodemask+0xe3/0x623 [] ? __alloc_pages_nodemask+0xe3/0x623 [] ? sysfs_link_sibling+0x61/0x8d [] ? kmem_cache_alloc+0x8b/0xb5 [] ? blogic_init+0xa1/0x10e8 [] ? sysfs_add_one+0x10/0x9d [] ? sysfs_addrm_finish+0x12/0x85 [] ? sysfs_do_create_link_sd+0x9d/0x1b4 [] ? blk_register_queue+0x69/0xb3 [] ? sysfs_create_link+0x1a/0x2c [] ? add_disk+0x1a1/0x3c7 [] ? klist_next+0x60/0xc3 [] ? scsi_dh_detach+0x68/0x68 [] ? bus_for_each_dev+0x51/0x61 [] ? do_one_initcall+0x22/0x12c [] ? __proc_create+0x8c/0xba [] ? blogic_setup+0x5f6/0x5f6 [] ? repair_env_string+0xf/0x4d [] ? do_early_param+0x71/0x71 [] ? parse_args+0x21f/0x33d [] ? kernel_init_freeable+0xdf/0x17d [] ? do_early_param+0x71/0x71 [] ? kernel_init+0x8/0xc0 [] ? ret_from_kernel_thread+0x6/0x28 [] ? ret_from_kernel_thread+0x1b/0x28 [] ? 
rest_init+0x6c/0x6c Code: 89 44 24 10 0f b6 44 24 3d 89 44 24 0c c7 44 24 08 00 00 00 00 c7 44 24 04 38 62 46 c1 c7 04 24 02 00 00 00 e8 78 13 d2 ff 31 db <89> 6b 0c b0 20 89 ea ee c7 44 24 08 04 00 00 00 8d 44 24 4c 89 EIP: [] blogic_init_mm_probeinfo.isra.17+0x20a/0x583 SS:ESP 0068:f7055c84 CR2: 000000000000000c ---[ end trace 17f45f5196d40487 ]--- Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 Signed-off-by: Khalid Aziz Cc: # 3.11.x Reported-by: Pierre Uszynski Tested-by: Pierre Uszynski Signed-off-by: James Bottomley diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c index feab3a5..757eb07 100644 --- a/drivers/scsi/BusLogic.c +++ b/drivers/scsi/BusLogic.c @@ -696,7 +696,7 @@ static int __init blogic_init_mm_probeinfo(struct blogic_adapter *adapter) while ((pci_device = pci_get_device(PCI_VENDOR_ID_BUSLOGIC, PCI_DEVICE_ID_BUSLOGIC_MULTIMASTER, pci_device)) != NULL) { - struct blogic_adapter *adapter = adapter; + struct blogic_adapter *host_adapter = adapter; struct blogic_adapter_info adapter_info; enum blogic_isa_ioport mod_ioaddr_req; unsigned char bus; @@ -744,9 +744,9 @@ static int __init blogic_init_mm_probeinfo(struct blogic_adapter *adapter) known and enabled, note that the particular Standard ISA I/O Address should not be probed. */ - adapter->io_addr = io_addr; - blogic_intreset(adapter); - if (blogic_cmd(adapter, BLOGIC_INQ_PCI_INFO, NULL, 0, + host_adapter->io_addr = io_addr; + blogic_intreset(host_adapter); + if (blogic_cmd(host_adapter, BLOGIC_INQ_PCI_INFO, NULL, 0, &adapter_info, sizeof(adapter_info)) == sizeof(adapter_info)) { if (adapter_info.isa_port < 6) @@ -762,7 +762,7 @@ static int __init blogic_init_mm_probeinfo(struct blogic_adapter *adapter) I/O Address assigned at system initialization. 
*/ mod_ioaddr_req = BLOGIC_IO_DISABLE; - blogic_cmd(adapter, BLOGIC_MOD_IOADDR, &mod_ioaddr_req, + blogic_cmd(host_adapter, BLOGIC_MOD_IOADDR, &mod_ioaddr_req, sizeof(mod_ioaddr_req), NULL, 0); /* For the first MultiMaster Host Adapter enumerated, @@ -779,12 +779,12 @@ static int __init blogic_init_mm_probeinfo(struct blogic_adapter *adapter) fetch_localram.offset = BLOGIC_AUTOSCSI_BASE + 45; fetch_localram.count = sizeof(autoscsi_byte45); - blogic_cmd(adapter, BLOGIC_FETCH_LOCALRAM, + blogic_cmd(host_adapter, BLOGIC_FETCH_LOCALRAM, &fetch_localram, sizeof(fetch_localram), &autoscsi_byte45, sizeof(autoscsi_byte45)); - blogic_cmd(adapter, BLOGIC_GET_BOARD_ID, NULL, 0, &id, - sizeof(id)); + blogic_cmd(host_adapter, BLOGIC_GET_BOARD_ID, NULL, 0, + &id, sizeof(id)); if (id.fw_ver_digit1 == '5') force_scan_order = autoscsi_byte45.force_scan_order; -- cgit v0.10.2 From f447fd30afdbb40c913054edaacf1a32df7a55d7 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Thu, 19 Sep 2013 23:42:22 +0200 Subject: xtensa: Cocci spatch "noderef" sizeof when applied to a pointer typed expression gives the size of the pointer. 
Found by coccinelle spatch "misc/noderef.cocci" Signed-off-by: Thomas Meyer Signed-off-by: Chris Zankel diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 56f88b7..e9e1aad 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -737,7 +737,8 @@ static int __init iss_net_setup(char *str) return 1; } - if ((new = alloc_bootmem(sizeof new)) == NULL) { + new = alloc_bootmem(sizeof(*new)); + if (new == NULL) { printk("Alloc_bootmem failed\n"); return 1; } -- cgit v0.10.2 From 5cb770bf4b777dae832151f4bc4d35e7a99f9880 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 14 Oct 2013 15:49:23 -0700 Subject: target: Return an error for WRITE SAME with ANCHOR==1 Per SBC-3, since we report ANC_SUP==0 in VPD page B2h, we need to return an error (ILLEGAL REQUEST/INVALID FIELD IN CDB) for all WRITE SAME requests with ANCHOR==1. Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 4714c6f..d9b92b2 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -263,6 +263,11 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o sectors, cmd->se_dev->dev_attrib.max_write_same_len); return TCM_INVALID_CDB_FIELD; } + /* We always have ANC_SUP == 0 so setting ANCHOR is always an error */ + if (flags[0] & 0x10) { + pr_warn("WRITE SAME with ANCHOR not supported\n"); + return TCM_INVALID_CDB_FIELD; + } /* * Special case for WRITE_SAME w/ UNMAP=1 that ends up getting * translated into block discard requests within backend code. 
-- cgit v0.10.2 From b07c26511e94ab856f3700c56d582c0da36d5b4d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2013 14:54:11 -0700 Subject: openvswitch: fix vport-netdev unregister The combination of two commits: commit 8e4e1713e4 ("openvswitch: Simplify datapath locking.") commit 2537b4dd0a ("openvswitch:: link upper device for port devices") introduced a bug where upper_dev wasn't unlinked upon netdev_unregister notification The following steps: modprobe openvswitch ovs-dpctl add-dp test ip tuntap add dev tap1 mode tap ovs-dpctl add-if test tap1 ip tuntap del dev tap1 mode tap are causing multiple warnings: [ 62.747557] gre: GRE over IPv4 demultiplexor driver [ 62.749579] openvswitch: Open vSwitch switching datapath [ 62.755087] device test entered promiscuous mode [ 62.765911] device tap1 entered promiscuous mode [ 62.766033] IPv6: ADDRCONF(NETDEV_UP): tap1: link is not ready [ 62.769017] ------------[ cut here ]------------ [ 62.769022] WARNING: CPU: 1 PID: 3267 at net/core/dev.c:5501 rollback_registered_many+0x20f/0x240() [ 62.769023] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video [ 62.769051] CPU: 1 PID: 3267 Comm: ip Not tainted 3.12.0-rc3+ #60 [ 62.769052] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 62.769053] 0000000000000009 ffff8807f25cbd28 ffffffff8175e575 0000000000000006 [ 62.769055] 0000000000000000 ffff8807f25cbd68 
ffffffff8105314c ffff8807f25cbd58 [ 62.769057] ffff8807f2634000 ffff8807f25cbdc8 ffff8807f25cbd88 ffff8807f25cbdc8 [ 62.769059] Call Trace: [ 62.769062] [] dump_stack+0x55/0x76 [ 62.769065] [] warn_slowpath_common+0x8c/0xc0 [ 62.769067] [] warn_slowpath_null+0x1a/0x20 [ 62.769069] [] rollback_registered_many+0x20f/0x240 [ 62.769071] [] rollback_registered+0x31/0x40 [ 62.769073] [] unregister_netdevice_queue+0x58/0x90 [ 62.769075] [] __tun_detach+0x140/0x340 [ 62.769077] [] tun_chr_close+0x36/0x60 [ 62.769080] [] __fput+0xff/0x260 [ 62.769082] [] ____fput+0xe/0x10 [ 62.769084] [] task_work_run+0xb5/0xe0 [ 62.769087] [] do_notify_resume+0x59/0x80 [ 62.769089] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 62.769091] [] int_signal+0x12/0x17 [ 62.769093] ---[ end trace 838756c62e156ffb ]--- [ 62.769481] ------------[ cut here ]------------ [ 62.769485] WARNING: CPU: 1 PID: 92 at fs/sysfs/inode.c:325 sysfs_hash_and_remove+0xa9/0xb0() [ 62.769486] sysfs: can not remove 'master', no directory [ 62.769486] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video [ 62.769514] CPU: 1 PID: 92 Comm: kworker/1:2 Tainted: G W 3.12.0-rc3+ #60 [ 62.769515] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 62.769518] Workqueue: events ovs_dp_notify_wq [openvswitch] [ 62.769519] 0000000000000009 ffff880807ad3ac8 ffffffff8175e575 0000000000000006 [ 62.769521] ffff880807ad3b18 ffff880807ad3b08 
ffffffff8105314c ffff880807ad3b28 [ 62.769523] 0000000000000000 ffffffff81a87a1f ffff8807f2634000 ffff880037038500 [ 62.769525] Call Trace: [ 62.769528] [] dump_stack+0x55/0x76 [ 62.769529] [] warn_slowpath_common+0x8c/0xc0 [ 62.769531] [] warn_slowpath_fmt+0x46/0x50 [ 62.769533] [] sysfs_hash_and_remove+0xa9/0xb0 [ 62.769535] [] sysfs_remove_link+0x26/0x30 [ 62.769538] [] __netdev_adjacent_dev_remove+0xf7/0x150 [ 62.769540] [] __netdev_adjacent_dev_unlink_lists+0x27/0x50 [ 62.769542] [] __netdev_adjacent_dev_unlink_neighbour+0x3a/0x50 [ 62.769544] [] netdev_upper_dev_unlink+0x3d/0x140 [ 62.769548] [] netdev_destroy+0x4b/0x80 [openvswitch] [ 62.769550] [] ovs_vport_del+0x46/0x60 [openvswitch] [ 62.769552] [] ovs_dp_detach_port+0x44/0x60 [openvswitch] [ 62.769555] [] ovs_dp_notify_wq+0xb4/0x150 [openvswitch] [ 62.769557] [] process_one_work+0x1d8/0x6a0 [ 62.769559] [] ? process_one_work+0x178/0x6a0 [ 62.769562] [] worker_thread+0x11b/0x370 [ 62.769564] [] ? rescuer_thread+0x350/0x350 [ 62.769566] [] kthread+0xea/0xf0 [ 62.769568] [] ? flush_kthread_worker+0x150/0x150 [ 62.769570] [] ret_from_fork+0x7c/0xb0 [ 62.769572] [] ? 
flush_kthread_worker+0x150/0x150 [ 62.769573] ---[ end trace 838756c62e156ffc ]--- [ 62.769574] ------------[ cut here ]------------ [ 62.769576] WARNING: CPU: 1 PID: 92 at fs/sysfs/inode.c:325 sysfs_hash_and_remove+0xa9/0xb0() [ 62.769577] sysfs: can not remove 'upper_test', no directory [ 62.769577] Modules linked in: openvswitch gre vxlan ip_tunnel libcrc32c ip6table_filter ip6_tables ebtable_nat ebtables nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack xt_CHECKSUM iptable_mangle ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp llc vhost_net macvtap macvlan vhost kvm_intel kvm dm_crypt iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi hid_generic mxm_wmi eeepc_wmi asus_wmi sparse_keymap dm_multipath psmouse serio_raw usbhid hid parport_pc ppdev firewire_ohci lpc_ich firewire_core e1000e crc_itu_t binfmt_misc igb dca ptp pps_core mac_hid wmi lp parport i2o_config i2o_block video [ 62.769603] CPU: 1 PID: 92 Comm: kworker/1:2 Tainted: G W 3.12.0-rc3+ #60 [ 62.769604] Hardware name: System manufacturer System Product Name/P8Z77 WS, BIOS 3007 07/26/2012 [ 62.769606] Workqueue: events ovs_dp_notify_wq [openvswitch] [ 62.769607] 0000000000000009 ffff880807ad3ac8 ffffffff8175e575 0000000000000006 [ 62.769609] ffff880807ad3b18 ffff880807ad3b08 ffffffff8105314c ffff880807ad3b58 [ 62.769611] 0000000000000000 ffff880807ad3bd9 ffff8807f2634000 ffff880037038500 [ 62.769613] Call Trace: [ 62.769615] [] dump_stack+0x55/0x76 [ 62.769617] [] warn_slowpath_common+0x8c/0xc0 [ 62.769619] [] warn_slowpath_fmt+0x46/0x50 [ 62.769621] [] sysfs_hash_and_remove+0xa9/0xb0 [ 62.769622] [] sysfs_remove_link+0x26/0x30 [ 62.769624] [] __netdev_adjacent_dev_remove+0x122/0x150 [ 62.769627] [] __netdev_adjacent_dev_unlink_lists+0x27/0x50 [ 62.769629] [] __netdev_adjacent_dev_unlink_neighbour+0x3a/0x50 [ 62.769631] [] netdev_upper_dev_unlink+0x3d/0x140 [ 62.769633] [] netdev_destroy+0x4b/0x80 [openvswitch] [ 62.769636] [] ovs_vport_del+0x46/0x60 [openvswitch] [ 62.769638] 
[] ovs_dp_detach_port+0x44/0x60 [openvswitch] [ 62.769640] [] ovs_dp_notify_wq+0xb4/0x150 [openvswitch] [ 62.769642] [] process_one_work+0x1d8/0x6a0 [ 62.769644] [] ? process_one_work+0x178/0x6a0 [ 62.769646] [] worker_thread+0x11b/0x370 [ 62.769648] [] ? rescuer_thread+0x350/0x350 [ 62.769650] [] kthread+0xea/0xf0 [ 62.769652] [] ? flush_kthread_worker+0x150/0x150 [ 62.769654] [] ret_from_fork+0x7c/0xb0 [ 62.769656] [] ? flush_kthread_worker+0x150/0x150 [ 62.769657] ---[ end trace 838756c62e156ffd ]--- [ 62.769724] device tap1 left promiscuous mode This patch also affects moving devices between net namespaces. OVS used to ignore netns move notifications which caused problems. Like: ovs-dpctl add-if test tap1 ip link set tap1 netns 3512 and then removing tap1 inside the namespace will cause hang on missing dev_put. With this patch OVS will detach dev upon receiving netns move event. Signed-off-by: Alexei Starovoitov Signed-off-by: Jesse Gross diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index c323567..5c2dab2 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -65,8 +65,7 @@ void ovs_dp_notify_wq(struct work_struct *work) continue; netdev_vport = netdev_vport_priv(vport); - if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED || - netdev_vport->dev->reg_state == NETREG_UNREGISTERING) + if (!(netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)) dp_detach_port_notify(vport); } } @@ -88,6 +87,10 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, return NOTIFY_DONE; if (event == NETDEV_UNREGISTER) { + /* upper_dev_unlink and decrement promisc immediately */ + ovs_netdev_detach_dev(vport); + + /* schedule vport destroy, dev_put and genl notification */ ovs_net = net_generic(dev_net(dev), ovs_net_id); queue_work(system_wq, &ovs_net->dp_notify_work); } diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 09d93c1..d21f77d 100644 --- 
a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -150,15 +150,25 @@ static void free_port_rcu(struct rcu_head *rcu) ovs_vport_free(vport_from_priv(netdev_vport)); } -static void netdev_destroy(struct vport *vport) +void ovs_netdev_detach_dev(struct vport *vport) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); - rtnl_lock(); + ASSERT_RTNL(); netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH; netdev_rx_handler_unregister(netdev_vport->dev); - netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp)); + netdev_upper_dev_unlink(netdev_vport->dev, + netdev_master_upper_dev_get(netdev_vport->dev)); dev_set_promiscuity(netdev_vport->dev, -1); +} + +static void netdev_destroy(struct vport *vport) +{ + struct netdev_vport *netdev_vport = netdev_vport_priv(vport); + + rtnl_lock(); + if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH) + ovs_netdev_detach_dev(vport); rtnl_unlock(); call_rcu(&netdev_vport->rcu, free_port_rcu); diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index dd298b5..8df01c11 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -39,5 +39,6 @@ netdev_vport_priv(const struct vport *vport) } const char *ovs_netdev_get_name(const struct vport *); +void ovs_netdev_detach_dev(struct vport *); #endif /* vport_netdev.h */ -- cgit v0.10.2 From 3edc8376c06133e3386265a824869cad03a4efd4 Mon Sep 17 00:00:00 2001 From: "Geyslan G. Bem" Date: Fri, 11 Oct 2013 16:49:16 -0300 Subject: ecryptfs: Fix memory leakage in keystore.c In 'decrypt_pki_encrypted_session_key' function: Initializes 'payload' pointer and releases it on exit. Signed-off-by: Geyslan G. 
Bem Signed-off-by: Tyler Hicks Cc: stable@vger.kernel.org # v2.6.28+ diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 7d52806..4725a07 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; char *auth_tok_sig; - char *payload; + char *payload = NULL; size_t payload_len = 0; int rc; @@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, } out: kfree(msg); + kfree(payload); return rc; } -- cgit v0.10.2 From 3b16ff89676d9902dc39976aee3cb0314ee37d93 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 14 Oct 2013 18:25:12 -0300 Subject: perf scripting perl: Fix build error on Fedora 12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cast __u64 to u64 to silence this warning on older distros, such as Fedora 12: CC /tmp/build/perf/util/scripting-engines/trace-event-perl.o cc1: warnings being treated as errors util/scripting-engines/trace-event-perl.c: In function ‘perl_process_tracepoint’: util/scripting-engines/trace-event-perl.c:285: error: format ‘%lu’ expects type ‘long unsigned int’, but argument 2 has type ‘__u64’ make[1]: *** [/tmp/build/perf/util/scripting-engines/trace-event-perl.o] Error 1 make: *** [install] Error 2 make: Leaving directory `/home/acme/git/linux/tools/perf' [acme@fedora12 linux]$ Reported-by: Waiman Long Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Cc: Waiman Long Link: http://lkml.kernel.org/n/tip-nlxofdqcdjfm0w9o6bgq4kqv@git.kernel.org Link: http://lkml.kernel.org/r/1381265120-58532-1-git-send-email-Waiman.Long@hp.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c 
b/tools/perf/util/scripting-engines/trace-event-perl.c index a85e4ae..c0c9795 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -282,7 +282,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __maybe_unused, event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %" PRIu64, evsel->attr.config); + die("ug! no event found for type %" PRIu64, (u64)evsel->attr.config); pid = raw_field_value(event, "common_pid", data); -- cgit v0.10.2 From 3090ffb5a2515990182f3f55b0688a7817325488 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 17 Oct 2013 19:32:15 +0200 Subject: perf: Disable PERF_RECORD_MMAP2 support For now, we disable the extended MMAP record support (MMAP2). We have identified cases where it would not report the correct mapping information, clone(CLONE_VM) but with separate pids. We will revisit the support once we find a solution for this case. The patch changes the kernel to return EINVAL if attr->mmap2 is set. The patch also modifies the perf tool to use regular PERF_RECORD_MMAP for synthetic events and it also prevents the tool from requesting attr->mmap2 mode because the kernel would reject it. The support will be revisited once the kernel interface is updated. In V2, we reduce the patch to the strict minimum. In V3, we avoid calling perf_event_open() with mmap2 set because we know it will fail and require fallback retry.
Signed-off-by: Stephane Eranian Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20131017173215.GA8820@quad Signed-off-by: Arnaldo Carvalho de Melo diff --git a/kernel/events/core.c b/kernel/events/core.c index d49a9d2..953c143 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6767,6 +6767,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (ret) return -EFAULT; + /* disabled for now */ + if (attr->mmap2) + return -EINVAL; + if (attr->__reserved_1) return -EINVAL; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 9b393e7..63df031 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -187,7 +187,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, return -1; } - event->header.type = PERF_RECORD_MMAP2; + event->header.type = PERF_RECORD_MMAP; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c */ @@ -198,7 +198,6 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, char prot[5]; char execname[PATH_MAX]; char anonstr[] = "//anon"; - unsigned int ino; size_t size; ssize_t n; @@ -209,13 +208,10 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", - &event->mmap2.start, &event->mmap2.len, prot, - &event->mmap2.pgoff, &event->mmap2.maj, - &event->mmap2.min, - &ino, execname); - - event->mmap2.ino = (u64)ino; + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n", + &event->mmap.start, &event->mmap.len, prot, + &event->mmap.pgoff, + execname); if (n != 8) continue; @@ -227,15 +223,15 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, anonstr); size = strlen(execname) + 1; - memcpy(event->mmap2.filename, execname, size); + memcpy(event->mmap.filename, execname, 
size); size = PERF_ALIGN(size, sizeof(u64)); - event->mmap2.len -= event->mmap.start; - event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); - event->mmap2.header.size += machine->id_hdr_size; - event->mmap2.pid = tgid; - event->mmap2.tid = pid; + event->mmap.len -= event->mmap.start; + event->mmap.header.size = (sizeof(event->mmap) - + (sizeof(event->mmap.filename) - size)); + memset(event->mmap.filename + size, 0, machine->id_hdr_size); + event->mmap.header.size += machine->id_hdr_size; + event->mmap.pid = tgid; + event->mmap.tid = pid; if (process(tool, event, &synth_sample, machine) != 0) { rc = -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0ce9feb..9f1ef9b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -678,7 +678,6 @@ void perf_evsel__config(struct perf_evsel *evsel, attr->sample_type |= PERF_SAMPLE_WEIGHT; attr->mmap = track; - attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; /* -- cgit v0.10.2 From 6e757ad2c92caf721fd0efaac7088247e3934c5e Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 17 Oct 2013 17:37:11 +0200 Subject: tty/serial: at91: fix uart/usart selection for older products Since commit 055560b04a8cd063aea916fd083b7aec02c2adb8 (serial: at91: distinguish usart and uart) the older products which do not have a name field in their register map are unable to use their serial output. As the main console output is usually the serial interface (aka DBGU) it is pretty unfortunate. So, instead of failing during probe() we just silently configure the serial peripheral as an uart. It allows us to use these serial outputs. The proper solution is proposed in another patch. 
Signed-off-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index d067285..6b0f75e 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1499,7 +1499,7 @@ static void atmel_set_ops(struct uart_port *port) /* * Get ip name usart or uart */ -static int atmel_get_ip_name(struct uart_port *port) +static void atmel_get_ip_name(struct uart_port *port) { struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); int name = UART_GET_IP_NAME(port); @@ -1518,10 +1518,7 @@ static int atmel_get_ip_name(struct uart_port *port) atmel_port->is_usart = false; } else { dev_err(port->dev, "Not supported ip name, set to uart\n"); - return -EINVAL; } - - return 0; } /* @@ -2405,9 +2402,7 @@ static int atmel_serial_probe(struct platform_device *pdev) /* * Get port name of usart or uart */ - ret = atmel_get_ip_name(&port->uart); - if (ret < 0) - goto err_add_port; + atmel_get_ip_name(&port->uart); return 0; -- cgit v0.10.2 From 12e3594698f6c3ab6ebacc79f2fb2ad2bb5952b5 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 17 Oct 2013 15:07:40 +0200 Subject: xfrm: prevent ipcomp scratch buffer race condition In ipcomp_compress(), softirq is enabled too early, allowing the per-cpu scratch buffer to be rewritten by ipcomp_decompress() (called on the same CPU in softirq context) between populating the buffer and copying the compressed data to the skb. v2: as pointed out by Steffen Klassert, if we also move the local_bh_disable() before reading the per-cpu pointers, we can get rid of get_cpu()/put_cpu().
v3: removed ipcomp_decompress part (as explained by Herbert Xu, it cannot be called from process context), get rid of cpu variable (thanks to Eric Dumazet) Signed-off-by: Michal Kubecek Reviewed-by: Eric Dumazet Acked-by: Herbert Xu Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2906d52..3be02b6 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -141,14 +141,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) const int plen = skb->len; int dlen = IPCOMP_SCRATCH_SIZE; u8 *start = skb->data; - const int cpu = get_cpu(); - u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu); - struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu); + struct crypto_comp *tfm; + u8 *scratch; int err; local_bh_disable(); + scratch = *this_cpu_ptr(ipcomp_scratches); + tfm = *this_cpu_ptr(ipcd->tfms); err = crypto_comp_compress(tfm, start, plen, scratch, &dlen); - local_bh_enable(); if (err) goto out; @@ -158,13 +158,13 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) } memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen); - put_cpu(); + local_bh_enable(); pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr)); return 0; out: - put_cpu(); + local_bh_enable(); return err; } -- cgit v0.10.2 From adabdb0cc59e80f13c3a2c4e22c40cc55e9b3b8a Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 13 Sep 2013 13:00:57 -0600 Subject: MAINTAINERS: Tegra updates, and driver ownership Overhaul of MAINTAINERS for Tegra. This adds Thierry as a Tegra core maintainer, and adds specific entries for most individual Tegra-specific device drivers, pointing at relevant people. The tegradrm section is updated to be Supported since Thierry is now employed to work on this. 
Signed-off-by: Stephen Warren Acked-by: Thierry Reding diff --git a/MAINTAINERS b/MAINTAINERS index 7a45039..fdc6c64 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2823,7 +2823,7 @@ M: Terje Bergström L: dri-devel@lists.freedesktop.org L: linux-tegra@vger.kernel.org T: git git://anongit.freedesktop.org/tegra/linux.git -S: Maintained +S: Supported F: drivers/gpu/host1x/ F: include/uapi/drm/tegra_drm.h F: Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt @@ -8290,14 +8290,72 @@ L: linux-media@vger.kernel.org S: Maintained F: drivers/media/rc/ttusbir.c -TEGRA SUPPORT +TEGRA ARCHITECTURE SUPPORT M: Stephen Warren +M: Thierry Reding L: linux-tegra@vger.kernel.org Q: http://patchwork.ozlabs.org/project/linux-tegra/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra.git S: Supported N: [^a-z]tegra +TEGRA ASOC DRIVER +M: Stephen Warren +S: Supported +F: sound/soc/tegra/ + +TEGRA CLOCK DRIVER +M: Peter De Schrijver +M: Prashant Gaikwad +S: Supported +F: drivers/clk/tegra/ + +TEGRA DMA DRIVER +M: Laxman Dewangan +S: Supported +F: drivers/dma/tegra20-apb-dma.c + +TEGRA GPIO DRIVER +M: Stephen Warren +S: Supported +F: drivers/gpio/gpio-tegra.c + +TEGRA I2C DRIVER +M: Laxman Dewangan +S: Supported +F: drivers/i2c/busses/i2c-tegra.c + +TEGRA IOMMU DRIVERS +M: Hiroshi Doyu +S: Supported +F: drivers/iommu/tegra* + +TEGRA KBC DRIVER +M: Rakesh Iyer +M: Laxman Dewangan +S: Supported +F: drivers/input/keyboard/tegra-kbc.c + +TEGRA PINCTRL DRIVER +M: Stephen Warren +S: Supported +F: drivers/pinctrl/pinctrl-tegra* + +TEGRA PWM DRIVER +M: Thierry Reding +S: Supported +F: drivers/pwm/pwm-tegra.c + +TEGRA SERIAL DRIVER +M: Laxman Dewangan +S: Supported +F: drivers/tty/serial/serial-tegra.c + +TEGRA SPI DRIVER +M: Laxman Dewangan +S: Supported +F: drivers/spi/spi-tegra* + TEHUTI ETHERNET DRIVER M: Andy Gospodarek L: netdev@vger.kernel.org -- cgit v0.10.2 From 08ddbb0a899a4e7a9214bc7c063be436cabbc52a Mon Sep 17 00:00:00 2001 From: Heiko Stuebner 
Date: Mon, 14 Oct 2013 23:19:58 +0200 Subject: MAINTAINERS: Add maintainers entry for Rockchip SoCs I plan to stay with the Rockchip SoCs for the foreseeable future and hope to expand its support along the way. Signed-off-by: Heiko Stuebner diff --git a/MAINTAINERS b/MAINTAINERS index fdc6c64..5712462 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1150,6 +1150,13 @@ F: drivers/net/ethernet/i825xx/ether1* F: drivers/net/ethernet/seeq/ether3* F: drivers/scsi/arm/ +ARM/Rockchip SoC support +M: Heiko Stuebner +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/mach-rockchip/ +F: drivers/*/*rockchip* + ARM/SHARK MACHINE SUPPORT M: Alexander Schulz W: http://www.shark-linux.de/shark.html -- cgit v0.10.2 From b0267507dfd0187fb7840a0ec461a510a7f041c5 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 17 Oct 2013 19:45:29 +0900 Subject: mutex: Avoid gcc version dependent __builtin_constant_p() usage Commit 040a0a37 ("mutex: Add support for wound/wait style locks") used "!__builtin_constant_p(p == NULL)" but gcc 3.x cannot handle such an expression correctly, leading to boot failure when built with CONFIG_DEBUG_MUTEXES=y. Fix it by explicitly passing a bool which tells whether p != NULL or not. [ PeterZ: This is a sad patch, but provided it actually generates similar code I suppose it's the best we can do bar wholesale deprecating gcc-3.
] Signed-off-by: Tetsuo Handa Acked-by: Peter Zijlstra Acked-by: Maarten Lankhorst Cc: peterz@infradead.org Cc: imirkin@alum.mit.edu Cc: daniel.vetter@ffwll.ch Cc: robdclark@gmail.com Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/201310171945.AGB17114.FSQVtHOJFOOFML@I-love.SAKURA.ne.jp Signed-off-by: Ingo Molnar diff --git a/kernel/mutex.c b/kernel/mutex.c index 6d647ae..d24105b 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -410,7 +410,7 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, static __always_inline int __sched __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { struct task_struct *task = current; struct mutex_waiter waiter; @@ -450,7 +450,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, struct task_struct *owner; struct mspin_node node; - if (!__builtin_constant_p(ww_ctx == NULL) && ww_ctx->acquired > 0) { + if (use_ww_ctx && ww_ctx->acquired > 0) { struct ww_mutex *ww; ww = container_of(lock, struct ww_mutex, base); @@ -480,7 +480,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if ((atomic_read(&lock->count) == 1) && (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { lock_acquired(&lock->dep_map, ip); - if (!__builtin_constant_p(ww_ctx == NULL)) { + if (use_ww_ctx) { struct ww_mutex *ww; ww = container_of(lock, struct ww_mutex, base); @@ -551,7 +551,7 @@ slowpath: goto err; } - if (!__builtin_constant_p(ww_ctx == NULL) && ww_ctx->acquired > 0) { + if (use_ww_ctx && ww_ctx->acquired > 0) { ret = __mutex_lock_check_stamp(lock, ww_ctx); if (ret) goto err; @@ -575,7 +575,7 @@ skip_wait: lock_acquired(&lock->dep_map, ip); mutex_set_owner(lock); - if (!__builtin_constant_p(ww_ctx == NULL)) { + if (use_ww_ctx) { struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); struct mutex_waiter *cur; @@ -615,7 
+615,7 @@ mutex_lock_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -625,7 +625,7 @@ _mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest) { might_sleep(); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, - 0, nest, _RET_IP_, NULL); + 0, nest, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock); @@ -635,7 +635,7 @@ mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); return __mutex_lock_common(lock, TASK_KILLABLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_killable_nested); @@ -644,7 +644,7 @@ mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, - subclass, NULL, _RET_IP_, NULL); + subclass, NULL, _RET_IP_, NULL, 0); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -682,7 +682,7 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx); + 0, &ctx->dep_map, _RET_IP_, ctx, 1); if (!ret && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); @@ -697,7 +697,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) might_sleep(); ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, - 0, &ctx->dep_map, _RET_IP_, ctx); + 0, &ctx->dep_map, _RET_IP_, ctx, 1); if (!ret && ctx->acquired > 1) return ww_mutex_deadlock_injection(lock, ctx); @@ -809,28 +809,28 @@ __mutex_lock_slowpath(atomic_t *lock_count) struct mutex *lock = container_of(lock_count, struct mutex, count); __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched 
__mutex_lock_killable_slowpath(struct mutex *lock) { return __mutex_lock_common(lock, TASK_KILLABLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched __mutex_lock_interruptible_slowpath(struct mutex *lock) { return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, NULL); + NULL, _RET_IP_, NULL, 0); } static noinline int __sched __ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx); + NULL, _RET_IP_, ctx, 1); } static noinline int __sched @@ -838,7 +838,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0, - NULL, _RET_IP_, ctx); + NULL, _RET_IP_, ctx, 1); } #endif -- cgit v0.10.2 From c7314d74fcb089b127ef5753b5263ac8473f33bc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 20 Oct 2013 08:44:39 -0400 Subject: nfsd regression since delayed fput() Background: nfsd v[23] had throughput regression since delayed fput went in; every read or write ends up doing fput() and we get a pair of extra context switches out of that (plus quite a bit of work in queue_work itself, apparently). Use of schedule_delayed_work() gives it a chance to accumulate a bit before we do __fput() on all of them. I'm not too happy about that solution, but... on at least one real-world setup it reverts about 10% throughput loss we got from switch to delayed fput. 
Signed-off-by: Al Viro diff --git a/fs/file_table.c b/fs/file_table.c index abdd15a..e900ca5 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -297,7 +297,7 @@ void flush_delayed_fput(void) delayed_fput(NULL); } -static DECLARE_WORK(delayed_fput_work, delayed_fput); +static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput); void fput(struct file *file) { @@ -317,7 +317,7 @@ void fput(struct file *file) } if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) - schedule_work(&delayed_fput_work); + schedule_delayed_work(&delayed_fput_work, 1); } } -- cgit v0.10.2 From d253d2a52676cfa3d89b8f0737a08ce7db665207 Mon Sep 17 00:00:00 2001 From: Brennan Shacklett Date: Mon, 21 Oct 2013 09:20:32 -0700 Subject: intel_pstate: Improve accuracy by not truncating until final result This patch addresses Bug 60727 (https://bugzilla.kernel.org/show_bug.cgi?id=60727) which was due to the truncation of intermediate values in the calculations, which causes the code to consistently underestimate the current cpu frequency, specifically 100% cpu utilization was truncated down to the setpoint of 97%. This patch fixes the problem by keeping the results of all intermediate calculations as fixed point numbers rather than scaling them back and forth between integers and fixed point. References: https://bugzilla.kernel.org/show_bug.cgi?id=60727 Signed-off-by: Brennan Shacklett Acked-by: Dirk Brandewie Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index badf620..8b8677f 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -48,7 +48,7 @@ static inline int32_t div_fp(int32_t x, int32_t y) } struct sample { - int core_pct_busy; + int32_t core_pct_busy; u64 aperf; u64 mperf; int freq; @@ -68,7 +68,7 @@ struct _pid { int32_t i_gain; int32_t d_gain; int deadband; - int last_err; + int32_t last_err; }; struct cpudata { @@ -153,16 +153,15 @@ static inline void pid_d_gain_set(struct _pid *pid, int percent) pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); } -static signed int pid_calc(struct _pid *pid, int busy) +static signed int pid_calc(struct _pid *pid, int32_t busy) { - signed int err, result; + signed int result; int32_t pterm, dterm, fp_error; int32_t integral_limit; - err = pid->setpoint - busy; - fp_error = int_tofp(err); + fp_error = int_tofp(pid->setpoint) - busy; - if (abs(err) <= pid->deadband) + if (abs(fp_error) <= int_tofp(pid->deadband)) return 0; pterm = mul_fp(pid->p_gain, fp_error); @@ -176,8 +175,8 @@ static signed int pid_calc(struct _pid *pid, int busy) if (pid->integral < -integral_limit) pid->integral = -integral_limit; - dterm = mul_fp(pid->d_gain, (err - pid->last_err)); - pid->last_err = err; + dterm = mul_fp(pid->d_gain, fp_error - pid->last_err); + pid->last_err = fp_error; result = pterm + mul_fp(pid->integral, pid->i_gain) + dterm; @@ -436,8 +435,9 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu, struct sample *sample) { u64 core_pct; - core_pct = div64_u64(sample->aperf * 100, sample->mperf); - sample->freq = cpu->pstate.max_pstate * core_pct * 1000; + core_pct = div64_u64(int_tofp(sample->aperf * 100), + sample->mperf); + sample->freq = fp_toint(cpu->pstate.max_pstate * core_pct * 1000); sample->core_pct_busy = core_pct; } @@ -469,22 +469,19 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu) mod_timer_pinned(&cpu->timer, 
jiffies + delay); } -static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu) +static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu) { - int32_t busy_scaled; int32_t core_busy, max_pstate, current_pstate; - core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy); + core_busy = cpu->samples[cpu->sample_ptr].core_pct_busy; max_pstate = int_tofp(cpu->pstate.max_pstate); current_pstate = int_tofp(cpu->pstate.current_pstate); - busy_scaled = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); - - return fp_toint(busy_scaled); + return mul_fp(core_busy, div_fp(max_pstate, current_pstate)); } static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) { - int busy_scaled; + int32_t busy_scaled; struct _pid *pid; signed int ctl = 0; int steps; -- cgit v0.10.2 From 7244cb62d96e735847dc9d08f870550df896898c Mon Sep 17 00:00:00 2001 From: Dirk Brandewie Date: Mon, 21 Oct 2013 09:20:33 -0700 Subject: intel_pstate: Correct calculation of min pstate value The minimum pstate is supposed to be a percentage of the maximum P state available. Calculate min using max pstate and not the current max which may have been limited by the user Signed-off-by: Dirk Brandewie Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 8b8677f..eb3fdc7 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -366,12 +366,13 @@ static int intel_pstate_turbo_pstate(void) static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) { int max_perf = cpu->pstate.turbo_pstate; + int max_perf_adj; int min_perf; if (limits.no_turbo) max_perf = cpu->pstate.max_pstate; - max_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); - *max = clamp_t(int, max_perf, + max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf)); + *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate, cpu->pstate.turbo_pstate); min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf)); -- cgit v0.10.2 From b416c144f46af1a30ddfa4e4319a8f077381ad63 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 Oct 2013 13:14:53 +0100 Subject: netfilter: x_tables: fix ordering of jumpstack allocation and table update During kernel stability testing on an SMP ARMv7 system, Yalin Wang reported the following panic from the netfilter code: 1fe0: 0000001c 5e2d3b10 4007e779 4009e110 60000010 00000032 ff565656 ff545454 [] (ipt_do_table+0x448/0x584) from [] (nf_iterate+0x48/0x7c) [] (nf_iterate+0x48/0x7c) from [] (nf_hook_slow+0x58/0x104) [] (nf_hook_slow+0x58/0x104) from [] (ip_local_deliver+0x88/0xa8) [] (ip_local_deliver+0x88/0xa8) from [] (ip_rcv_finish+0x418/0x43c) [] (ip_rcv_finish+0x418/0x43c) from [] (__netif_receive_skb+0x4cc/0x598) [] (__netif_receive_skb+0x4cc/0x598) from [] (process_backlog+0x84/0x158) [] (process_backlog+0x84/0x158) from [] (net_rx_action+0x70/0x1dc) [] (net_rx_action+0x70/0x1dc) from [] (__do_softirq+0x11c/0x27c) [] (__do_softirq+0x11c/0x27c) from [] (do_softirq+0x44/0x50) [] (do_softirq+0x44/0x50) from [] (local_bh_enable_ip+0x8c/0xd0) [] (local_bh_enable_ip+0x8c/0xd0) from [] (inet_stream_connect+0x164/0x298) [] (inet_stream_connect+0x164/0x298) from [] 
(sys_connect+0x88/0xc8) [] (sys_connect+0x88/0xc8) from [] (ret_fast_syscall+0x0/0x30) Code: 2a000021 e59d2028 e59de01c e59f011c (e7824103) ---[ end trace da227214a82491bd ]--- Kernel panic - not syncing: Fatal exception in interrupt This comes about because CPU1 is executing xt_replace_table in response to a setsockopt syscall, resulting in: ret = xt_jumpstack_alloc(newinfo); --> newinfo->jumpstack = kzalloc(size, GFP_KERNEL); [...] table->private = newinfo; newinfo->initial_entries = private->initial_entries; Meanwhile, CPU0 is handling the network receive path and ends up in ipt_do_table, resulting in: private = table->private; [...] jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; On weakly ordered memory architectures, the writes to table->private and newinfo->jumpstack from CPU1 can be observed out of order by CPU0. Furthermore, on architectures which don't respect ordering of address dependencies (i.e. Alpha), the reads from CPU0 can also be re-ordered. This patch adds an smp_wmb() before the assignment to table->private (which is essentially publishing newinfo) to ensure that all writes to newinfo will be observed before plugging it into the table structure. A dependent-read barrier is also added on the consumer sides, to ensure the same ordering requirements are also respected there. Cc: Paul E. McKenney Reported-by: Wang, Yalin Tested-by: Wang, Yalin Signed-off-by: Will Deacon Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 85a4f21..59da7cd 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -271,6 +271,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. 
+ */ + smp_read_barrier_depends(); table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index d23118d..718dfbd 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -327,6 +327,11 @@ ipt_do_table(struct sk_buff *skb, addend = xt_write_recseq_begin(); private = table->private; cpu = smp_processor_id(); + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); table_base = private->entries[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 44400c2..710238f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); cpu = smp_processor_id(); table_base = private->entries[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 8b03028..227aa11 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -845,8 +845,13 @@ xt_replace_table(struct xt_table *table, return NULL; } - table->private = newinfo; newinfo->initial_entries = private->initial_entries; + /* + * Ensure contents of newinfo are visible before assigning to + * private. 
+ */ + smp_wmb(); + table->private = newinfo; /* * Even though table entries have now been swapped, other CPU's -- cgit v0.10.2 From 9b4f60e5c9b2285fb08a7656418e8b19bf2a4c9c Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 15 Oct 2013 23:46:34 -0700 Subject: Input: wacom - add support for ISDv4 0x10F sensor Signed-off-by: Jason Gerecke Tested-by: Filip Zarnecki Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index b2aa503..1535c3f 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -2054,6 +2054,9 @@ static const struct wacom_features wacom_features_0x101 = static const struct wacom_features wacom_features_0x10D = { "Wacom ISDv4 10D", WACOM_PKGLEN_MTTPC, 26202, 16325, 255, 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x10F = + { "Wacom ISDv4 10F", WACOM_PKGLEN_MTTPC, 27760, 15694, 255, + 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x4001 = { "Wacom ISDv4 4001", WACOM_PKGLEN_MTTPC, 26202, 16325, 255, 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2248,6 +2251,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x100) }, { USB_DEVICE_WACOM(0x101) }, { USB_DEVICE_WACOM(0x10D) }, + { USB_DEVICE_WACOM(0x10F) }, { USB_DEVICE_WACOM(0x300) }, { USB_DEVICE_WACOM(0x301) }, { USB_DEVICE_WACOM(0x304) }, -- cgit v0.10.2 From 2d3163f10256a99d05a64fa03d43747e5634a44b Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 22 Oct 2013 15:35:30 -0700 Subject: Input: wacom - add support for ISDv4 0x10E sensor Used in the Fujitsu T732 Signed-off-by: Jason Gerecke Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 1535c3f..c59b797 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -2054,6 +2054,9 @@ static const struct wacom_features 
wacom_features_0x101 = static const struct wacom_features wacom_features_0x10D = { "Wacom ISDv4 10D", WACOM_PKGLEN_MTTPC, 26202, 16325, 255, 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x10E = + { "Wacom ISDv4 10E", WACOM_PKGLEN_MTTPC, 27760, 15694, 255, + 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x10F = { "Wacom ISDv4 10F", WACOM_PKGLEN_MTTPC, 27760, 15694, 255, 0, MTTPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -2251,6 +2254,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x100) }, { USB_DEVICE_WACOM(0x101) }, { USB_DEVICE_WACOM(0x10D) }, + { USB_DEVICE_WACOM(0x10E) }, { USB_DEVICE_WACOM(0x10F) }, { USB_DEVICE_WACOM(0x300) }, { USB_DEVICE_WACOM(0x301) }, -- cgit v0.10.2 From 97b9410643475d6557d2517c2aff9fd2221141a9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 24 Sep 2013 21:50:23 +0200 Subject: clockevents: Sanitize ticks to nsec conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marc Kleine-Budde pointed out, that commit 77cc982 "clocksource: use clockevents_config_and_register() where possible" caused a regression for some of the converted subarchs. The reason is, that the clockevents core code converts the minimal hardware tick delta to a nanosecond value for core internal usage. This conversion is affected by integer math rounding loss, so the backwards conversion to hardware ticks will likely result in a value which is less than the configured hardware limitation. The affected subarchs used their own workaround (SIGH!) which got lost in the conversion. The solution for the issue at hand is simple: adding evt->mult - 1 to the shifted value before the integer divison in the core conversion function takes care of it. But this only works for the case where for the scaled math mult/shift pair "mult <= 1 << shift" is true. 
For the case where "mult > 1 << shift" we can apply the rounding add only for the minimum delta value to make sure that the backward conversion is not less than the given hardware limit. For the upper bound we need to omit the rounding add, because the backwards conversion is always larger than the original latch value. That would violate the upper bound of the hardware device. Though looking closer at the details of that function reveals another bogosity: The upper bounds check is broken as well. Checking for a resulting "clc" value greater than KTIME_MAX after the conversion is pointless. The conversion does: u64 clc = (latch << evt->shift) / evt->mult; So there is no sanity check for (latch << evt->shift) exceeding the 64bit boundary. The latch argument is "unsigned long", so on a 64bit arch the handed in argument could easily lead to an unnoticed shift overflow. With the above rounding fix applied the calculation before the divison is: u64 clc = (latch << evt->shift) + evt->mult - 1; So we need to make sure, that neither the shift nor the rounding add is overflowing the u64 boundary. 
[ukl: move assignment to rnd after eventually changing mult, fix build issue and correct comment with the right math] Signed-off-by: Thomas Gleixner Cc: Russell King - ARM Linux Cc: Marc Kleine-Budde Cc: nicolas.ferre@atmel.com Cc: Marc Pignat Cc: john.stultz@linaro.org Cc: kernel@pengutronix.de Cc: Ronald Wahl Cc: LAK Cc: Ludovic Desroches Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1380052223-24139-1-git-send-email-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 38959c8..662c579 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -33,29 +33,64 @@ struct ce_unbind { int res; }; -/** - * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds - * @latch: value to convert - * @evt: pointer to clock event device descriptor - * - * Math helper, returns latch value converted to nanoseconds (bound checked) - */ -u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, + bool ismax) { u64 clc = (u64) latch << evt->shift; + u64 rnd; if (unlikely(!evt->mult)) { evt->mult = 1; WARN_ON(1); } + rnd = (u64) evt->mult - 1; + + /* + * Upper bound sanity check. If the backwards conversion is + * not equal latch, we know that the above shift overflowed. + */ + if ((clc >> evt->shift) != (u64)latch) + clc = ~0ULL; + + /* + * Scaled math oddities: + * + * For mult <= (1 << shift) we can safely add mult - 1 to + * prevent integer rounding loss. So the backwards conversion + * from nsec to device ticks will be correct. + * + * For mult > (1 << shift), i.e. device frequency is > 1GHz we + * need to be careful. Adding mult - 1 will result in a value + * which when converted back to device ticks can be larger + * than latch by up to (mult - 1) >> shift. 
For the min_delta + * calculation we still want to apply this in order to stay + * above the minimum device ticks limit. For the upper limit + * we would end up with a latch value larger than the upper + * limit of the device, so we omit the add to stay below the + * device upper boundary. + * + * Also omit the add if it would overflow the u64 boundary. + */ + if ((~0ULL - clc > rnd) && + (!ismax || evt->mult <= (1U << evt->shift))) + clc += rnd; do_div(clc, evt->mult); - if (clc < 1000) - clc = 1000; - if (clc > KTIME_MAX) - clc = KTIME_MAX; - return clc; + /* Deltas less than 1usec are pointless noise */ + return clc > 1000 ? clc : 1000; +} + +/** + * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds + * @latch: value to convert + * @evt: pointer to clock event device descriptor + * + * Math helper, returns latch value converted to nanoseconds (bound checked) + */ +u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt) +{ + return cev_delta2ns(latch, evt, false); } EXPORT_SYMBOL_GPL(clockevent_delta2ns); @@ -380,8 +415,8 @@ void clockevents_config(struct clock_event_device *dev, u32 freq) sec = 600; clockevents_calc_mult_shift(dev, freq, sec); - dev->min_delta_ns = clockevent_delta2ns(dev->min_delta_ticks, dev); - dev->max_delta_ns = clockevent_delta2ns(dev->max_delta_ticks, dev); + dev->min_delta_ns = cev_delta2ns(dev->min_delta_ticks, dev, false); + dev->max_delta_ns = cev_delta2ns(dev->max_delta_ticks, dev, true); } /** -- cgit v0.10.2 From 36008cf118235cee49b6753455f33b6f2c3a7543 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Thu, 3 Oct 2013 03:21:13 -0400 Subject: [SCSI] qla2xxx: Fix request queue null dereference. If an invalid IOCB is returned on the response queue then the index into the request queue map could be invalid and could return to us a bogus value. This could cause us to try to deference an invalid pointer and cause an exception. 
If we encounter this condition, simply return as no context can be established for this response. Signed-off-by: Chad Dupuis Signed-off-by: Saurav Kashyap Signed-off-by: James Bottomley diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 2ef497e..ee5c183 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -20,7 +20,7 @@ * | Device Discovery | 0x2095 | 0x2020-0x2022, | * | | | 0x2011-0x2012, | * | | | 0x2016 | - * | Queue Command and IO tracing | 0x3058 | 0x3006-0x300b | + * | Queue Command and IO tracing | 0x3059 | 0x3006-0x300b | * | | | 0x3027-0x3028 | * | | | 0x303d-0x3041 | * | | | 0x302d,0x3033 | diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index df1b30b..ff9c86b 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1957,6 +1957,15 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) que = MSW(sts->handle); req = ha->req_q_map[que]; + /* Check for invalid queue pointer */ + if (req == NULL || + que >= find_first_zero_bit(ha->req_qid_map, ha->max_req_queues)) { + ql_dbg(ql_dbg_io, vha, 0x3059, + "Invalid status handle (0x%x): Bad req pointer. req=%p, " + "que=%u.\n", sts->handle, req, que); + return; + } + /* Validate handle. 
*/ if (handle < req->num_outstanding_cmds) sp = req->outstanding_cmds[handle]; -- cgit v0.10.2 From 10c580e4239df5c3344ca00322eca86ab2de880b Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 10 Oct 2013 13:22:36 +0800 Subject: [SCSI] sd: call blk_pm_runtime_init before add_disk Sujit has found a race condition that would make q->nr_pending unbalanced, it occurs as Sujit explained: " sd_probe_async() -> add_disk() -> disk_add_event() -> schedule(disk_events_workfn) sd_revalidate_disk() blk_pm_runtime_init() return; Let's say the disk_events_workfn() calls sd_check_events() which tries to send test_unit_ready() and because of sd_revalidate_disk() trying to send another commands the test_unit_ready() might be re-queued as the tagged command queuing is disabled. So the race condition is - Thread 1 | Thread 2 sd_revalidate_disk() | sd_check_events() ...nr_pending = 0 as q->dev = NULL| scsi_queue_insert() blk_runtime_pm_init() | blk_pm_requeue_request() -> | nr_pending = -1 since | q->dev != NULL " The problem is, the test_unit_ready request doesn't get counted the first time it is queued, so the later decrement of q->nr_pending in blk_pm_requeue_request makes it unbalanced. Fix this by calling blk_pm_runtime_init before add_disk so that all requests initiated there will all be counted. Signed-off-by: Aaron Lu Reported-and-tested-by: Sujit Reddy Thumma Cc: stable@vger.kernel.org Signed-off-by: James Bottomley diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index e62d17d..5693f6d7 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2854,6 +2854,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie) gd->events |= DISK_EVENT_MEDIA_CHANGE; } + blk_pm_runtime_init(sdp->request_queue, dev); add_disk(gd); if (sdkp->capacity) sd_dif_config_host(sdkp); @@ -2862,7 +2863,6 @@ static void sd_probe_async(void *data, async_cookie_t cookie) sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", sdp->removable ? 
"removable " : ""); - blk_pm_runtime_init(sdp->request_queue, dev); scsi_autopm_put_device(sdp); put_device(&sdkp->dev); } -- cgit v0.10.2 From 17b59560efcf3ba6f6935c4ce7a575ebd216ad51 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 22 Oct 2013 12:58:56 +0530 Subject: MAINTAINERS: add the new dmaengine mailing list We have a new mailing list hosted by vger for dmaengine Acked-by: Dan Williams Signed-off-by: Vinod Koul diff --git a/MAINTAINERS b/MAINTAINERS index 8a0cbf3..707533e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2718,6 +2718,8 @@ T: git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git DMA GENERIC OFFLOAD ENGINE SUBSYSTEM M: Vinod Koul M: Dan Williams +L: dmaengine@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-dmaengine/list/ S: Supported F: drivers/dma/ F: include/linux/dma* -- cgit v0.10.2 From 18ebd564e45eacd349daa31cf865183d578653d7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 22 Oct 2013 15:29:20 -0700 Subject: MAINTAINERS: add to ioatdma maintainer list Signed-off-by: Dave Jiang [djbw: add dmaengine list] Signed-off-by: Dan Williams Signed-off-by: Vinod Koul diff --git a/MAINTAINERS b/MAINTAINERS index 707533e..cdcf649 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4353,7 +4353,10 @@ F: arch/x86/kernel/microcode_intel.c INTEL I/OAT DMA DRIVER M: Dan Williams -S: Maintained +M: Dave Jiang +L: dmaengine@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-dmaengine/list/ +S: Supported F: drivers/dma/ioat* INTEL IOMMU (VT-d) -- cgit v0.10.2 From de926800b155886c61b06146e28c0ba2e6fafc39 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 22 Oct 2013 10:46:59 -0400 Subject: drm/radeon: use sw CTS/N values for audio on DCE4+ Use the driver calculated CTS and N values rather than having hardware generate them. This allows us to use the modeline pixel clock rather than the actual pll clock when setting up the dto for audio. 
Fixes problems with audio playback rate on certain asics if the pll clock does not match the pixel clock exactly. Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index fe1de85..57fcc4b 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -291,6 +291,7 @@ void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode /* fglrx clears sth in AFMT_AUDIO_PACKET_CONTROL2 here */ WREG32(HDMI_ACR_PACKET_CONTROL + offset, + HDMI_ACR_SOURCE | /* select SW CTS value */ HDMI_ACR_AUTO_SEND); /* allow hw to sent ACR packets when required */ evergreen_hdmi_update_ACR(encoder, mode->clock); -- cgit v0.10.2 From d48d88b21ede7990702b990bc36262c3d5fd7a1f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2013 11:20:46 -0400 Subject: drm/radeon: disable bapm on KB May cause stability problems on some boards. Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c index 7139906..b419055 100644 --- a/drivers/gpu/drm/radeon/kv_dpm.c +++ b/drivers/gpu/drm/radeon/kv_dpm.c @@ -2635,7 +2635,7 @@ int kv_dpm_init(struct radeon_device *rdev) pi->caps_sclk_ds = true; pi->enable_auto_thermal_throttling = true; pi->disable_nb_ps3_in_battery = false; - pi->bapm_enable = true; + pi->bapm_enable = false; pi->voltage_drop_t = 0; pi->caps_sclk_throttle_low_notification = false; pi->caps_fps = false; /* true? */ -- cgit v0.10.2 From cdf6e8058415ba4d808537e30a0a6be9fb29e95a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 23 Oct 2013 16:13:42 -0400 Subject: drm/radeon/dpm: fix incompatible casting on big endian We use u16 for voltage values throughout the driver so switch the table values to a u16 as well. Fixes an incompatible cast error in ci_patch_clock_voltage_limits_with_vddc_leakage() picked up by coverity. 
Signed-off-by: Alex Deucher diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a400ac1..24f4960 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1272,8 +1272,8 @@ struct radeon_blacklist_clocks struct radeon_clock_and_voltage_limits { u32 sclk; u32 mclk; - u32 vddc; - u32 vddci; + u16 vddc; + u16 vddci; }; struct radeon_clock_array { -- cgit v0.10.2 From fecda03493646b53f53892fa3c38c75ba9310374 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 22 Oct 2013 18:34:56 +0200 Subject: net: sctp: fix ASCONF to allow non SCTP_ADDR_SRC addresses in ipv6 Commit 8a07eb0a50 ("sctp: Add ASCONF operation on the single-homed host") implemented possible use of IPv4 addresses with non SCTP_ADDR_SRC state as source address when sending ASCONF (ADD) packets, but IPv6 part for that was not implemented in 8a07eb0a50. Therefore, as this is not restricted to IPv4-only, fix this up to allow the same for IPv6 addresses in SCTP. Signed-off-by: Daniel Borkmann Cc: Michio Honda Acked-by: Michio Honda Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e7b2d4f..96a5591 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -279,7 +279,9 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, sctp_v6_to_addr(&dst_saddr, &fl6->saddr, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { - if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) + if (!laddr->valid || laddr->state == SCTP_ADDR_DEL || + (laddr->state != SCTP_ADDR_SRC && + !asoc->src_out_of_asoc_ok)) continue; /* Do not compare against v4 addrs */ -- cgit v0.10.2 From 61e4947c99c4494336254ec540c50186d186150b Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 24 Oct 2013 12:55:17 +1100 Subject: md: Fix skipping recovery for read-only arrays. 
Since: commit 7ceb17e87bde79d285a8b988cfed9eaeebe60b86 md: Allow devices to be re-added to a read-only array. spares are activated on a read-only array. In case of raid1 and raid10 personalities it causes that not-in-sync devices are marked in-sync without checking if recovery has been finished. If a read-only array is degraded and one of its devices is not in-sync (because the array has been only partially recovered) recovery will be skipped. This patch adds checking if recovery has been finished before marking a device in-sync for raid1 and raid10 personalities. In case of raid5 personality such condition is already present (at raid5.c:6029). Bug was introduced in 3.10 and causes data corruption. Cc: stable@vger.kernel.org Signed-off-by: Pawel Baldysiak Signed-off-by: Lukasz Dorau Signed-off-by: NeilBrown diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d60412c..aacf6bf 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1479,6 +1479,7 @@ static int raid1_spare_active(struct mddev *mddev) } } if (rdev + && rdev->recovery_offset == MaxSector && !test_bit(Faulty, &rdev->flags) && !test_and_set_bit(In_sync, &rdev->flags)) { count++; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index df7b0a0..73dc8a3 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1782,6 +1782,7 @@ static int raid10_spare_active(struct mddev *mddev) } sysfs_notify_dirent_safe(tmp->replacement->sysfs_state); } else if (tmp->rdev + && tmp->rdev->recovery_offset == MaxSector && !test_bit(Faulty, &tmp->rdev->flags) && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { count++; -- cgit v0.10.2 From 905b0297a9533d7a6ee00a01a990456636877dd6 Mon Sep 17 00:00:00 2001 From: Bian Yu Date: Sat, 12 Oct 2013 01:10:03 -0400 Subject: md: avoid deadlock when md_set_badblocks. When operate harddisk and hit errors, md_set_badblocks is called after scsi_restart_operations which already disabled the irq. but md_set_badblocks will call write_sequnlock_irq and enable irq. 
so softirq can preempt the current thread and that may cause a deadlock. I think this situation should use write_sequnlock_irqsave/irqrestore instead. I met the situation and the call trace is below: [ 638.919974] BUG: spinlock recursion on CPU#0, scsi_eh_13/1010 [ 638.921923] lock: 0xffff8800d4d51fc8, .magic: dead4ead, .owner: scsi_eh_13/1010, .owner_cpu: 0 [ 638.923890] CPU: 0 PID: 1010 Comm: scsi_eh_13 Not tainted 3.12.0-rc5+ #37 [ 638.925844] Hardware name: To be filled by O.E.M. To be filled by O.E.M./MAHOBAY, BIOS 4.6.5 03/05/2013 [ 638.927816] ffff880037ad4640 ffff880118c03d50 ffffffff8172ff85 0000000000000007 [ 638.929829] ffff8800d4d51fc8 ffff880118c03d70 ffffffff81730030 ffff8800d4d51fc8 [ 638.931848] ffffffff81a72eb0 ffff880118c03d90 ffffffff81730056 ffff8800d4d51fc8 [ 638.933884] Call Trace: [ 638.935867] [] dump_stack+0x55/0x76 [ 638.937878] [] spin_dump+0x8a/0x8f [ 638.939861] [] spin_bug+0x21/0x26 [ 638.941836] [] do_raw_spin_lock+0xa4/0xc0 [ 638.943801] [] _raw_spin_lock+0x66/0x80 [ 638.945747] [] ? scsi_device_unbusy+0x9d/0xd0 [ 638.947672] [] ? _raw_spin_unlock+0x2b/0x50 [ 638.949595] [] scsi_device_unbusy+0x9d/0xd0 [ 638.951504] [] scsi_finish_command+0x37/0xe0 [ 638.953388] [] scsi_softirq_done+0xa8/0x140 [ 638.955248] [] blk_done_softirq+0x7b/0x90 [ 638.957116] [] __do_softirq+0xfd/0x330 [ 638.958987] [] ? __lock_release+0x6f/0x100 [ 638.960861] [] call_softirq+0x1c/0x30 [ 638.962724] [] do_softirq+0x8d/0xc0 [ 638.964565] [] irq_exit+0x10e/0x150 [ 638.966390] [] smp_apic_timer_interrupt+0x4a/0x60 [ 638.968223] [] apic_timer_interrupt+0x6f/0x80 [ 638.970079] [] ? __lock_release+0x6f/0x100 [ 638.971899] [] ? _raw_spin_unlock_irq+0x3a/0x50 [ 638.973691] [] ? 
_raw_spin_unlock_irq+0x30/0x50 [ 638.975475] [] md_set_badblocks+0x1f3/0x4a0 [ 638.977243] [] rdev_set_badblocks+0x27/0x80 [ 638.978988] [] raid5_end_read_request+0x36b/0x4e0 [raid456] [ 638.980723] [] bio_endio+0x1d/0x40 [ 638.982463] [] req_bio_endio.isra.65+0x83/0xa0 [ 638.984214] [] blk_update_request+0x7f/0x350 [ 638.985967] [] blk_update_bidi_request+0x31/0x90 [ 638.987710] [] __blk_end_bidi_request+0x20/0x50 [ 638.989439] [] __blk_end_request_all+0x1f/0x30 [ 638.991149] [] blk_peek_request+0x106/0x250 [ 638.992861] [] ? scsi_kill_request.isra.32+0xe9/0x130 [ 638.994561] [] scsi_request_fn+0x4a/0x3d0 [ 638.996251] [] __blk_run_queue+0x37/0x50 [ 638.997900] [] blk_run_queue+0x2f/0x50 [ 638.999553] [] scsi_run_queue+0xe0/0x1c0 [ 639.001185] [] scsi_run_host_queues+0x21/0x40 [ 639.002798] [] scsi_restart_operations+0x177/0x200 [ 639.004391] [] scsi_error_handler+0xc9/0xe0 [ 639.005996] [] ? scsi_unjam_host+0xd0/0xd0 [ 639.007600] [] kthread+0xdb/0xe0 [ 639.009205] [] ? flush_kthread_worker+0x170/0x170 [ 639.010821] [] ret_from_fork+0x7c/0xb0 [ 639.012437] [] ? flush_kthread_worker+0x170/0x170 This bug was introduce in commit 2e8ac30312973dd20e68073653 (the first time rdev_set_badblock was call from interrupt context), so this patch is appropriate for 3.5 and subsequent kernels. 
Cc: (3.5+) Signed-off-by: Bian Yu Reviewed-by: Jianpeng Ma Signed-off-by: NeilBrown diff --git a/drivers/md/md.c b/drivers/md/md.c index adf4d7e..561a65f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8111,6 +8111,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, u64 *p; int lo, hi; int rv = 1; + unsigned long flags; if (bb->shift < 0) /* badblocks are disabled */ @@ -8125,7 +8126,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, sectors = next - s; } - write_seqlock_irq(&bb->lock); + write_seqlock_irqsave(&bb->lock, flags); p = bb->page; lo = 0; @@ -8241,7 +8242,7 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, bb->changed = 1; if (!acknowledged) bb->unacked_exist = 1; - write_sequnlock_irq(&bb->lock); + write_sequnlock_irqrestore(&bb->lock, flags); return rv; } -- cgit v0.10.2 From 37c61ff31e9b5e3fcf3cc6579f5c68f6ad40c4b1 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 19 Oct 2013 14:50:28 +0800 Subject: raid5: set bio bi_vcnt 0 for discard request SCSI layer will add new payload for discard request. If two bios are merged to one, the second bio has bi_vcnt 1 which is set in raid5. This will confuse SCSI and cause oops. Suitable for backport to 3.7+ Cc: stable@vger.kernel.org (v3.7+) Reported-by: Jes Sorensen Signed-off-by: Shaohua Li Signed-off-by: NeilBrown Acked-by: Martin K. Petersen diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7ff4f25..302d7cd 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -778,6 +778,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_io_vec[0].bv_len = STRIPE_SIZE; bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; + /* + * If this is discard request, set bi_vcnt 0. 
We don't + * want to confuse SCSI because SCSI will replace payload + */ + if (rw & REQ_DISCARD) + bi->bi_vcnt = 0; if (rrdev) set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); @@ -816,6 +822,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) rbi->bi_io_vec[0].bv_len = STRIPE_SIZE; rbi->bi_io_vec[0].bv_offset = 0; rbi->bi_size = STRIPE_SIZE; + /* + * If this is discard request, set bi_vcnt 0. We don't + * want to confuse SCSI because SCSI will replace payload + */ + if (rw & REQ_DISCARD) + rbi->bi_vcnt = 0; if (conf->mddev->gendisk) trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev), rbi, disk_devt(conf->mddev->gendisk), -- cgit v0.10.2 From d47648fcf0611812286f68131b40251c6fa54f5e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 19 Oct 2013 14:51:42 +0800 Subject: raid5: avoid finding "discard" stripe SCSI discard will damage the discard stripe's bio settings, e.g., some fields are changed. If the stripe is reused very soon, we end up with wrong bio settings. We remove the discard stripe from the hash list, so next time the stripe will be fully initialized. Suitable for backport to 3.7+.
Cc: (3.7+) Signed-off-by: Shaohua Li Signed-off-by: NeilBrown diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 302d7cd..f8b9068 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2922,6 +2922,14 @@ static void handle_stripe_clean_event(struct r5conf *conf, } /* now that discard is done we can proceed with any sync */ clear_bit(STRIPE_DISCARD, &sh->state); + /* + * SCSI discard will change some bio fields and the stripe has + * no updated data, so remove it from hash list and the stripe + * will be reinitialized + */ + spin_lock_irq(&conf->device_lock); + remove_hash(sh); + spin_unlock_irq(&conf->device_lock); if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) set_bit(STRIPE_HANDLE, &sh->state); -- cgit v0.10.2 From 0a66614b937c4cfe98c68613259367bf402f368b Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Tue, 22 Oct 2013 00:48:54 +0300 Subject: iser-target: check device before dereferencing its variable This patch changes isert_connect_release() to correctly check for the existence of struct isert_device *device before checking for isert_device->use_frwr.
Signed-off-by: Vu Pham Signed-off-by: Nicholas Bellinger diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3591855..6df2350 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -594,7 +594,7 @@ isert_connect_release(struct isert_conn *isert_conn) pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"); - if (device->use_frwr) + if (device && device->use_frwr) isert_conn_free_frwr_pool(isert_conn); if (isert_conn->conn_qp) { -- cgit v0.10.2 From e6bbe666673ab044a3d39ddb74e4d9a401cf1d6f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 24 Oct 2013 01:20:24 +0200 Subject: ALSA: hda - Fix unbalanced runtime PM refcount after S3/S4 When a machine goes to S3/S4 after power-save is enabled, the runtime PM refcount might be incorrectly decreased because the power-down triggered soon after resume assumes that the controller was already powered up, and issues the pm_notify down. This patch fixes the incorrect pm_notify call simply by checking the current value properly. 
Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 5b6c4e3..748c6a9 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4864,8 +4864,8 @@ static void hda_power_work(struct work_struct *work) spin_unlock(&codec->power_lock); state = hda_call_codec_suspend(codec, true); - codec->pm_down_notified = 0; - if (!bus->power_keep_link_on && (state & AC_PWRST_CLK_STOP_OK)) { + if (!codec->pm_down_notified && + !bus->power_keep_link_on && (state & AC_PWRST_CLK_STOP_OK)) { codec->pm_down_notified = 1; hda_call_pm_notify(bus, false); } -- cgit v0.10.2 From 366bda191c344ec4d7a5b908cf047bc09639ad3d Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 24 Oct 2013 00:10:36 -0700 Subject: target: Add missing XCOPY I/O operation sense_buffer This patch adds the missing xcopy_pt_cmd->sense_buffer[] required for correctly handling CHECK_CONDITION exceptions within the locally generated XCOPY I/O path. Also update target_xcopy_read_source() + target_xcopy_setup_pt_cmd() to pass this buffer into transport_init_se_cmd() to correctly setup se_cmd->sense_buffer. 
Reported-by: Thomas Glanzmann Reported-by: Douglas Gilbert Cc: Thomas Glanzmann Cc: Douglas Gilbert Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index eeeaf99..5edcd2b 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -360,6 +360,7 @@ struct xcopy_pt_cmd { struct se_cmd se_cmd; struct xcopy_op *xcopy_op; struct completion xpt_passthrough_sem; + unsigned char sense_buffer[TRANSPORT_SENSE_BUFFER]; }; static struct se_port xcopy_pt_port; @@ -711,7 +712,7 @@ static int target_xcopy_read_source( (unsigned long long)src_lba, src_sectors, length); transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, - DMA_FROM_DEVICE, 0, NULL); + DMA_FROM_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->src_pt_cmd = xpt_cmd; rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], @@ -771,7 +772,7 @@ static int target_xcopy_write_destination( (unsigned long long)dst_lba, dst_sectors, length); transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length, - DMA_TO_DEVICE, 0, NULL); + DMA_TO_DEVICE, 0, &xpt_cmd->sense_buffer[0]); xop->dst_pt_cmd = xpt_cmd; rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0], -- cgit v0.10.2 From 8a955d6dcc1840fa9cba73eb6db831c8fea19d95 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 24 Oct 2013 00:15:27 -0700 Subject: target: Generate failure for XCOPY I/O with non-zero scsi_status This patch adds the missing non-zero se_cmd->scsi_status check required for local XCOPY I/O within target_xcopy_issue_pt_cmd() to signal an exception case failure. This will trigger the generation of SAM_STAT_CHECK_CONDITION status from within target_xcopy_do_work() process context code. 
Reported-by: Thomas Glanzmann Reported-by: Douglas Gilbert Cc: Thomas Glanzmann Cc: Douglas Gilbert Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 5edcd2b..0e41143 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -679,7 +679,8 @@ static int target_xcopy_issue_pt_cmd(struct xcopy_pt_cmd *xpt_cmd) pr_debug("target_xcopy_issue_pt_cmd(): SCSI status: 0x%02x\n", se_cmd->scsi_status); - return 0; + + return (se_cmd->scsi_status) ? -EINVAL : 0; } static int target_xcopy_read_source( -- cgit v0.10.2 From 48502ddbfb9840803f633ff81eee507e0fdae7c5 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 24 Oct 2013 00:27:00 -0700 Subject: target: Fail XCOPY for non matching source + destination block_size This patch adds an explicit check + failure for XCOPY I/O to source + destination devices with a non-matching block_size. This limitation is currently due to the fact that the scatterlist memory allocated for the XCOPY READ operation is passed zero-copy to the XCOPY WRITE operation.
Reported-by: Thomas Glanzmann Reported-by: Douglas Gilbert Cc: Thomas Glanzmann Cc: Douglas Gilbert Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 0e41143..474cd44 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -893,6 +893,7 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) struct xcopy_op *xop = NULL; unsigned char *p = NULL, *seg_desc; unsigned int list_id, list_id_usage, sdll, inline_dl, sa; + sense_reason_t ret = TCM_INVALID_PARAMETER_LIST; int rc; unsigned short tdll; @@ -944,6 +945,17 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) if (rc <= 0) goto out; + if (xop->src_dev->dev_attrib.block_size != + xop->dst_dev->dev_attrib.block_size) { + pr_err("XCOPY: Non matching src_dev block_size: %u + dst_dev" + " block_size: %u currently unsupported\n", + xop->src_dev->dev_attrib.block_size, + xop->dst_dev->dev_attrib.block_size); + xcopy_pt_undepend_remotedev(xop); + ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + goto out; + } + pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc, rc * XCOPY_TARGET_DESC_LEN); seg_desc = &p[16]; @@ -966,7 +978,7 @@ out: if (p) transport_kunmap_data_sg(se_cmd); kfree(xop); - return TCM_INVALID_PARAMETER_LIST; + return ret; } static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd) -- cgit v0.10.2 From c0268e8d1f450e286fc55e77f53a9ede6b72acab Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Thu, 24 Oct 2013 10:10:51 -0300 Subject: perf script python: Fix mem leak due to missing Py_DECREFs on dict entries We are using the Python scripting interface in perf to extract kernel events relevant for performance analysis of HPC codes. We noticed that the "perf script" call allocates a significant amount of memory (in the order of several 100 MiB) during its run, e.g.
125 MiB for a 25 MiB input file: $> perf record -o perf.data -a -R -g fp \ -e power:cpu_frequency -e sched:sched_switch \ -e sched:sched_migrate_task -e sched:sched_process_exit \ -e sched:sched_process_fork -e sched:sched_process_exec \ -e cycles -m 4096 --freq 4000 $> /usr/bin/time perf script -i perf.data -s dummy_script.py 0.84user 0.13system 0:01.92elapsed 51%CPU (0avgtext+0avgdata 125532maxresident)k 73072inputs+0outputs (57major+33086minor)pagefaults 0swaps Upon further investigation using the valgrind massif tool, we noticed that Python objects that are created in trace-event-python.c via PyString_FromString*() (and their Integer and Long counterparts) are never free'd. The reason for this seems to be missing Py_DECREF calls on the objects that are returned by these functions and stored in the Python dictionaries. The Python dictionaries do not steal references (as opposed to Python tuples and lists) but instead add their own reference. Hence, the reference that is returned by these object creation functions is never released and the memory is leaked. (see [1,2]) The attached patch fixes this by wrapping all relevant calls to PyDict_SetItemString() and decrementing the reference counter immediately after the Python function call. This reduces the allocated memory to a reasonable amount: $> /usr/bin/time perf script -i perf.data -s dummy_script.py 0.73user 0.05system 0:00.79elapsed 99%CPU (0avgtext+0avgdata 49132maxresident)k 0inputs+0outputs (0major+14045minor)pagefaults 0swaps For comparison, with a 120 MiB input file the memory consumption reported by time drops from almost 600 MiB to 146 MiB. The patch has been tested using Linux 3.8.2 with Python 2.7.4 and Linux 3.11.6 with Python 2.7.5. Please let me know if you need any further information.
[1] http://docs.python.org/2/c-api/tuple.html#PyTuple_SetItem [2] http://docs.python.org/2/c-api/dict.html#PyDict_SetItemString Signed-off-by: Joseph Schuchart Reviewed-by: Tom Zanussi Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Tom Zanussi Link: http://lkml.kernel.org/r/1381468543-25334-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index cc75a3c..95d91a0 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -56,6 +56,17 @@ static void handler_call_die(const char *handler_name) Py_FatalError("problem in Python trace event handler"); } +/* + * Insert val into into the dictionary and decrement the reference counter. + * This is necessary for dictionaries since PyDict_SetItemString() does not + * steal a reference, as opposed to PyTuple_SetItem(). + */ +static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val) +{ + PyDict_SetItemString(dict, key, val); + Py_DECREF(val); +} + static void define_value(enum print_arg_type field_type, const char *ev_name, const char *field_name, @@ -279,11 +290,11 @@ static void python_process_tracepoint(union perf_event *perf_event PyTuple_SetItem(t, n++, PyInt_FromLong(pid)); PyTuple_SetItem(t, n++, PyString_FromString(comm)); } else { - PyDict_SetItemString(dict, "common_cpu", PyInt_FromLong(cpu)); - PyDict_SetItemString(dict, "common_s", PyInt_FromLong(s)); - PyDict_SetItemString(dict, "common_ns", PyInt_FromLong(ns)); - PyDict_SetItemString(dict, "common_pid", PyInt_FromLong(pid)); - PyDict_SetItemString(dict, "common_comm", PyString_FromString(comm)); + pydict_set_item_string_decref(dict, "common_cpu", PyInt_FromLong(cpu)); + pydict_set_item_string_decref(dict, "common_s", PyInt_FromLong(s)); + pydict_set_item_string_decref(dict, "common_ns", PyInt_FromLong(ns)); + 
pydict_set_item_string_decref(dict, "common_pid", PyInt_FromLong(pid)); + pydict_set_item_string_decref(dict, "common_comm", PyString_FromString(comm)); } for (field = event->format.fields; field; field = field->next) { if (field->flags & FIELD_IS_STRING) { @@ -313,7 +324,7 @@ static void python_process_tracepoint(union perf_event *perf_event if (handler) PyTuple_SetItem(t, n++, obj); else - PyDict_SetItemString(dict, field->name, obj); + pydict_set_item_string_decref(dict, field->name, obj); } if (!handler) @@ -370,21 +381,21 @@ static void python_process_general_event(union perf_event *perf_event if (!handler || !PyCallable_Check(handler)) goto exit; - PyDict_SetItemString(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); - PyDict_SetItemString(dict, "attr", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); + pydict_set_item_string_decref(dict, "attr", PyString_FromStringAndSize( (const char *)&evsel->attr, sizeof(evsel->attr))); - PyDict_SetItemString(dict, "sample", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "sample", PyString_FromStringAndSize( (const char *)sample, sizeof(*sample))); - PyDict_SetItemString(dict, "raw_buf", PyString_FromStringAndSize( + pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); - PyDict_SetItemString(dict, "comm", + pydict_set_item_string_decref(dict, "comm", PyString_FromString(thread->comm)); if (al->map) { - PyDict_SetItemString(dict, "dso", + pydict_set_item_string_decref(dict, "dso", PyString_FromString(al->map->dso->name)); } if (al->sym) { - PyDict_SetItemString(dict, "symbol", + pydict_set_item_string_decref(dict, "symbol", PyString_FromString(al->sym->name)); } -- cgit v0.10.2 From 4b6271a64463f4fcbaf8b2e1d84704b7eb8c407c Mon Sep 17 00:00:00 2001 From: Valentin Ilie Date: Thu, 24 Oct 2013 16:14:22 +0300 Subject: dma: edma: Fix memory leak 
When it fails to allocate a slot, edesc should be freed before returning. Signed-off-by: Valentin Ilie Signed-off-by: Vinod Koul diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 3519111..134fa96 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -305,6 +305,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( edma_alloc_slot(EDMA_CTLR(echan->ch_num), EDMA_SLOT_ANY); if (echan->slot[i] < 0) { + kfree(edesc); dev_err(dev, "Failed to allocate slot\n"); kfree(edesc); return NULL; -- cgit v0.10.2 From 7261828776b33ff434837674413df2920e9ca2ff Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Thu, 24 Oct 2013 22:17:50 +0530 Subject: dmaengine: edma: fix another memory leak Commit 4b6271a6 fixed a memory leak, but one more existed in the driver, so fix that too. Signed-off-by: Vinod Koul diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 134fa96..10b577f 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -347,6 +347,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( ccnt = sg_dma_len(sg) / (acnt * bcnt); if (ccnt > (SZ_64K - 1)) { dev_err(dev, "Exceeded max SG segment size\n"); + kfree(edesc); return NULL; } cidx = acnt * bcnt; -- cgit v0.10.2 From 43b7c6c6a4e3916edd186ceb61be0c67d1e0969e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 24 Oct 2013 14:08:07 +0000 Subject: eCryptfs: fix 32 bit corruption issue Shifting page->index on 32 bit systems was overflowing, causing data corruption of > 4GB files. Fix this by casting it first.
https://launchpad.net/bugs/1243636 Signed-off-by: Colin Ian King Reported-by: Lars Duesing Cc: stable@vger.kernel.org # v3.11+ Signed-off-by: Tyler Hicks diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index c88e355..000eae2 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -408,7 +408,7 @@ static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, struct page *page) { return ecryptfs_lower_header_size(crypt_stat) + - (page->index << PAGE_CACHE_SHIFT); + ((loff_t)page->index << PAGE_CACHE_SHIFT); } /** -- cgit v0.10.2 From 18a84e935ef3b283e86426827a2a1d524bb7eb8e Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Sat, 19 Oct 2013 18:19:25 -0300 Subject: mtd: nand: pxa3xx: Fix registered MTD name In a recent commit: commit f455578dd961087a5cf94730d9f6489bb1d355f0 Author: Ezequiel Garcia Date: Mon Aug 12 14:14:53 2013 -0300 mtd: nand: pxa3xx: Remove hardcoded mtd name There's no advantage in using a hardcoded name for the mtd device. Instead use the provided by the platform_device. The MTD name was changed to use the one provided by the platform_device. However, this can be problematic as some users want to set partitions using the kernel parameter 'mtdparts', where the name is needed. Therefore, to avoid regressions for users relying on 'mtdparts', we revert the change and use the previous name, 'pxa3xx_nand-0'. While at it, let's put a big comment and prevent this change from happening ever again. Signed-off-by: Ezequiel Garcia Signed-off-by: Brian Norris diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index dd03dfd..c28d4e2 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -1320,7 +1320,12 @@ static int pxa3xx_nand_probe(struct platform_device *pdev) for (cs = 0; cs < pdata->num_cs; cs++) { struct mtd_info *mtd = info->host[cs]->mtd; - mtd->name = pdev->name; + /* + * The mtd name matches the one used in 'mtdparts' kernel + * parameter.
This name cannot be changed or otherwise + * user's mtd partitions configuration would get broken. + */ + mtd->name = "pxa3xx_nand-0"; info->cs = cs; ret = pxa3xx_nand_scan(mtd); if (ret) { -- cgit v0.10.2 From d3c345dbc7c083414ef74eb22ff26ba2bd100759 Mon Sep 17 00:00:00 2001 From: Russ Dill Date: Thu, 24 Oct 2013 14:25:26 +0100 Subject: PM / hibernate: Move software_resume to late_initcall_sync software_resume is being called after deferred_probe_initcall in drivers base. If the probing of the device that contains the resume image is deferred, and the system has been instructed to wait for it to show up, this wait will occur in software_resume. This causes a deadlock. Move software_resume into late_initcall_sync so that it happens after all the other late_initcalls. Signed-off-by: Russ Dill Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c9c759d..0121dab 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -846,7 +846,7 @@ static int software_resume(void) goto Finish; } -late_initcall(software_resume); +late_initcall_sync(software_resume); static const char * const hibernation_modes[] = { -- cgit v0.10.2 From c0d3b9c29ed22d449481bcfac267a879034a3a5b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 25 Oct 2013 10:21:57 +0100 Subject: [SCSI] Revert "sg: push file descriptor list locking down to per-device locking" This reverts commit 1f962ebcdfa15cede59e9edb299d1330949eec92. This is one of four patches that was causing this bug [ 205.372823] ================================================ [ 205.372901] [ BUG: lock held when returning to user space! ] [ 205.372979] 3.12.0-rc6-hw-debug-pagealloc+ #67 Not tainted [ 205.373055] ------------------------------------------------ [ 205.373132] megarc.bin/5283 is leaving the kernel with locks still held! 
[ 205.373212] 1 lock held by megarc.bin/5283: [ 205.373285] #0: (&sdp->o_sem){.+.+..}, at: [] sg_open+0x3a0/0x4d0 Cc: Vaughan Cao Acked-by: Douglas Gilbert Signed-off-by: James Bottomley diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 5cbc4bb..64df1ab 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -106,7 +106,8 @@ static int sg_add(struct device *, struct class_interface *); static void sg_remove(struct device *, struct class_interface *); static DEFINE_IDR(sg_index_idr); -static DEFINE_RWLOCK(sg_index_lock); +static DEFINE_RWLOCK(sg_index_lock); /* Also used to lock + file descriptor list for device */ static struct class_interface sg_interface = { .add_dev = sg_add, @@ -143,7 +144,8 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ - struct list_head sfd_siblings; /* protected by sfd_lock of device */ + /* sfd_siblings is protected by sg_index_lock */ + struct list_head sfd_siblings; struct sg_device *parentdp; /* owning device */ wait_queue_head_t read_wait; /* queue read until command done */ rwlock_t rq_list_lock; /* protect access to list in req_arr */ @@ -168,7 +170,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ struct scsi_device *device; int sg_tablesize; /* adapter's max scatter-gather table size */ u32 index; /* device index number */ - spinlock_t sfd_lock; /* protect file descriptor list for device */ + /* sfds is protected by sg_index_lock */ struct list_head sfds; struct rw_semaphore o_sem; /* exclude open should hold this rwsem */ volatile char detached; /* 0->attached, 1->detached pending removal */ @@ -225,9 +227,9 @@ static int sfds_list_empty(Sg_device *sdp) unsigned long flags; int ret; - spin_lock_irqsave(&sdp->sfd_lock, flags); + read_lock_irqsave(&sg_index_lock, flags); ret = list_empty(&sdp->sfds); - spin_unlock_irqrestore(&sdp->sfd_lock, flags); + 
read_unlock_irqrestore(&sg_index_lock, flags); return ret; } @@ -1391,7 +1393,6 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) disk->first_minor = k; sdp->disk = disk; sdp->device = scsidp; - spin_lock_init(&sdp->sfd_lock); INIT_LIST_HEAD(&sdp->sfds); init_rwsem(&sdp->o_sem); sdp->sg_tablesize = queue_max_segments(q); @@ -1526,13 +1527,11 @@ static void sg_remove(struct device *cl_dev, struct class_interface *cl_intf) /* Need a write lock to set sdp->detached. */ write_lock_irqsave(&sg_index_lock, iflags); - spin_lock(&sdp->sfd_lock); sdp->detached = 1; list_for_each_entry(sfp, &sdp->sfds, sfd_siblings) { wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_HUP); } - spin_unlock(&sdp->sfd_lock); write_unlock_irqrestore(&sg_index_lock, iflags); sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic"); @@ -2057,13 +2056,13 @@ sg_add_sfp(Sg_device * sdp, int dev) sfp->cmd_q = SG_DEF_COMMAND_Q; sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; - spin_lock_irqsave(&sdp->sfd_lock, iflags); + write_lock_irqsave(&sg_index_lock, iflags); if (sdp->detached) { - spin_unlock_irqrestore(&sdp->sfd_lock, iflags); + write_unlock_irqrestore(&sg_index_lock, iflags); return ERR_PTR(-ENODEV); } list_add_tail(&sfp->sfd_siblings, &sdp->sfds); - spin_unlock_irqrestore(&sdp->sfd_lock, iflags); + write_unlock_irqrestore(&sg_index_lock, iflags); SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: sfp=0x%p\n", sfp)); if (unlikely(sg_big_buff != def_reserved_size)) sg_big_buff = def_reserved_size; @@ -2110,12 +2109,11 @@ static void sg_remove_sfp_usercontext(struct work_struct *work) static void sg_remove_sfp(struct kref *kref) { struct sg_fd *sfp = container_of(kref, struct sg_fd, f_ref); - struct sg_device *sdp = sfp->parentdp; unsigned long iflags; - spin_lock_irqsave(&sdp->sfd_lock, iflags); + write_lock_irqsave(&sg_index_lock, iflags); list_del(&sfp->sfd_siblings); - spin_unlock_irqrestore(&sdp->sfd_lock, iflags); + 
write_unlock_irqrestore(&sg_index_lock, iflags); INIT_WORK(&sfp->ew.work, sg_remove_sfp_usercontext); schedule_work(&sfp->ew.work); @@ -2502,7 +2500,7 @@ static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v) return 0; } -/* must be called while holding sg_index_lock and sfd_lock */ +/* must be called while holding sg_index_lock */ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) { int k, m, new_interface, blen, usg; @@ -2587,26 +2585,22 @@ static int sg_proc_seq_show_debug(struct seq_file *s, void *v) read_lock_irqsave(&sg_index_lock, iflags); sdp = it ? sg_lookup_dev(it->index) : NULL; - if (sdp) { - spin_lock(&sdp->sfd_lock); - if (!list_empty(&sdp->sfds)) { - struct scsi_device *scsidp = sdp->device; + if (sdp && !list_empty(&sdp->sfds)) { + struct scsi_device *scsidp = sdp->device; - seq_printf(s, " >>> device=%s ", sdp->disk->disk_name); - if (sdp->detached) - seq_printf(s, "detached pending close "); - else - seq_printf - (s, "scsi%d chan=%d id=%d lun=%d em=%d", - scsidp->host->host_no, - scsidp->channel, scsidp->id, - scsidp->lun, - scsidp->host->hostt->emulated); - seq_printf(s, " sg_tablesize=%d excl=%d\n", - sdp->sg_tablesize, sdp->exclude); - sg_proc_debug_helper(s, sdp); - } - spin_unlock(&sdp->sfd_lock); + seq_printf(s, " >>> device=%s ", sdp->disk->disk_name); + if (sdp->detached) + seq_printf(s, "detached pending close "); + else + seq_printf + (s, "scsi%d chan=%d id=%d lun=%d em=%d", + scsidp->host->host_no, + scsidp->channel, scsidp->id, + scsidp->lun, + scsidp->host->hostt->emulated); + seq_printf(s, " sg_tablesize=%d excl=%d\n", + sdp->sg_tablesize, sdp->exclude); + sg_proc_debug_helper(s, sdp); } read_unlock_irqrestore(&sg_index_lock, iflags); return 0; -- cgit v0.10.2 From bafc8ad82d482f9ecb9111969a3fdcef366bf8cb Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 25 Oct 2013 10:25:14 +0100 Subject: [SCSI] Revert "sg: checking sdp->detached isn't protected when open" This reverts commit 
e32c9e6300e3af659cbfe45e90a1e7dcd3572ada. This is one of four patches that was causing this bug [ 205.372823] ================================================ [ 205.372901] [ BUG: lock held when returning to user space! ] [ 205.372979] 3.12.0-rc6-hw-debug-pagealloc+ #67 Not tainted [ 205.373055] ------------------------------------------------ [ 205.373132] megarc.bin/5283 is leaving the kernel with locks still held! [ 205.373212] 1 lock held by megarc.bin/5283: [ 205.373285] #0: (&sdp->o_sem){.+.+..}, at: [] sg_open+0x3a0/0x4d0 Cc: Vaughan Cao Acked-by: Douglas Gilbert Signed-off-by: James Bottomley diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 64df1ab..d4af132 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -295,20 +295,23 @@ sg_open(struct inode *inode, struct file *filp) if (flags & O_EXCL) sdp->exclude = 1; /* used by release lock */ + if (sdp->detached) { + retval = -ENODEV; + goto sem_out; + } if (sfds_list_empty(sdp)) { /* no existing opens on this device */ sdp->sgdebug = 0; q = sdp->device->request_queue; sdp->sg_tablesize = queue_max_segments(q); } - sfp = sg_add_sfp(sdp, dev); - if (!IS_ERR(sfp)) + if ((sfp = sg_add_sfp(sdp, dev))) filp->private_data = sfp; /* retval is already provably zero at this point because of the * check after retval = scsi_autopm_get_device(sdp->device)) */ else { - retval = PTR_ERR(sfp); - + retval = -ENOMEM; +sem_out: if (flags & O_EXCL) { sdp->exclude = 0; /* undo if error */ up_write(&sdp->o_sem); @@ -2042,7 +2045,7 @@ sg_add_sfp(Sg_device * sdp, int dev) sfp = kzalloc(sizeof(*sfp), GFP_ATOMIC | __GFP_NOWARN); if (!sfp) - return ERR_PTR(-ENOMEM); + return NULL; init_waitqueue_head(&sfp->read_wait); rwlock_init(&sfp->rq_list_lock); @@ -2057,10 +2060,6 @@ sg_add_sfp(Sg_device * sdp, int dev) sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; write_lock_irqsave(&sg_index_lock, iflags); - if (sdp->detached) { - write_unlock_irqrestore(&sg_index_lock, iflags); - return ERR_PTR(-ENODEV); - } 
list_add_tail(&sfp->sfd_siblings, &sdp->sfds); write_unlock_irqrestore(&sg_index_lock, iflags); SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: sfp=0x%p\n", sfp)); -- cgit v0.10.2 From 98481ff0bb8792ebfb832e330e56d3c629ba5fa6 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 25 Oct 2013 10:26:38 +0100 Subject: [SCSI] Revert "sg: no need sg_open_exclusive_lock" This reverts commit 00b2d9d6d05b56fc1d77071ff8ccbd2c65b48dec. This is one of four patches that was causing this bug [ 205.372823] ================================================ [ 205.372901] [ BUG: lock held when returning to user space! ] [ 205.372979] 3.12.0-rc6-hw-debug-pagealloc+ #67 Not tainted [ 205.373055] ------------------------------------------------ [ 205.373132] megarc.bin/5283 is leaving the kernel with locks still held! [ 205.373212] 1 lock held by megarc.bin/5283: [ 205.373285] #0: (&sdp->o_sem){.+.+..}, at: [] sg_open+0x3a0/0x4d0 Cc: Vaughan Cao Acked-by: Douglas Gilbert Signed-off-by: James Bottomley diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index d4af132..4efa9b5 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -105,6 +105,8 @@ static int scatter_elem_sz_prev = SG_SCATTER_SZ; static int sg_add(struct device *, struct class_interface *); static void sg_remove(struct device *, struct class_interface *); +static DEFINE_SPINLOCK(sg_open_exclusive_lock); + static DEFINE_IDR(sg_index_idr); static DEFINE_RWLOCK(sg_index_lock); /* Also used to lock file descriptor list for device */ @@ -174,6 +176,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ struct list_head sfds; struct rw_semaphore o_sem; /* exclude open should hold this rwsem */ volatile char detached; /* 0->attached, 1->detached pending removal */ + /* exclude protected by sg_open_exclusive_lock */ char exclude; /* opened for exclusive access */ char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */ struct gendisk *disk; @@ -222,6 +225,27 @@ static int sg_allow_access(struct 
file *filp, unsigned char *cmd) return blk_verify_command(cmd, filp->f_mode & FMODE_WRITE); } +static int get_exclude(Sg_device *sdp) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&sg_open_exclusive_lock, flags); + ret = sdp->exclude; + spin_unlock_irqrestore(&sg_open_exclusive_lock, flags); + return ret; +} + +static int set_exclude(Sg_device *sdp, char val) +{ + unsigned long flags; + + spin_lock_irqsave(&sg_open_exclusive_lock, flags); + sdp->exclude = val; + spin_unlock_irqrestore(&sg_open_exclusive_lock, flags); + return val; +} + static int sfds_list_empty(Sg_device *sdp) { unsigned long flags; @@ -293,7 +317,7 @@ sg_open(struct inode *inode, struct file *filp) } /* Since write lock is held, no need to check sfd_list */ if (flags & O_EXCL) - sdp->exclude = 1; /* used by release lock */ + set_exclude(sdp, 1); if (sdp->detached) { retval = -ENODEV; @@ -313,7 +337,7 @@ sg_open(struct inode *inode, struct file *filp) retval = -ENOMEM; sem_out: if (flags & O_EXCL) { - sdp->exclude = 0; /* undo if error */ + set_exclude(sdp, 0); /* undo if error */ up_write(&sdp->o_sem); } else up_read(&sdp->o_sem); @@ -340,8 +364,8 @@ sg_release(struct inode *inode, struct file *filp) return -ENXIO; SCSI_LOG_TIMEOUT(3, printk("sg_release: %s\n", sdp->disk->disk_name)); - excl = sdp->exclude; - sdp->exclude = 0; + excl = get_exclude(sdp); + set_exclude(sdp, 0); if (excl) up_write(&sdp->o_sem); else @@ -2598,7 +2622,7 @@ static int sg_proc_seq_show_debug(struct seq_file *s, void *v) scsidp->lun, scsidp->host->hostt->emulated); seq_printf(s, " sg_tablesize=%d excl=%d\n", - sdp->sg_tablesize, sdp->exclude); + sdp->sg_tablesize, get_exclude(sdp)); sg_proc_debug_helper(s, sdp); } read_unlock_irqrestore(&sg_index_lock, iflags); -- cgit v0.10.2 From 065b4a2f5952df2c46aa04d24ffcce65cc75a1a9 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 25 Oct 2013 10:27:02 +0100 Subject: [SCSI] Revert "sg: use rwsem to solve race during exclusive open" This reverts commit 
15b06f9a02406e5460001db6d5af5c738cd3d4e7. This is one of four patches that was causing this bug [ 205.372823] ================================================ [ 205.372901] [ BUG: lock held when returning to user space! ] [ 205.372979] 3.12.0-rc6-hw-debug-pagealloc+ #67 Not tainted [ 205.373055] ------------------------------------------------ [ 205.373132] megarc.bin/5283 is leaving the kernel with locks still held! [ 205.373212] 1 lock held by megarc.bin/5283: [ 205.373285] #0: (&sdp->o_sem){.+.+..}, at: [] sg_open+0x3a0/0x4d0 Cc: Vaughan Cao Acked-by: Douglas Gilbert Signed-off-by: James Bottomley diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4efa9b5..df5e961 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -170,11 +170,11 @@ typedef struct sg_fd { /* holds the state of a file descriptor */ typedef struct sg_device { /* holds the state of each scsi generic device */ struct scsi_device *device; + wait_queue_head_t o_excl_wait; /* queue open() when O_EXCL in use */ int sg_tablesize; /* adapter's max scatter-gather table size */ u32 index; /* device index number */ /* sfds is protected by sg_index_lock */ struct list_head sfds; - struct rw_semaphore o_sem; /* exclude open should hold this rwsem */ volatile char detached; /* 0->attached, 1->detached pending removal */ /* exclude protected by sg_open_exclusive_lock */ char exclude; /* opened for exclusive access */ @@ -265,6 +265,7 @@ sg_open(struct inode *inode, struct file *filp) struct request_queue *q; Sg_device *sdp; Sg_fd *sfp; + int res; int retval; nonseekable_open(inode, filp); @@ -293,35 +294,35 @@ sg_open(struct inode *inode, struct file *filp) goto error_out; } - if ((flags & O_EXCL) && (O_RDONLY == (flags & O_ACCMODE))) { - retval = -EPERM; /* Can't lock it with read only access */ - goto error_out; - } - if (flags & O_NONBLOCK) { - if (flags & O_EXCL) { - if (!down_write_trylock(&sdp->o_sem)) { - retval = -EBUSY; - goto error_out; - } - } else { - if (!down_read_trylock(&sdp->o_sem)) 
{ - retval = -EBUSY; - goto error_out; - } + if (flags & O_EXCL) { + if (O_RDONLY == (flags & O_ACCMODE)) { + retval = -EPERM; /* Can't lock it with read only access */ + goto error_out; + } + if (!sfds_list_empty(sdp) && (flags & O_NONBLOCK)) { + retval = -EBUSY; + goto error_out; + } + res = wait_event_interruptible(sdp->o_excl_wait, + ((!sfds_list_empty(sdp) || get_exclude(sdp)) ? 0 : set_exclude(sdp, 1))); + if (res) { + retval = res; /* -ERESTARTSYS because signal hit process */ + goto error_out; + } + } else if (get_exclude(sdp)) { /* some other fd has an exclusive lock on dev */ + if (flags & O_NONBLOCK) { + retval = -EBUSY; + goto error_out; + } + res = wait_event_interruptible(sdp->o_excl_wait, !get_exclude(sdp)); + if (res) { + retval = res; /* -ERESTARTSYS because signal hit process */ + goto error_out; } - } else { - if (flags & O_EXCL) - down_write(&sdp->o_sem); - else - down_read(&sdp->o_sem); } - /* Since write lock is held, no need to check sfd_list */ - if (flags & O_EXCL) - set_exclude(sdp, 1); - if (sdp->detached) { retval = -ENODEV; - goto sem_out; + goto error_out; } if (sfds_list_empty(sdp)) { /* no existing opens on this device */ sdp->sgdebug = 0; @@ -330,18 +331,17 @@ sg_open(struct inode *inode, struct file *filp) } if ((sfp = sg_add_sfp(sdp, dev))) filp->private_data = sfp; - /* retval is already provably zero at this point because of the - * check after retval = scsi_autopm_get_device(sdp->device)) - */ else { - retval = -ENOMEM; -sem_out: if (flags & O_EXCL) { set_exclude(sdp, 0); /* undo if error */ - up_write(&sdp->o_sem); - } else - up_read(&sdp->o_sem); + wake_up_interruptible(&sdp->o_excl_wait); + } + retval = -ENOMEM; + goto error_out; + } + retval = 0; error_out: + if (retval) { scsi_autopm_put_device(sdp->device); sdp_put: scsi_device_put(sdp->device); @@ -358,18 +358,13 @@ sg_release(struct inode *inode, struct file *filp) { Sg_device *sdp; Sg_fd *sfp; - int excl; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = 
sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, printk("sg_release: %s\n", sdp->disk->disk_name)); - excl = get_exclude(sdp); set_exclude(sdp, 0); - if (excl) - up_write(&sdp->o_sem); - else - up_read(&sdp->o_sem); + wake_up_interruptible(&sdp->o_excl_wait); scsi_autopm_put_device(sdp->device); kref_put(&sfp->f_ref, sg_remove_sfp); @@ -1421,7 +1416,7 @@ static Sg_device *sg_alloc(struct gendisk *disk, struct scsi_device *scsidp) sdp->disk = disk; sdp->device = scsidp; INIT_LIST_HEAD(&sdp->sfds); - init_rwsem(&sdp->o_sem); + init_waitqueue_head(&sdp->o_excl_wait); sdp->sg_tablesize = queue_max_segments(q); sdp->index = k; kref_init(&sdp->d_ref); @@ -2132,11 +2127,13 @@ static void sg_remove_sfp_usercontext(struct work_struct *work) static void sg_remove_sfp(struct kref *kref) { struct sg_fd *sfp = container_of(kref, struct sg_fd, f_ref); + struct sg_device *sdp = sfp->parentdp; unsigned long iflags; write_lock_irqsave(&sg_index_lock, iflags); list_del(&sfp->sfd_siblings); write_unlock_irqrestore(&sg_index_lock, iflags); + wake_up_interruptible(&sdp->o_excl_wait); INIT_WORK(&sfp->ew.work, sg_remove_sfp_usercontext); schedule_work(&sfp->ew.work); -- cgit v0.10.2 From 75c0758137c7ac647927b4b12bb5cfca96a0e4e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 25 Oct 2013 16:22:47 +0200 Subject: acpi-cpufreq: Fail initialization if driver cannot be registered Make acpi_cpufreq_init() return error codes when the driver cannot be registered so that the module doesn't stay useless in memory and so that acpi_cpufreq_exit() doesn't attempt to unregister things that have never been registered when the module is unloaded. Signed-off-by: Rafael J. 
Wysocki Acked-by: Viresh Kumar diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index d2c3253..506fd23 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -986,12 +986,12 @@ static int __init acpi_cpufreq_init(void) { int ret; + if (acpi_disabled) + return -ENODEV; + /* don't keep reloading if cpufreq_driver exists */ if (cpufreq_get_current_driver()) - return 0; - - if (acpi_disabled) - return 0; + return -EEXIST; pr_debug("acpi_cpufreq_init\n"); -- cgit v0.10.2 From 05e16745c0c471bba313961b605b6da3b21a853d Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 25 Oct 2013 18:15:06 +0800 Subject: seq_file: always update file->f_pos in seq_lseek() This issue was first pointed out by Jiaxing Wang several months ago, but it received no further comments: https://lkml.org/lkml/2013/6/29/41 As we know pread() does not change f_pos, so after pread(), file->f_pos and m->read_pos become different. And seq_lseek() does not update file->f_pos if offset equals m->read_pos, so after pread() and seq_lseek()(lseek to m->read_pos), then a subsequent read may read from a wrong position, the following program produces the problem: char str1[32] = { 0 }; char str2[32] = { 0 }; int poffset = 10; int count = 20; /*open any seq file*/ int fd = open("/proc/modules", O_RDONLY); pread(fd, str1, count, poffset); printf("pread:%s\n", str1); /*seek to where m->read_pos is*/ lseek(fd, poffset+count, SEEK_SET); /*supposed to read from poffset+count, but this reads from position 0*/ read(fd, str2, count); printf("read:%s\n", str2); output: pread: ck_netbios_ns 12665 read: nf_conntrack_netbios /proc/modules: nf_conntrack_netbios_ns 12665 0 - Live 0xffffffffa038b000 nf_conntrack_broadcast 12589 1 nf_conntrack_netbios_ns, Live 0xffffffffa0386000 So we always update file->f_pos to offset in seq_lseek() to fix this issue.
Signed-off-by: Jiaxing Wang Signed-off-by: Gu Zheng Signed-off-by: Al Viro diff --git a/fs/seq_file.c b/fs/seq_file.c index 3135c25..a290157 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence) m->read_pos = offset; retval = file->f_pos = offset; } + } else { + file->f_pos = offset; } } file->f_version = m->version; -- cgit v0.10.2 From 031e2777e03401d629e62602c8ce42b017732d4d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 25 Oct 2013 15:03:59 +0100 Subject: mtd: gpmi: fix ECC regression The "legacy" ECC layout used until 3.12-rc1 uses all the OOB area by computing the ECC strength and ECC step size ourselves. Commit 2febcdf84b ("mtd: gpmi: set the BCHs geometry with the ecc info") makes the driver use the ECC info (ECC strength and ECC step size) provided by the MTD code, and creates a different NAND ECC layout for the BCH, and use the new ECC layout. This causes a regression: We can not mount the ubifs which was created by the old NAND ECC layout. This patch fixes this issue by reverting to the legacy ECC layout. We will probably introduce a new device-tree property to indicate that the new ECC layout can be used. For now though, for the imminent 3.12 release, we just unconditionally revert to the 3.11 behaviour. This leaves a harmless cosmetic warning about an unused function. At this point in the cycle I really don't care. Signed-off-by: David Woodhouse Signed-off-by: Brian Norris Acked-by: Huang Shijie Acked-by: Marek Vasut Tested-by: Marek Vasut diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 59ab069..a9830ff 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -349,7 +349,7 @@ static int legacy_set_geometry(struct gpmi_nand_data *this) int common_nfc_set_geometry(struct gpmi_nand_data *this) { - return set_geometry_by_ecc_info(this) ? 
0 : legacy_set_geometry(this); + return legacy_set_geometry(this); } struct dma_chan *get_dma_chan(struct gpmi_nand_data *this) -- cgit v0.10.2 From 58932e96e438cd78f75e765d7b87ef39d3533d15 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 25 Oct 2013 21:53:33 +0800 Subject: target/pscsi: fix return value check In case of error, the function scsi_host_lookup() returns NULL pointer not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Signed-off-by: Wei Yongjun Cc: Signed-off-by: Nicholas Bellinger diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 551c96c..0f199f6 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -134,10 +134,10 @@ static int pscsi_pmode_enable_hba(struct se_hba *hba, unsigned long mode_flag) * pSCSI Host ID and enable for phba mode */ sh = scsi_host_lookup(phv->phv_host_id); - if (IS_ERR(sh)) { + if (!sh) { pr_err("pSCSI: Unable to locate SCSI Host for" " phv_host_id: %d\n", phv->phv_host_id); - return PTR_ERR(sh); + return -EINVAL; } phv->phv_lld_host = sh; @@ -515,10 +515,10 @@ static int pscsi_configure_device(struct se_device *dev) sh = phv->phv_lld_host; } else { sh = scsi_host_lookup(pdv->pdv_host_id); - if (IS_ERR(sh)) { + if (!sh) { pr_err("pSCSI: Unable to locate" " pdv_host_id: %d\n", pdv->pdv_host_id); - return PTR_ERR(sh); + return -EINVAL; } } } else { -- cgit v0.10.2 From 60a01f558af9c48b0bb31f303c479e32721add3f Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 25 Oct 2013 10:44:15 -0700 Subject: vhost/scsi: Fix incorrect usage of get_user_pages_fast write parameter This patch addresses a long-standing bug where the get_user_pages_fast() write parameter used for setting the underlying page table entry permission bits was incorrectly set to write=1 for data_direction=DMA_TO_DEVICE, and passed into get_user_pages_fast() via vhost_scsi_map_iov_to_sgl(). 
However, this parameter is intended to signal WRITEs to pinned userspace PTEs for the virtio-scsi DMA_FROM_DEVICE -> READ payload case, and *not* for the virtio-scsi DMA_TO_DEVICE -> WRITE payload case. This bug would manifest itself as random process segmentation faults on KVM host after repeated vhost starts + stops and/or with lots of vhost endpoints + LUNs. Cc: Stefan Hajnoczi Cc: Michael S. Tsirkin Cc: Asias He Cc: # 3.6+ Signed-off-by: Nicholas Bellinger diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index ce5221f..e663921 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1056,7 +1056,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) if (data_direction != DMA_NONE) { ret = vhost_scsi_map_iov_to_sgl(cmd, &vq->iov[data_first], data_num, - data_direction == DMA_TO_DEVICE); + data_direction == DMA_FROM_DEVICE); if (unlikely(ret)) { vq_err(vq, "Failed to map iov to sgl\n"); goto err_free; -- cgit v0.10.2 From b63eae0a6c84839275a4638a7baa391be965cd0e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 25 Oct 2013 23:43:10 +0200 Subject: ALSA: hda - Add missing initial vmaster hook at build_controls callback The generic parser has a support of vmaster hook, but this is initialized only in the init callback with the check of the presence of the corresponding kctl. However, since kctl is NULL at the very first init callback that is called before build_controls callback, the vmaster hook sync is skipped there. Eventually this leads to the uninitialized state depending on the hook implementation. This patch adds a simple workaround, just calling the sync function explicitly at build_controls callback. 
Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 26ad4f0..b7c89df 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4475,9 +4475,11 @@ int snd_hda_gen_build_controls(struct hda_codec *codec) true, &spec->vmaster_mute.sw_kctl); if (err < 0) return err; - if (spec->vmaster_mute.hook) + if (spec->vmaster_mute.hook) { snd_hda_add_vmaster_hook(codec, &spec->vmaster_mute, spec->vmaster_mute_enum); + snd_hda_sync_vmaster_hook(&spec->vmaster_mute); + } } free_kctls(spec); /* no longer needed */ -- cgit v0.10.2 From 1ac3293095deb01ccc491f3c171e12722ebd0bc9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 26 Oct 2013 00:24:14 +0200 Subject: ALSA: hda - Fix silent headphone on Thinkpads with AD1984A codec AD1984A codec has a couple of pins with EAPD controls, and the generic codec driver tries to turn each of them on/off depending on the pin active state. However, Thinkpads seem to use EAPD of the speaker pin as a master EAPD for controlling the mute of all outputs, including the headphone. This results in dead headphone output when a headphone is plugged in, because plugging it in mutes the speaker and turns off EAPD. The fix is to simply set the spec->gen.keep_eapd_on flag. [This is a regression fix on 3.12 where we moved the AD codec parser to the generic parser. 3.11 and earlier didn't show this problem because static quirks were still being used.]
Reported-and-tested-by: Vito Caputo Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 0cbdd87..2aa2f57 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -968,6 +968,15 @@ static void ad1884_fixup_hp_eapd(struct hda_codec *codec, } } +static void ad1884_fixup_thinkpad(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct ad198x_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->gen.keep_eapd_on = 1; +} + /* set magic COEFs for dmic */ static const struct hda_verb ad1884_dmic_init_verbs[] = { {0x01, AC_VERB_SET_COEF_INDEX, 0x13f7}, @@ -979,6 +988,7 @@ enum { AD1884_FIXUP_AMP_OVERRIDE, AD1884_FIXUP_HP_EAPD, AD1884_FIXUP_DMIC_COEF, + AD1884_FIXUP_THINKPAD, AD1884_FIXUP_HP_TOUCHSMART, }; @@ -997,6 +1007,12 @@ static const struct hda_fixup ad1884_fixups[] = { .type = HDA_FIXUP_VERBS, .v.verbs = ad1884_dmic_init_verbs, }, + [AD1884_FIXUP_THINKPAD] = { + .type = HDA_FIXUP_FUNC, + .v.func = ad1884_fixup_thinkpad, + .chained = true, + .chain_id = AD1884_FIXUP_DMIC_COEF, + }, [AD1884_FIXUP_HP_TOUCHSMART] = { .type = HDA_FIXUP_VERBS, .v.verbs = ad1884_dmic_init_verbs, @@ -1008,7 +1024,7 @@ static const struct hda_fixup ad1884_fixups[] = { static const struct snd_pci_quirk ad1884_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x2a82, "HP Touchsmart", AD1884_FIXUP_HP_TOUCHSMART), SND_PCI_QUIRK_VENDOR(0x103c, "HP", AD1884_FIXUP_HP_EAPD), - SND_PCI_QUIRK_VENDOR(0x17aa, "Lenovo Thinkpad", AD1884_FIXUP_DMIC_COEF), + SND_PCI_QUIRK_VENDOR(0x17aa, "Lenovo Thinkpad", AD1884_FIXUP_THINKPAD), {} }; -- cgit v0.10.2 From 0e67d9903d71ce3c5889fa2e1788d4335794a0f6 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Tue, 22 Oct 2013 20:36:25 +0200 Subject: net: wan: sbni: remove assembly crc32 code There is also a C function doing the same thing. Unless the asm code is 110% faster we could stick to the C function. 
Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index 5bbcb5e..388ddf6 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -148,10 +148,6 @@ static int enslave( struct net_device *, struct net_device * ); static int emancipate( struct net_device * ); #endif -#ifdef __i386__ -#define ASM_CRC 1 -#endif - static const char version[] = "Granch SBNI12 driver ver 5.0.1 Jun 22 2001 Denis I.Timofeev.\n"; @@ -1551,88 +1547,6 @@ __setup( "sbni=", sbni_setup ); /* -------------------------------------------------------------------------- */ -#ifdef ASM_CRC - -static u32 -calc_crc32( u32 crc, u8 *p, u32 len ) -{ - register u32 _crc; - _crc = crc; - - __asm__ __volatile__ ( - "xorl %%ebx, %%ebx\n" - "movl %2, %%esi\n" - "movl %3, %%ecx\n" - "movl $crc32tab, %%edi\n" - "shrl $2, %%ecx\n" - "jz 1f\n" - - ".align 4\n" - "0:\n" - "movb %%al, %%bl\n" - "movl (%%esi), %%edx\n" - "shrl $8, %%eax\n" - "xorb %%dl, %%bl\n" - "shrl $8, %%edx\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb %%dl, %%bl\n" - "shrl $8, %%edx\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb %%dl, %%bl\n" - "movb %%dh, %%dl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb %%dl, %%bl\n" - "addl $4, %%esi\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "decl %%ecx\n" - "jnz 0b\n" - - "1:\n" - "movl %3, %%ecx\n" - "andl $3, %%ecx\n" - "jz 2f\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb (%%esi), %%bl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "decl %%ecx\n" - "jz 2f\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb 1(%%esi), %%bl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - - "decl %%ecx\n" - "jz 2f\n" - - "movb %%al, %%bl\n" - "shrl $8, %%eax\n" - "xorb 2(%%esi), %%bl\n" - "xorl (%%edi,%%ebx,4), %%eax\n" - "2:\n" - : "=a" (_crc) - : "0" (_crc), "g" (p), "g" (len) - : "bx", "cx", "dx", "si", 
"di" - ); - - return _crc; -} - -#else /* ASM_CRC */ - static u32 calc_crc32( u32 crc, u8 *p, u32 len ) { @@ -1642,9 +1556,6 @@ calc_crc32( u32 crc, u8 *p, u32 len ) return crc; } -#endif /* ASM_CRC */ - - static u32 crc32tab[] __attribute__ ((aligned(8))) = { 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, -- cgit v0.10.2 From 45e526e80e6fdc796d3bc05716d5c930a427df4d Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 23 Oct 2013 15:04:49 +0200 Subject: netconsole: fix NULL pointer dereference We need to disable the netconsole (enabled = 0) before setting nt->np.dev to NULL because otherwise we might still have users after the netpoll_cleanup() since nt->enabled is set afterwards and we can have a message which will result in a NULL pointer dereference. It is very easy to hit dereferences all over the netpoll_send_udp function by running the following two loops in parallel: while [ 1 ]; do echo 1 > enabled; echo 0 > enabled; done; while [ 1 ]; do echo 00:11:22:33:44:55 > remote_mac; done; (the second loop is to generate messages, it can be done by anything) We're safe to set nt->np.dev = NULL and nt->enabled = 0 with the spinlock since it's required in the write_msg() function. Signed-off-by: Nikolay Aleksandrov Reviewed-by: Veacelsav Falico Signed-off-by: David S. 
Miller diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index adeee61..1505dcb 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -310,6 +310,7 @@ static ssize_t store_enabled(struct netconsole_target *nt, const char *buf, size_t count) { + unsigned long flags; int enabled; int err; @@ -342,6 +343,13 @@ static ssize_t store_enabled(struct netconsole_target *nt, printk(KERN_INFO "netconsole: network logging started\n"); } else { /* 0 */ + /* We need to disable the netconsole before cleaning it up + * otherwise we might end up in write_msg() with + * nt->np.dev == NULL and nt->enabled == 1 + */ + spin_lock_irqsave(&target_list_lock, flags); + nt->enabled = 0; + spin_unlock_irqrestore(&target_list_lock, flags); netpoll_cleanup(&nt->np); } -- cgit v0.10.2 From c7c6effdeffcafd792b9f880ad52e48689eea4ad Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 24 Oct 2013 12:09:24 +0200 Subject: netconsole: fix multiple race conditions In every netconsole option that can be set through configfs there's a race when checking for nt->enabled since it can be modified at the same time. Probably the most damage can be done by store_enabled when racing with another instance of itself. Fix all the races with one stone by moving the mutex lock around the ->store call for all options. Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 1505dcb..c9a1592 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -325,9 +325,7 @@ static ssize_t store_enabled(struct netconsole_target *nt, return -EINVAL; } - mutex_lock(&nt->mutex); if (enabled) { /* 1 */ - /* * Skip netpoll_parse_options() -- all the attributes are * already configured via configfs. Just print them out. 
@@ -335,13 +333,10 @@ static ssize_t store_enabled(struct netconsole_target *nt, netpoll_print_options(&nt->np); err = netpoll_setup(&nt->np); - if (err) { - mutex_unlock(&nt->mutex); + if (err) return err; - } printk(KERN_INFO "netconsole: network logging started\n"); - } else { /* 0 */ /* We need to disable the netconsole before cleaning it up * otherwise we might end up in write_msg() with @@ -354,7 +349,6 @@ static ssize_t store_enabled(struct netconsole_target *nt, } nt->enabled = enabled; - mutex_unlock(&nt->mutex); return strnlen(buf, count); } @@ -571,8 +565,10 @@ static ssize_t netconsole_target_attr_store(struct config_item *item, struct netconsole_target_attr *na = container_of(attr, struct netconsole_target_attr, attr); + mutex_lock(&nt->mutex); if (na->store) ret = na->store(nt, buf, count); + mutex_unlock(&nt->mutex); return ret; } -- cgit v0.10.2 From 8fb479a47c869820966e7298f38038aa334d889c Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Wed, 23 Oct 2013 23:36:30 +0200 Subject: netpoll: fix rx_hook() interface by passing the skb Right now skb->data is passed to rx_hook() even if the skb has not been linearised and without giving rx_hook() a way to linearise it. Change the rx_hook() interface and make it accept the skb and the offset to the UDP payload as arguments. rx_hook() is also renamed to rx_skb_hook() to ensure that out of the tree users notice the API change. In this way any rx_skb_hook() implementation can perform all the needed operations to properly (and safely) access the skb data. Signed-off-by: Antonio Quartulli Signed-off-by: David S. 
Miller diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index f3c7c24..fbfdb9d 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -24,7 +24,8 @@ struct netpoll { struct net_device *dev; char dev_name[IFNAMSIZ]; const char *name; - void (*rx_hook)(struct netpoll *, int, char *, int); + void (*rx_skb_hook)(struct netpoll *np, int source, struct sk_buff *skb, + int offset, int len); union inet_addr local_ip, remote_ip; bool ipv6; @@ -41,7 +42,7 @@ struct netpoll_info { unsigned long rx_flags; spinlock_t rx_lock; struct semaphore dev_lock; - struct list_head rx_np; /* netpolls that registered an rx_hook */ + struct list_head rx_np; /* netpolls that registered an rx_skb_hook */ struct sk_buff_head neigh_tx; /* list of neigh requests to reply to */ struct sk_buff_head txq; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fc75c9e..8f97199 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -636,8 +636,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo netpoll_send_skb(np, send_skb); - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ + /* If there are several rx_skb_hooks for the same + * address we're fine by sending a single reply + */ break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); @@ -719,8 +720,9 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo netpoll_send_skb(np, send_skb); - /* If there are several rx_hooks for the same address, - we're fine by sending a single reply */ + /* If there are several rx_skb_hooks for the same + * address, we're fine by sending a single reply + */ break; } spin_unlock_irqrestore(&npinfo->rx_lock, flags); @@ -756,11 +758,12 @@ static bool pkt_is_ns(struct sk_buff *skb) int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) { - int proto, len, ulen; - int hits = 0; + int proto, len, ulen, data_len; + int hits = 0, offset; const struct iphdr *iph; 
struct udphdr *uh; struct netpoll *np, *tmp; + uint16_t source; if (list_empty(&npinfo->rx_np)) goto out; @@ -820,7 +823,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) len -= iph->ihl*4; uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); + offset = (unsigned char *)(uh + 1) - skb->data; ulen = ntohs(uh->len); + data_len = skb->len - offset; + source = ntohs(uh->source); if (ulen != len) goto out; @@ -834,9 +840,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (np->local_port && np->local_port != ntohs(uh->dest)) continue; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + np->rx_skb_hook(np, source, skb, offset, data_len); hits++; } } else { @@ -859,7 +863,10 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto out; uh = udp_hdr(skb); + offset = (unsigned char *)(uh + 1) - skb->data; ulen = ntohs(uh->len); + data_len = skb->len - offset; + source = ntohs(uh->source); if (ulen != skb->len) goto out; if (udp6_csum_init(skb, uh, IPPROTO_UDP)) @@ -872,9 +879,7 @@ int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) if (np->local_port && np->local_port != ntohs(uh->dest)) continue; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + np->rx_skb_hook(np, source, skb, offset, data_len); hits++; } #endif @@ -1062,7 +1067,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - if (np->rx_hook) { + if (np->rx_skb_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_flags |= NETPOLL_RX_ENABLED; list_add_tail(&np->rx, &npinfo->rx_np); -- cgit v0.10.2 From 01ba16d6ec85a1ec4669c75513a76b61ec53ee50 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 24 Oct 2013 10:14:27 +0200 Subject: ipv6: reset dst.expires value when clearing expire flag On receiving a packet too big icmp error we update the expire 
value by calling rt6_update_expires. This function uses dst_set_expires which is implemented such that it can only reduce the expiration value of the dst entry. If we insert new routing non-expiry information into the ipv6 fib where we already have a matching rt6_info we only clear the RTF_EXPIRES flag in rt6i_flags and leave the dst.expires value as is. When new mtu information arrives for that cached dst_entry we again call dst_set_expires. This time it won't update the dst.expires value because we left the dst.expires value intact from the last update. So dst_set_expires won't touch dst.expires. Fix this by resetting dst.expires when clearing the RTF_EXPIRES flag. dst_set_expires checks for a zero expiration and updates the dst.expires. In the past this (not updating dst.expires) was necessary because dst.expires was placed in a union with the dst_entry *from reference and rt6_clean_expires did assign NULL to it. This split happened in ecd9883724b78cc72ed92c98bcb1a46c764fff21 ("ipv6: fix race condition regarding dst->expires and dst->from"). Reported-by: Steinar H. Gunderson Reported-by: Valentijn Sessink Cc: YOSHIFUJI Hideaki Acked-by: Eric Dumazet Tested-by: Valentijn Sessink Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 48ec25a..5e661a9 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -165,6 +165,7 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) static inline void rt6_clean_expires(struct rt6_info *rt) { rt->rt6i_flags &= ~RTF_EXPIRES; + rt->dst.expires = 0; } static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) -- cgit v0.10.2 From e3bc10bd95d7fcc3f2ac690c6ff22833ea6781d6 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 24 Oct 2013 07:48:24 +0200 Subject: ipv6: ip6_dst_check needs to check for expired dst_entries On receiving a packet too big icmp error we check if our current cached dst_entry in the socket is still valid. This validation check did not care about the expiration of the (cached) route. The error path I traced down: The socket receives a packet too big mtu notification. It still has a valid dst_entry and thus issues the ip6_rt_pmtu_update on this dst_entry, setting RTF_EXPIRE and updates the dst.expiration value (which could fail because of not up-to-date expiration values, see previous patch). In some seldom cases we race with a) the ip6_fib gc or b) another routing lookup which would result in a recreation of the cached rt6_info from its parent non-cached rt6_info. While copying the rt6_info we reinitialize the metrics store by copying it over from the parent thus invalidating the just installed pmtu update (both dsts use the same key to the inetpeer storage). The dst_entry with the just invalidated metrics data would just get its RTF_EXPIRES flag cleared and would continue to stay valid for the socket. We should have not issued the pmtu update on the already expired dst_entry in the first placed. By checking the expiration on the dst entry and doing a relookup in case it is out of date we close the race because we would install a new rt6_info into the fib before we issue the pmtu update, thus closing this race. 
Not reliably updating the dst.expire value was fixed by the patch "ipv6: reset dst.expires value when clearing expire flag". Reported-by: Steinar H. Gunderson Reported-by: Valentijn Sessink Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Reviewed-by: Eric Dumazet Tested-by: Valentijn Sessink Signed-off-by: David S. Miller diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f54e3a1..04e17b3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1087,10 +1087,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) - return dst; + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; - return NULL; + if (rt6_check_expired(rt)) + return NULL; + + return dst; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) -- cgit v0.10.2 From 4c8e84b22aa1bfee40b047d7810ba08615235c05 Mon Sep 17 00:00:00 2001 From: Freddy Xin Date: Thu, 24 Oct 2013 14:58:25 +0800 Subject: ax88179_178a: Remove AX_MEDIUM_ALWAYS_ONE bit in AX_MEDIUM_STATUS_MODE register to avoid TX throttling Remove AX_MEDIUM_ALWAYS_ONE in AX_MEDIUM_STATUS_MODE register. Setting this bit may cause TX throttling in Half-Duplex mode. Signed-off-by: Freddy Xin Signed-off-by: David S. 
Miller diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 846cc19..8e8d0fc 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -78,7 +78,6 @@ #define AX_MEDIUM_STATUS_MODE 0x22 #define AX_MEDIUM_GIGAMODE 0x01 #define AX_MEDIUM_FULL_DUPLEX 0x02 - #define AX_MEDIUM_ALWAYS_ONE 0x04 #define AX_MEDIUM_EN_125MHZ 0x08 #define AX_MEDIUM_RXFLOW_CTRLEN 0x10 #define AX_MEDIUM_TXFLOW_CTRLEN 0x20 @@ -1065,8 +1064,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf) /* Configure default medium type => giga */ *tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | - AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE | - AX_MEDIUM_FULL_DUPLEX | AX_MEDIUM_GIGAMODE; + AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_FULL_DUPLEX | + AX_MEDIUM_GIGAMODE; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, 2, 2, tmp16); @@ -1225,7 +1224,7 @@ static int ax88179_link_reset(struct usbnet *dev) } mode = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | - AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE; + AX_MEDIUM_RXFLOW_CTRLEN; ax88179_read_cmd(dev, AX_ACCESS_MAC, PHYSICAL_LINK_STATUS, 1, 1, &link_sts); @@ -1339,8 +1338,8 @@ static int ax88179_reset(struct usbnet *dev) /* Configure default medium type => giga */ *tmp16 = AX_MEDIUM_RECEIVE_EN | AX_MEDIUM_TXFLOW_CTRLEN | - AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_ALWAYS_ONE | - AX_MEDIUM_FULL_DUPLEX | AX_MEDIUM_GIGAMODE; + AX_MEDIUM_RXFLOW_CTRLEN | AX_MEDIUM_FULL_DUPLEX | + AX_MEDIUM_GIGAMODE; ax88179_write_cmd(dev, AX_ACCESS_MAC, AX_MEDIUM_STATUS_MODE, 2, 2, tmp16); -- cgit v0.10.2 From 598c45b309eb401510653fed45fe74efae93be4e Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Fri, 25 Oct 2013 10:38:36 -0400 Subject: qlcnic: Do not force adapter to perform LRO without destination IP check Forcing adapter to perform LRO without destination IP check degrades the performance. Signed-off-by: Shahed Shaikh Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c index f8adc7b..b64e2be 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c @@ -785,8 +785,6 @@ void qlcnic_82xx_config_intr_coalesce(struct qlcnic_adapter *adapter) #define QLCNIC_ENABLE_IPV4_LRO 1 #define QLCNIC_ENABLE_IPV6_LRO 2 -#define QLCNIC_NO_DEST_IPV4_CHECK (1 << 8) -#define QLCNIC_NO_DEST_IPV6_CHECK (2 << 8) int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int enable) { @@ -806,11 +804,10 @@ int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int enable) word = 0; if (enable) { - word = QLCNIC_ENABLE_IPV4_LRO | QLCNIC_NO_DEST_IPV4_CHECK; + word = QLCNIC_ENABLE_IPV4_LRO; if (adapter->ahw->extra_capability[0] & QLCNIC_FW_CAP2_HW_LRO_IPV6) - word |= QLCNIC_ENABLE_IPV6_LRO | - QLCNIC_NO_DEST_IPV6_CHECK; + word |= QLCNIC_ENABLE_IPV6_LRO; } req.words[0] = cpu_to_le64(word); -- cgit v0.10.2 From d6994ca798f5897a4342f727b21d77e01d92f093 Mon Sep 17 00:00:00 2001 From: Shahed Shaikh Date: Fri, 25 Oct 2013 10:38:37 -0400 Subject: qlcnic: Do not read QLCNIC_FW_CAPABILITY_MORE_CAPS bit for 83xx adapter Only 82xx adapter advertises QLCNIC_FW_CAPABILITY_MORE_CAPS bit. Reading this bit from 83xx adapter causes the driver to skip extra capabilities registers. Because of this, driver was not issuing qlcnic_fw_cmd_set_drv_version() for 83xx adapter. This bug was introduced in commit 8af3f33db05c6d0146ad14905145a5c923770856 ("qlcnic: Add support for 'set driver version' in 83XX"). Signed-off-by: Shahed Shaikh Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 3ca00e0..ace217c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -2276,9 +2276,9 @@ int qlcnic_83xx_get_nic_info(struct qlcnic_adapter *adapter, temp = (cmd.rsp.arg[8] & 0x7FFE0000) >> 17; npar_info->max_linkspeed_reg_offset = temp; } - if (npar_info->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) - memcpy(ahw->extra_capability, &cmd.rsp.arg[16], - sizeof(ahw->extra_capability)); + + memcpy(ahw->extra_capability, &cmd.rsp.arg[16], + sizeof(ahw->extra_capability)); out: qlcnic_free_mbx_args(&cmd); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 9e61eb8..d8f4897 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -1131,7 +1131,10 @@ qlcnic_initialize_nic(struct qlcnic_adapter *adapter) if (err == -EIO) return err; adapter->ahw->extra_capability[0] = temp; + } else { + adapter->ahw->extra_capability[0] = 0; } + adapter->ahw->max_mac_filters = nic_info.max_mac_filters; adapter->ahw->max_mtu = nic_info.max_mtu; @@ -2159,8 +2162,7 @@ void qlcnic_set_drv_version(struct qlcnic_adapter *adapter) else if (qlcnic_83xx_check(adapter)) fw_cmd = QLCNIC_CMD_83XX_SET_DRV_VER; - if ((ahw->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) && - (ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_SET_DRV_VER)) + if (ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_SET_DRV_VER) qlcnic_fw_cmd_set_drv_version(adapter, fw_cmd); } -- cgit v0.10.2 From 54e181e073fc1415e41917d725ebdbd7de956455 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 26 Oct 2013 23:19:25 +0200 Subject: parisc: Do not crash 64bit SMP kernels on machines with >= 4GB RAM Since the beginning of the parisc-linux port, sometimes 64bit SMP kernels were not able to bring up other 
CPUs than the monarch CPU and instead crashed the kernel. The reason was unclear, esp. since it involved various machines (e.g. J5600, J6750 and SuperDome). Testing showed, that those crashes didn't happen when less than 4GB were installed, or if a 32bit Linux kernel was booted. In the end, the fix for those SMP problems is trivial: During the early phase of the initialization of the CPUs, including the monarch CPU, the PDC_PSW firmware function to enable WIDE (=64bit) mode is called. It's documented that this firmware function may clobber various registers, and one of those possibly clobbered registers is %cr30 which holds the task thread info pointer. Now, if %cr30 would always have been clobbered, then this bug would have been detected much earlier. But lots of testing finally showed, that - at least for %cr30 - on some machines only the upper 32bits of the 64bit register suddenly turned zero after the firmware call. So, after finding the root cause, the explanation for the various crashes became clear: - On 32bit SMP Linux kernels all upper 32bit were zero, so we didn't face this problem. - Monarch CPUs in 64bit mode always booted successfully, because the initial task thread info pointer was below 4GB. - Secondary CPUs booted successfully on machines with less than 4GB RAM because the upper 32bit were zero anyway. - Secondary CPUs failed to boot if we had more than 4GB RAM and the task thread info pointer was located above the 4GB boundary. Finally, the patch to fix this problem is trivial by saving the %cr30 register before the firmware call and restoring it afterwards. 
Signed-off-by: Helge Deller Signed-off-by: John David Anglin Cc: # 2.6.12+ Signed-off-by: Helge Deller diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index 37aabd7..d2d5825 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -195,6 +195,8 @@ common_stext: ldw MEM_PDC_HI(%r0),%r6 depd %r6, 31, 32, %r3 /* move to upper word */ + mfctl %cr30,%r6 /* PCX-W2 firmware bug */ + ldo PDC_PSW(%r0),%arg0 /* 21 */ ldo PDC_PSW_SET_DEFAULTS(%r0),%arg1 /* 2 */ ldo PDC_PSW_WIDE_BIT(%r0),%arg2 /* 2 */ @@ -203,6 +205,8 @@ common_stext: copy %r0,%arg3 stext_pdc_ret: + mtctl %r6,%cr30 /* restore task thread info */ + /* restore rfi target address*/ ldd TI_TASK-THREAD_SZ_ALGN(%sp), %r10 tophys_r1 %r10 -- cgit v0.10.2 From e9e2a904ef0a4f46ee5c845f3ae04e62b917bb6d Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 24 Oct 2013 14:37:53 +0530 Subject: be2net: Warn users of possible broken functionality on BE2 cards with very old FW versions with latest driver On very old FW versions < 4.0, the mailbox command to set interrupts on the card succeeds even though it is not supported and should have failed, leading to a scenario where interrupts do not work. Hence warn users to upgrade to a suitable FW version to avoid seeing broken functionality. Signed-off-by: Somnath Kotur Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index db02023..c99dac6 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -696,6 +696,15 @@ static inline int qnq_async_evt_rcvd(struct be_adapter *adapter) return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD; } +static inline int fw_major_num(const char *fw_ver) +{ + int fw_major = 0; + + sscanf(fw_ver, "%d.", &fw_major); + + return fw_major; +} + extern void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped); extern void be_link_status_update(struct be_adapter *adapter, u8 link_status); diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 2c38cc4..53ed58b 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3247,6 +3247,12 @@ static int be_setup(struct be_adapter *adapter) be_cmd_get_fw_ver(adapter, adapter->fw_ver, adapter->fw_on_flash); + if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) { + dev_err(dev, "Firmware on card is old(%s), IRQs may not work.", + adapter->fw_ver); + dev_err(dev, "Please upgrade firmware to version >= 4.0\n"); + } + if (adapter->vlans_added) be_vid_config(adapter); -- cgit v0.10.2 From bc15afa39ecc16f01c3389d15d8f6015a427fe85 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 24 Oct 2013 08:44:25 -0700 Subject: tcp: fix SYNACK RTT estimation in Fast Open tp->lsndtime may not always be the SYNACK timestamp if a passive Fast Open socket sends data before handshake completes. And if the remote acknowledges both the data and the SYNACK, the RTT sample is already taken in tcp_ack(), so no need to call tcp_update_ack_rtt() in tcp_synack_rtt_meas() aagain. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a16b01b..305cd05 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2871,14 +2871,19 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, } /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ -static void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) +static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) { struct tcp_sock *tp = tcp_sk(sk); s32 seq_rtt = -1; - if (tp->lsndtime && !tp->total_retrans) - seq_rtt = tcp_time_stamp - tp->lsndtime; - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); + if (synack_stamp && !tp->total_retrans) + seq_rtt = tcp_time_stamp - synack_stamp; + + /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets + * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() + */ + if (!tp->srtt) + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt, -1); } static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) @@ -5587,6 +5592,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct request_sock *req; int queued = 0; bool acceptable; + u32 synack_stamp; tp->rx_opt.saw_tstamp = 0; @@ -5669,9 +5675,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * so release it. */ if (req) { + synack_stamp = tcp_rsk(req)->snt_synack; tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { + synack_stamp = tp->lsndtime; /* Make sure socket is routed, for correct metrics. 
*/ icsk->icsk_af_ops->rebuild_header(sk); tcp_init_congestion_control(sk); @@ -5694,7 +5702,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_synack_rtt_meas(sk, req); + tcp_synack_rtt_meas(sk, synack_stamp); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; -- cgit v0.10.2 From 2909d874f34eae157aecab0af27c6dc4a1751f8f Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 24 Oct 2013 08:55:25 -0700 Subject: tcp: only take RTT from timestamps if new data is acked Patch ed08495c3 "tcp: use RTT from SACK for RTO" has a bug that it does not check if the ACK acknowledge new data before taking the RTT sample from TCP timestamps. This patch adds the check back as required by the RFC. Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 305cd05..6ffe41a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2856,7 +2856,8 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * left edge of the send window. * See draft-ietf-tcplw-high-performance-00, section 3.3. */ - if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) + if (seq_rtt < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + flag & FLAG_ACKED) seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; if (seq_rtt < 0) -- cgit v0.10.2 From 2f715c1dde6e1760f3101358dc26f8c9489be0bf Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 24 Oct 2013 08:59:27 -0700 Subject: tcp: do not rearm RTO when future data are sacked Patch ed08495c3 "tcp: use RTT from SACK for RTO" always re-arms RTO upon obtaining a RTT sample from newly sacked data. But technically RTO should only be re-armed when the data sent before the last (re)transmission of write queue head are (s)acked. 
Otherwise the RTO may continue to extend during loss recovery on data sent in the future. Note that RTTs from ACK or timestamps do not have this problem, as the RTT source must be from data sent before. The new RTO re-arm policy is 1) Always re-arm RTO if SND.UNA is advanced 2) Re-arm RTO if sack RTT is available, provided the sacked data was sent before the last time write_queue_head was sent. Signed-off-by: Larry Brakmo Signed-off-by: Yuchung Cheng Acked-by: Neal Cardwell Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6ffe41a..068c8fb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2987,6 +2987,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, s32 seq_rtt = -1; s32 ca_seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); + bool rtt_update; while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); @@ -3063,14 +3064,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - if (tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt) || - (flag & FLAG_ACKED)) - tcp_rearm_rto(sk); + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt, sack_rtt); if (flag & FLAG_ACKED) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tcp_rearm_rto(sk); if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { tcp_mtup_probe_success(sk); @@ -3109,6 +3109,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_ops->pkts_acked(sk, pkts_acked, rtt_us); } + } else if (skb && rtt_update && sack_rtt >= 0 && + sack_rtt > (s32)(now - TCP_SKB_CB(skb)->when)) { + /* Do not re-arm RTO if the sack RTT is measured from data sent + * after when the head was last (re)transmitted. Otherwise the + * timeout may continue to extend in loss recovery. 
+ */ + tcp_rearm_rto(sk); } #if FASTRETRANS_DEBUG > 0 -- cgit v0.10.2 From ad86de802d0ea6776eccd0a2526ba31101d89267 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 24 Oct 2013 18:56:57 -0700 Subject: Documentation/networking: netdev-FAQ typo corrections Various typo fixes to netdev-FAQ.txt: - capitalize Linux - hyphenate dual-word adjectives - minor punctuation fixes Signed-off-by: Randy Dunlap Cc: Paul Gortmaker Acked-by: Paul Gortmaker Signed-off-by: David S. Miller diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt index d9112f0..3a2c586 100644 --- a/Documentation/networking/netdev-FAQ.txt +++ b/Documentation/networking/netdev-FAQ.txt @@ -4,23 +4,23 @@ Information you need to know about netdev Q: What is netdev? -A: It is a mailing list for all network related linux stuff. This includes +A: It is a mailing list for all network-related Linux stuff. This includes anything found under net/ (i.e. core code like IPv6) and drivers/net - (i.e. hardware specific drivers) in the linux source tree. + (i.e. hardware specific drivers) in the Linux source tree. Note that some subsystems (e.g. wireless drivers) which have a high volume of traffic have their own specific mailing lists. - The netdev list is managed (like many other linux mailing lists) through + The netdev list is managed (like many other Linux mailing lists) through VGER ( http://vger.kernel.org/ ) and archives can be found below: http://marc.info/?l=linux-netdev http://www.spinics.net/lists/netdev/ - Aside from subsystems like that mentioned above, all network related linux - development (i.e. RFC, review, comments, etc) takes place on netdev. + Aside from subsystems like that mentioned above, all network-related Linux + development (i.e. RFC, review, comments, etc.) takes place on netdev. -Q: How do the changes posted to netdev make their way into linux? +Q: How do the changes posted to netdev make their way into Linux? 
A: There are always two trees (git repositories) in play. Both are driven by David Miller, the main network maintainer. There is the "net" tree, @@ -35,7 +35,7 @@ A: There are always two trees (git repositories) in play. Both are driven Q: How often do changes from these trees make it to the mainline Linus tree? A: To understand this, you need to know a bit of background information - on the cadence of linux development. Each new release starts off with + on the cadence of Linux development. Each new release starts off with a two week "merge window" where the main maintainers feed their new stuff to Linus for merging into the mainline tree. After the two weeks, the merge window is closed, and it is called/tagged "-rc1". No new @@ -46,7 +46,7 @@ A: To understand this, you need to know a bit of background information things are in a state of churn), and a week after the last vX.Y-rcN was done, the official "vX.Y" is released. - Relating that to netdev: At the beginning of the 2 week merge window, + Relating that to netdev: At the beginning of the 2-week merge window, the net-next tree will be closed - no new changes/features. The accumulated new content of the past ~10 weeks will be passed onto mainline/Linus via a pull request for vX.Y -- at the same time, @@ -59,12 +59,12 @@ A: To understand this, you need to know a bit of background information IMPORTANT: Do not send new net-next content to netdev during the period during which net-next tree is closed. - Shortly after the two weeks have passed, (and vX.Y-rc1 is released) the + Shortly after the two weeks have passed (and vX.Y-rc1 is released), the tree for net-next reopens to collect content for the next (vX.Y+1) release. If you aren't subscribed to netdev and/or are simply unsure if net-next has re-opened yet, simply check the net-next git repository link above for - any new networking related commits. + any new networking-related commits. 
The "net" tree continues to collect fixes for the vX.Y content, and is fed back to Linus at regular (~weekly) intervals. Meaning that the @@ -217,7 +217,7 @@ A: Attention to detail. Re-read your own work as if you were the to why it happens, and then if necessary, explain why the fix proposed is the best way to get things done. Don't mangle whitespace, and as is common, don't mis-indent function arguments that span multiple lines. - If it is your 1st patch, mail it to yourself so you can test apply + If it is your first patch, mail it to yourself so you can test apply it to an unpatched tree to confirm infrastructure didn't mangle it. Finally, go back and read Documentation/SubmittingPatches to be -- cgit v0.10.2 From 959f58544b7f20c92d5eb43d1232c96c15c01bfb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Oct 2013 16:12:03 -0700 Subject: Linux 3.12-rc7 diff --git a/Makefile b/Makefile index 126321d..868c0eb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = One Giant Leap for Frogkind # *DOCUMENTATION* -- cgit v0.10.2 From fc59d5bdf1e3dca0336d155e55d812286db075ad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 27 Oct 2013 16:26:43 -0700 Subject: pkt_sched: fq: clear time_next_packet for reused flows When a socket is freed/reallocated, we need to clear time_next_packet or else we can inherit a prior value and delay first packets of the new flow. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a9dfdda..fdc041c 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -255,6 +255,7 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; + f->time_next_packet = 0ULL; } return f; } -- cgit v0.10.2 From eeb1b73378b560e00ff1da2ef09fed9254f4e128 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 25 Oct 2013 10:21:32 +0200 Subject: xfrm: Increase the garbage collector threshold With the removal of the routing cache, we lost the option to tweak the garbage collector threshold along with the maximum routing cache size. So git commit 703fb94ec ("xfrm: Fix the gc threshold value for ipv4") moved back to a static threshold. It turned out that the current threshold before we start garbage collecting is much to small for some workloads, so increase it from 1024 to 32768. This means that we start the garbage collector if we have more than 32768 dst entries in the system and refuse new allocations if we are above 65536. 
Reported-by: Wolfgang Walter Signed-off-by: Steffen Klassert diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index ccde542..4764ee4 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -236,7 +236,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 08ed277..dd503a3 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -285,7 +285,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { -- cgit v0.10.2 From 4f56d12ebb28fceac4c6e60c8993fbfc122e1399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 21 Oct 2013 10:52:06 +0300 Subject: drm/i915: Add support for pipe_bpp readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On CTG+ read out the pipe bpp setting from hardware and fill it into pipe config. Also check it appropriately. v2: Don't do the pipe_bpp extraction inside the PCH only code block on ILK+. Avoid the PIPECONF read as we already have read it for the PIPECONF_EANBLE check. Note: This is already in drm-intel-next-queued as commit 42571aefafb1d330ef84eb29418832f72e7dfb4c Author: Ville Syrjälä Date: Fri Sep 6 23:29:00 2013 +0300 drm/i915: Add support for pipe_bpp readout but is needed for the following bugfix. 
Signed-off-by: Ville Syrjälä Reviewed-by: Jani Nikula Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 63de270..beb7f65 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1268,6 +1268,23 @@ static void intel_ddi_get_config(struct intel_encoder *encoder, flags |= DRM_MODE_FLAG_NVSYNC; pipe_config->adjusted_mode.flags |= flags; + + switch (temp & TRANS_DDI_BPC_MASK) { + case TRANS_DDI_BPC_6: + pipe_config->pipe_bpp = 18; + break; + case TRANS_DDI_BPC_8: + pipe_config->pipe_bpp = 24; + break; + case TRANS_DDI_BPC_10: + pipe_config->pipe_bpp = 30; + break; + case TRANS_DDI_BPC_12: + pipe_config->pipe_bpp = 36; + break; + default: + break; + } } static void intel_ddi_destroy(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 581fb4b..725f0be 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4983,6 +4983,22 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc, if (!(tmp & PIPECONF_ENABLE)) return false; + if (IS_G4X(dev) || IS_VALLEYVIEW(dev)) { + switch (tmp & PIPECONF_BPC_MASK) { + case PIPECONF_6BPC: + pipe_config->pipe_bpp = 18; + break; + case PIPECONF_8BPC: + pipe_config->pipe_bpp = 24; + break; + case PIPECONF_10BPC: + pipe_config->pipe_bpp = 30; + break; + default: + break; + } + } + intel_get_pipe_timings(crtc, pipe_config); i9xx_get_pfit_config(crtc, pipe_config); @@ -5881,6 +5897,23 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, if (!(tmp & PIPECONF_ENABLE)) return false; + switch (tmp & PIPECONF_BPC_MASK) { + case PIPECONF_6BPC: + pipe_config->pipe_bpp = 18; + break; + case PIPECONF_8BPC: + pipe_config->pipe_bpp = 24; + break; + case PIPECONF_10BPC: + pipe_config->pipe_bpp = 30; + break; + case PIPECONF_12BPC: + pipe_config->pipe_bpp = 36; + break; + default: + break; + } + if 
(I915_READ(PCH_TRANSCONF(crtc->pipe)) & TRANS_ENABLE) { struct intel_shared_dpll *pll; @@ -8612,6 +8645,9 @@ intel_pipe_config_compare(struct drm_device *dev, PIPE_CONF_CHECK_X(dpll_hw_state.fp0); PIPE_CONF_CHECK_X(dpll_hw_state.fp1); + if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) + PIPE_CONF_CHECK_I(pipe_bpp); + #undef PIPE_CONF_CHECK_X #undef PIPE_CONF_CHECK_I #undef PIPE_CONF_CHECK_FLAGS -- cgit v0.10.2 From 2fd869f08aec5a8e4cbf01bc3fa345c4e53342d7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Oct 2013 09:38:12 -0300 Subject: perf tools: Fix up /proc/PID/maps parsing When introducing support for MMAP2 we considered more parts of each map representation in /proc/PID/maps, and when disabling it we forgot to reduce the number of expected parsed/assigned entries in the sscanf call, fix it to expect the right number of desired fields, 5. Reported-by: Markus Trippelsdorf Based-on-a-patch-by: Markus Trippelsdorf Cc: Adrian Hunter Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-vrbo1wik997ahjzl1chm3bdm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 63df031..49096ea 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -213,7 +213,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool, &event->mmap.pgoff, execname); - if (n != 8) + if (n != 5) continue; if (prot[2] != 'x') -- cgit v0.10.2 From ff18620c2157671a8ee21ebb8e6a3520ea209b1f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 28 Oct 2013 14:21:49 +0100 Subject: ASoC: dapm: Fix source list debugfs outputs ... due to a copy & paste error. Spotted by coverity CID 710923. 
Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index c17c14c..e650e99 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1949,7 +1949,7 @@ static ssize_t dapm_widget_power_read_file(struct file *file, w->active ? "active" : "inactive"); list_for_each_entry(p, &w->sources, list_sink) { - if (p->connected && !p->connected(w, p->sink)) + if (p->connected && !p->connected(w, p->source)) continue; if (p->connect) -- cgit v0.10.2 From 298402a3858e17e6a78acafa1dcd490167dd9f74 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 28 Oct 2013 14:21:50 +0100 Subject: ASoC: dapm: Return -ENOMEM in snd_soc_dapm_new_dai_widgets() ... instead of NULL dereferences. Spotted by coverity CID 402004. Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index e650e99..b2949ae 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3495,6 +3495,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, if (!w) { dev_err(dapm->dev, "ASoC: Failed to create %s widget\n", dai->driver->playback.stream_name); + return -ENOMEM; } w->priv = dai; @@ -3513,6 +3514,7 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm, if (!w) { dev_err(dapm->dev, "ASoC: Failed to create %s widget\n", dai->driver->capture.stream_name); + return -ENOMEM; } w->priv = dai; -- cgit v0.10.2 From 7195a50b5c7e00cc3312934fd022c3006b533d12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 24 Sep 2013 14:24:05 +0300 Subject: drm/i915: Add HSW CRT output readout support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Call intel_ddi_get_config() to get the pipe_bpp settings from DDI. The sync polarity settings from DDI are irrelevant for CRT output, so override them with data from the ADPA register. 
Note: This is already merged in drm-intel-next-queued as commit 6801c18c0a43386bb44712cbc028a7e05adb9f0d Author: Ville Syrjälä Date: Tue Sep 24 14:24:05 2013 +0300 drm/i915: Add HSW CRT output readout support but is required for the following edp bpp bugfix. v2: Extract intel_crt_get_flags() Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=69691 Tested-by: Qingshuai Tian Signed-off-by: Ville Syrjälä Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index ea9022e..db59bb9 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -83,8 +83,7 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, return true; } -static void intel_crt_get_config(struct intel_encoder *encoder, - struct intel_crtc_config *pipe_config) +static unsigned int intel_crt_get_flags(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; struct intel_crt *crt = intel_encoder_to_crt(encoder); @@ -102,7 +101,27 @@ static void intel_crt_get_config(struct intel_encoder *encoder, else flags |= DRM_MODE_FLAG_NVSYNC; - pipe_config->adjusted_mode.flags |= flags; + return flags; +} + +static void intel_crt_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) +{ + struct drm_device *dev = encoder->base.dev; + + pipe_config->adjusted_mode.flags |= intel_crt_get_flags(encoder); +} + +static void hsw_crt_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) +{ + intel_ddi_get_config(encoder, pipe_config); + + pipe_config->adjusted_mode.flags &= ~(DRM_MODE_FLAG_PHSYNC | + DRM_MODE_FLAG_NHSYNC | + DRM_MODE_FLAG_PVSYNC | + DRM_MODE_FLAG_NVSYNC); + pipe_config->adjusted_mode.flags |= intel_crt_get_flags(encoder); } /* Note: The caller is required to filter out dpms modes not supported by the @@ -799,7 +818,10 @@ void intel_crt_init(struct drm_device *dev) crt->base.mode_set = 
intel_crt_mode_set; crt->base.disable = intel_disable_crt; crt->base.enable = intel_enable_crt; - crt->base.get_config = intel_crt_get_config; + if (IS_HASWELL(dev)) + crt->base.get_config = hsw_crt_get_config; + else + crt->base.get_config = intel_crt_get_config; if (I915_HAS_HOTPLUG(dev)) crt->base.hpd_pin = HPD_CRT; if (HAS_DDI(dev)) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index beb7f65..b53fff8 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1249,8 +1249,8 @@ static void intel_ddi_hot_plug(struct intel_encoder *intel_encoder) intel_dp_check_link_status(intel_dp); } -static void intel_ddi_get_config(struct intel_encoder *encoder, - struct intel_crtc_config *pipe_config) +void intel_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9b7b68f..7f2b384 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -765,6 +765,8 @@ extern void intel_ddi_prepare_link_retrain(struct drm_encoder *encoder); extern bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); extern void intel_ddi_fdi_disable(struct drm_crtc *crtc); +extern void intel_ddi_get_config(struct intel_encoder *encoder, + struct intel_crtc_config *pipe_config); extern void intel_display_handle_reset(struct drm_device *dev); extern bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, -- cgit v0.10.2 From c6cd2ee2d59111a07cd9199564c9bdcb2d11e5cf Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 21 Oct 2013 10:52:07 +0300 Subject: drm/i915/dp: workaround BIOS eDP bpp clamping issue This isn't a real fix to the problem, but rather a stopgap measure while trying to find a proper solution. 
There are several laptops out there that fail to light up the eDP panel in UEFI boot mode. They seem to be mostly IVB machines, including but apparently not limited to Dell XPS 13, Asus TX300, Asus UX31A, Asus UX32VD, Acer Aspire S7. They seem to work in CSM or legacy boot. The difference between UEFI and CSM is that the BIOS provides a different VBT to the kernel. The UEFI VBT typically specifies 18 bpp and 1.62 GHz link for eDP, while CSM VBT has 24 bpp and 2.7 GHz link. We end up clamping to 18 bpp in UEFI mode, which we can fit in the 1.62 GHz link, and for reasons yet unknown fail to light up the panel. Dithering from 24 to 18 bpp itself seems to work; if we use 18 bpp with 2.7 GHz link, the eDP panel lights up. So essentially this is a link speed issue, and *not* a bpp clamping issue. The bug raised its head since commit 657445fe8660100ad174600ebfa61536392b7624 Author: Daniel Vetter Date: Sat May 4 10:09:18 2013 +0200 Revert "drm/i915: revert eDP bpp clamping code changes" which started clamping bpp *before* computing the link requirements, and thus affecting the required bandwidth. Clamping after the computations kept the link at 2.7 GHz. Even though the BIOS tells us to use 18 bpp through the VBT, it happily boots up at 24 bpp and 2.7 GHz itself! Use this information to selectively ignore the VBT provided value. We can't ignore the VBT eDP bpp altogether, as there are other laptops that do require the clamping to be used due to EDID reporting higher bpp than the panel can support.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=59841 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=67950 Tested-by: Ulf Winkelvos Tested-by: jkp CC: stable@vger.kernel.org Signed-off-by: Jani Nikula Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2c555f9..1a43137 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1401,6 +1401,26 @@ static void intel_dp_get_config(struct intel_encoder *encoder, else pipe_config->port_clock = 270000; } + + if (is_edp(intel_dp) && dev_priv->vbt.edp_bpp && + pipe_config->pipe_bpp > dev_priv->vbt.edp_bpp) { + /* + * This is a big fat ugly hack. + * + * Some machines in UEFI boot mode provide us a VBT that has 18 + * bpp and 1.62 GHz link bandwidth for eDP, which for reasons + * unknown we fail to light up. Yet the same BIOS boots up with + * 24 bpp and 2.7 GHz link. Use the same bpp as the BIOS uses as + * max, not what it tells us to use. + * + * Note: This will still be broken if the eDP panel is not lit + * up by the BIOS, and thus we can't get the mode at module + * load. + */ + DRM_DEBUG_KMS("pipe has %d bpp for eDP panel, overriding BIOS-provided max %d bpp\n", + pipe_config->pipe_bpp, dev_priv->vbt.edp_bpp); + dev_priv->vbt.edp_bpp = pipe_config->pipe_bpp; + } } static bool is_edp_psr(struct intel_dp *intel_dp) -- cgit v0.10.2 From 645378d85ee524e429aa4cf52806047b56cdc596 Mon Sep 17 00:00:00 2001 From: Rob Pearce Date: Sun, 27 Oct 2013 16:13:42 +0000 Subject: drm/i915: No LVDS hardware on Intel D410PT and D425KT The Intel D410PT(LW) and D425KT Mini-ITX desktop boards both show up as having LVDS but the hardware is not populated. This patch adds them to the list of such systems. Patch is against 3.11.4 v2: Patch revised to match the D425KT exactly as the D425KTW does have LVDS. According to Intel's documentation, the D410PTL and D410PLTW don't. 
Signed-off-by: Rob Pearce Cc: stable@vger.kernel.org [danvet: Pimp commit message to my liking and add cc: stable.] Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 831a5c0..b8af94a 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -700,6 +700,22 @@ static const struct dmi_system_id intel_no_lvds[] = { }, { .callback = intel_no_lvds_dmi_callback, + .ident = "Intel D410PT", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), + DMI_MATCH(DMI_BOARD_NAME, "D410PT"), + }, + }, + { + .callback = intel_no_lvds_dmi_callback, + .ident = "Intel D425KT", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "D425KT"), + }, + }, + { + .callback = intel_no_lvds_dmi_callback, .ident = "Intel D510MO", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Intel"), -- cgit v0.10.2 From 9754c4f9b23d5ce6756514acdf134ad61470734a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 25 Oct 2013 13:24:53 +0200 Subject: perf hists: Add color overhead for stdio output buffer Following commit tightened up the buffer size for output to strict width of used format columns: 99cf666 perf hists: Fix formatting of long symbol names This works fine until you hit color overhead output which places extra bytes into output buffer. We need to account for color overhead in the output buffer. Adding maximum color byte size to the output buffer size. 
Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1382700293-1803-1-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 194e2f4..6c15268 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -315,8 +315,7 @@ static inline void advance_hpp(struct perf_hpp *hpp, int inc) } static int hist_entry__period_snprintf(struct perf_hpp *hpp, - struct hist_entry *he, - bool color) + struct hist_entry *he) { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; @@ -338,7 +337,7 @@ static int hist_entry__period_snprintf(struct perf_hpp *hpp, } else first = false; - if (color && fmt->color) + if (perf_hpp__use_color() && fmt->color) ret = fmt->color(fmt, hpp, he); else ret = fmt->entry(fmt, hpp, he); @@ -358,12 +357,11 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, .buf = bf, .size = size, }; - bool color = !symbol_conf.field_sep; if (size == 0 || size > bfsz) size = hpp.size = bfsz; - ret = hist_entry__period_snprintf(&hpp, he, color); + ret = hist_entry__period_snprintf(&hpp, he); hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); ret = fprintf(fp, "%s\n", bf); @@ -482,6 +480,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, print_entries: linesz = hists__sort_list_width(hists) + 3 + 1; + linesz += perf_hpp__color_overhead(); line = malloc(linesz); if (line == NULL) { ret = -1; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 1329b6b..ce8dc61 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -5,6 +5,7 @@ #include #include "callchain.h" #include "header.h" +#include "color.h" extern struct callchain_param callchain_param; @@ -175,6 +176,18 @@ void perf_hpp__init(void); void 
perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_enable(unsigned col); +static inline size_t perf_hpp__use_color(void) +{ + return !symbol_conf.field_sep; +} + +static inline size_t perf_hpp__color_overhead(void) +{ + return perf_hpp__use_color() ? + (COLOR_MAXLEN + sizeof(PERF_COLOR_RESET)) * PERF_HPP__MAX_INDEX + : 0; +} + struct perf_evlist; struct hist_browser_timer { -- cgit v0.10.2 From 09b0fd45ff63413df94cbd832a765076b201edbb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 26 Oct 2013 16:25:33 +0200 Subject: perf record: Split -g and --call-graph Splitting -g and --call-graph for record command, so we could use '-g' with no option. The '-g' option now takes NO argument and enables the configured unwind method, which is currently the frame pointers method. It will be possible to configure unwind method via config file in upcoming patches. All current '-g' arguments is overtaken by --call-graph option. Signed-off-by: Jiri Olsa Tested-by: David Ahern Tested-by: Ingo Molnar Reviewed-by: David Ahern Acked-by: Ingo Molnar Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1382797536-32303-2-git-send-email-jolsa@redhat.com [ reordered -g/--call-graph on --help and expanded the man page according to comments by David Ahern and Namhyung Kim ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e297b74..ca0d3d9 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -90,8 +90,20 @@ OPTIONS Number of mmap data pages. Must be a power of two. -g:: + Enables call-graph (stack chain/backtrace) recording. + --call-graph:: - Do call-graph (stack chain/backtrace) recording. + Setup and enable call-graph (stack chain/backtrace) recording, + implies -g. 
+ + Allows specifying "fp" (frame pointer) or "dwarf" + (DWARF's CFI - Call Frame Information) as the method to collect + the information used to show the call graphs. + + In some systems, where binaries are build with gcc + --fomit-frame-pointer, using the "fp" method will produce bogus + call graphs, using "dwarf", if available (perf tools linked to + the libunwind library) should be used instead. -q:: --quiet:: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a41ac415..d046514 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -712,21 +712,12 @@ static int get_stack_size(char *str, unsigned long *_size) } #endif /* LIBUNWIND_SUPPORT */ -int record_parse_callchain_opt(const struct option *opt, - const char *arg, int unset) +int record_parse_callchain(const char *arg, struct perf_record_opts *opts) { - struct perf_record_opts *opts = opt->value; char *tok, *name, *saveptr = NULL; char *buf; int ret = -1; - /* --no-call-graph */ - if (unset) - return 0; - - /* We specified default option if none is provided. */ - BUG_ON(!arg); - /* We need buffer that we know we can write to. 
*/ buf = malloc(strlen(arg) + 1); if (!buf) @@ -764,13 +755,9 @@ int record_parse_callchain_opt(const struct option *opt, ret = get_stack_size(tok, &size); opts->stack_dump_size = size; } - - if (!ret) - pr_debug("callchain: stack dump size %d\n", - opts->stack_dump_size); #endif /* LIBUNWIND_SUPPORT */ } else { - pr_err("callchain: Unknown -g option " + pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); break; } @@ -778,13 +765,52 @@ int record_parse_callchain_opt(const struct option *opt, } while (0); free(buf); + return ret; +} + +static void callchain_debug(struct perf_record_opts *opts) +{ + pr_debug("callchain: type %d\n", opts->call_graph); + if (opts->call_graph == CALLCHAIN_DWARF) + pr_debug("callchain: stack dump size %d\n", + opts->stack_dump_size); +} + +int record_parse_callchain_opt(const struct option *opt, + const char *arg, + int unset) +{ + struct perf_record_opts *opts = opt->value; + int ret; + + /* --no-call-graph */ + if (unset) { + opts->call_graph = CALLCHAIN_NONE; + pr_debug("callchain: disabled\n"); + return 0; + } + + ret = record_parse_callchain(arg, opts); if (!ret) - pr_debug("callchain: type %d\n", opts->call_graph); + callchain_debug(opts); return ret; } +int record_callchain_opt(const struct option *opt, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + struct perf_record_opts *opts = opt->value; + + if (opts->call_graph == CALLCHAIN_NONE) + opts->call_graph = CALLCHAIN_FP; + + callchain_debug(opts); + return 0; +} + static const char * const record_usage[] = { "perf record [] []", "perf record [] -- []", @@ -813,12 +839,12 @@ static struct perf_record record = { }, }; -#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " +#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " #ifdef LIBUNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; #else 
-const char record_callchain_help[] = CALLCHAIN_HELP "[fp]"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp"; #endif /* @@ -858,9 +884,12 @@ const struct option record_options[] = { "number of mmap data pages"), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_CALLBACK_DEFAULT('g', "call-graph", &record.opts, - "mode[,dump_size]", record_callchain_help, - &record_parse_callchain_opt, "fp"), + OPT_CALLBACK_NOOPT('g', NULL, &record.opts, + NULL, "enables call-graph recording" , + &record_callchain_opt), + OPT_CALLBACK(0, "call-graph", &record.opts, + "mode[,dump_size]", record_callchain_help, + &record_parse_callchain_opt), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 2b585bc..9e99060 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -147,6 +147,9 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor) struct option; +int record_parse_callchain(const char *arg, struct perf_record_opts *opts); int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset); +int record_callchain_opt(const struct option *opt, const char *arg, int unset); + extern const char record_callchain_help[]; #endif /* __PERF_CALLCHAIN_H */ -- cgit v0.10.2 From ae779a630977d93fbebfa06216ea47df5b5c62c8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 26 Oct 2013 16:25:34 +0200 Subject: perf top: Split -G and --call-graph Splitting -G and --call-graph for record command, so we could use '-G' with no option. The '-G' option now takes NO argument and enables the configured unwind method, which is currently the frame pointers method. It will be possible to configure unwind method via config file in upcoming patches. All current '-G' arguments is overtaken by --call-graph option. 
NOTE: The documentation for top --call-graph option was wrongly copied from report command. Signed-off-by: Jiri Olsa Tested-by: David Ahern Tested-by: Ingo Molnar Reviewed-by: David Ahern Acked-by: Ingo Molnar Cc: Adrian Hunter Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1382797536-32303-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 58d6598..6a118e7 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -140,20 +140,12 @@ Default is to monitor all CPUS. --asm-raw:: Show raw instruction encoding of assembly instructions. --G [type,min,order]:: +-G:: + Enables call-graph (stack chain/backtrace) recording. + --call-graph:: - Display call chains using type, min percent threshold and order. - type can be either: - - flat: single column, linear exposure of call chains. - - graph: use a graph tree, displaying absolute overhead rates. - - fractal: like graph, but displays relative rates. Each branch of - the tree is considered as a new profiled object. - - order can be either: - - callee: callee based call graph. - - caller: inverted caller based call graph. - - Default: fractal,0.5,callee. + Setup and enable call-graph (stack chain/backtrace) recording, + implies -G. --ignore-callees=:: Ignore callees of the function(s) matching the given regex. 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2122141..0df298a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1016,16 +1016,16 @@ out_delete: } static int -parse_callchain_opt(const struct option *opt, const char *arg, int unset) +callchain_opt(const struct option *opt, const char *arg, int unset) { - /* - * --no-call-graph - */ - if (unset) - return 0; - symbol_conf.use_callchain = true; + return record_callchain_opt(opt, arg, unset); +} +static int +parse_callchain_opt(const struct option *opt, const char *arg, int unset) +{ + symbol_conf.use_callchain = true; return record_parse_callchain_opt(opt, arg, unset); } @@ -1106,9 +1106,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, "Show a column with the number of samples"), - OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts, - "mode[,dump_size]", record_callchain_help, - &parse_callchain_opt, "fp"), + OPT_CALLBACK_NOOPT('G', NULL, &top.record_opts, + NULL, "enables call-graph recording", + &callchain_opt), + OPT_CALLBACK(0, "call-graph", &top.record_opts, + "mode[,dump_size]", record_callchain_help, + &parse_callchain_opt), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", "ignore callees of these functions in call graphs", report_parse_ignore_callees_opt), -- cgit v0.10.2 From 8e50d384cc1d5afd2989cf0f7093756ed7164eb2 Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Thu, 24 Oct 2013 15:43:33 +0800 Subject: perf tools: Fixup mmap event consumption The tail position of the event buffer should only be modified after actually use that event. If not the event buffer could be invalid before use, and segment fault occurs when invoking perf top -G. 
Signed-off-by: Zhouyi Zhou Cc: David Ahern Cc: Zhouyi Zhou Link: http://lkml.kernel.org/r/1382600613-32177-1-git-send-email-zhouzhouyi@gmail.com [ Simplified the logic using exit gotos and renamed write_tail method to mmap_consume ] Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 935d522..fbc2888 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -888,11 +888,18 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { err = perf_evlist__parse_sample(kvm->evlist, event, &sample); if (err) { + perf_evlist__mmap_consume(kvm->evlist, idx); pr_err("Failed to parse sample\n"); return -1; } err = perf_session_queue_event(kvm->session, event, &sample, 0); + /* + * FIXME: Here we can't consume the event, as perf_session_queue_event will + * point to it, and it'll get possibly overwritten by the kernel. + */ + perf_evlist__mmap_consume(kvm->evlist, idx); + if (err) { pr_err("Failed to enqueue sample: %d\n", err); return -1; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0df298a..5a11f13 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -810,7 +810,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) ret = perf_evlist__parse_sample(top->evlist, event, &sample); if (ret) { pr_err("Can't parse sample, err = %d\n", ret); - continue; + goto next_event; } evsel = perf_evlist__id2evsel(session->evlist, sample.id); @@ -825,13 +825,13 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) case PERF_RECORD_MISC_USER: ++top->us_samples; if (top->hide_user_symbols) - continue; + goto next_event; machine = &session->machines.host; break; case PERF_RECORD_MISC_KERNEL: ++top->kernel_samples; if (top->hide_kernel_symbols) - continue; + goto next_event; machine = &session->machines.host; break; case PERF_RECORD_MISC_GUEST_KERNEL: @@ -847,7 +847,7 
@@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) */ /* Fall thru */ default: - continue; + goto next_event; } @@ -859,6 +859,8 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) machine__process_event(machine, event); } else ++session->stats.nr_unknown_events; +next_event: + perf_evlist__mmap_consume(top->evlist, idx); } } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 71aa3e3..99c8d9a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -987,7 +987,7 @@ again: err = perf_evlist__parse_sample(evlist, event, &sample); if (err) { fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); - continue; + goto next_event; } if (trace->base_time == 0) @@ -1001,18 +1001,20 @@ again: evsel = perf_evlist__id2evsel(evlist, sample.id); if (evsel == NULL) { fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); - continue; + goto next_event; } if (sample.raw_data == NULL) { fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", perf_evsel__name(evsel), sample.tid, sample.cpu, sample.raw_size); - continue; + goto next_event; } handler = evsel->handler.func; handler(trace, evsel, &sample); +next_event: + perf_evlist__mmap_consume(evlist, i); if (done) goto out_unmap_evlist; diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 6fb781d..e3fedfa 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -290,6 +290,7 @@ static int process_events(struct machine *machine, struct perf_evlist *evlist, for (i = 0; i < evlist->nr_mmaps; i++) { while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { ret = process_event(machine, evlist, event, state); + perf_evlist__mmap_consume(evlist, i); if (ret < 0) return ret; } diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index d444ea2..376c356 100644 --- 
a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -36,6 +36,7 @@ static int find_comm(struct perf_evlist *evlist, const char *comm) (pid_t)event->comm.tid == getpid() && strcmp(event->comm.comm, comm) == 0) found += 1; + perf_evlist__mmap_consume(evlist, i); } } return found; diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index c4185b9..a7232c2 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -122,6 +122,7 @@ int test__basic_mmap(void) goto out_munmap; } nr_events[evsel->idx]++; + perf_evlist__mmap_consume(evlist, 0); } err = 0; diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c index fc5b9fc..524b221 100644 --- a/tools/perf/tests/open-syscall-tp-fields.c +++ b/tools/perf/tests/open-syscall-tp-fields.c @@ -77,8 +77,10 @@ int test__syscall_open_tp_fields(void) ++nr_events; - if (type != PERF_RECORD_SAMPLE) + if (type != PERF_RECORD_SAMPLE) { + perf_evlist__mmap_consume(evlist, i); continue; + } err = perf_evsel__parse_sample(evsel, event, &sample); if (err) { diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index b8a7056..7923b06 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -263,6 +263,8 @@ int test__PERF_RECORD(void) type); ++errs; } + + perf_evlist__mmap_consume(evlist, i); } } diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 0ab61b1..4ca1b93 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -122,7 +122,7 @@ int test__perf_time_to_tsc(void) if (event->header.type != PERF_RECORD_COMM || (pid_t)event->comm.pid != getpid() || (pid_t)event->comm.tid != getpid()) - continue; + goto next_event; if (strcmp(event->comm.comm, comm1) == 0) { CHECK__(perf_evsel__parse_sample(evsel, event, @@ -134,6 +134,8 @@ int test__perf_time_to_tsc(void) &sample)); comm2_time = sample.time; } 
+next_event: + perf_evlist__mmap_consume(evlist, i); } } diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 2e41e2d..6e2b44e 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) struct perf_sample sample; if (event->header.type != PERF_RECORD_SAMPLE) - continue; + goto next_event; err = perf_evlist__parse_sample(evlist, event, &sample); if (err < 0) { @@ -88,6 +88,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) total_periods += sample.period; nr_samples++; +next_event: + perf_evlist__mmap_consume(evlist, 0); } if ((u64) nr_samples == total_periods) { diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 28fe589..a3e6487 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -96,10 +96,10 @@ int test__task_exit(void) retry: while ((event = perf_evlist__mmap_read(evlist, 0)) != NULL) { - if (event->header.type != PERF_RECORD_EXIT) - continue; + if (event->header.type == PERF_RECORD_EXIT) + nr_exit++; - nr_exit++; + perf_evlist__mmap_consume(evlist, 0); } if (!exited || !nr_exit) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f9f77be..e584cd3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -545,12 +545,19 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) md->prev = old; - if (!evlist->overwrite) - perf_mmap__write_tail(md, old); - return event; } +void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) +{ + if (!evlist->overwrite) { + struct perf_mmap *md = &evlist->mmap[idx]; + unsigned int old = md->prev; + + perf_mmap__write_tail(md, old); + } +} + static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) { if (evlist->mmap[idx].base != NULL) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 880d713..206d093 100644 --- a/tools/perf/util/evlist.h +++ 
b/tools/perf/util/evlist.h @@ -89,6 +89,8 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); +void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); + int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__close(struct perf_evlist *evlist); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 71b5412..2ac4bc9 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -822,6 +822,8 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, PyObject *pyevent = pyrf_event__new(event); struct pyrf_event *pevent = (struct pyrf_event *)pyevent; + perf_evlist__mmap_consume(evlist, cpu); + if (pyevent == NULL) return PyErr_NoMemory(); -- cgit v0.10.2 From 0d08c42cf9a71530fef5ebcfe368f38f2dd0476f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Oct 2013 17:26:17 -0700 Subject: tcp: gso: fix truesize tracking commit 6ff50cd55545 ("tcp: gso: do not generate out of order packets") had an heuristic that can trigger a warning in skb_try_coalesce(), because skb->truesize of the gso segments were exactly set to mss. This breaks the requirement that skb->truesize >= skb->len + truesizeof(struct sk_buff); It can trivially be reproduced by : ifconfig lo mtu 1500 ethtool -K lo tso off netperf As the skbs are looped into the TCP networking stack, skb_try_coalesce() warns us of these skb under-estimating their truesize. Signed-off-by: Eric Dumazet Reported-by: Alexei Starovoitov Signed-off-by: David S. 
Miller diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 3a7525e..533c58a 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -18,6 +18,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int sum_truesize = 0; struct tcphdr *th; unsigned int thlen; unsigned int seq; @@ -102,13 +103,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { skb->destructor = gso_skb->destructor; skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; + sum_truesize += skb->truesize; } skb = skb->next; th = tcp_hdr(skb); @@ -125,7 +120,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); + sum_truesize += skb->truesize; + atomic_add(sum_truesize - gso_skb->truesize, + &skb->sk->sk_wmem_alloc); } delta = htonl(oldlen + (skb_tail_pointer(skb) - -- cgit v0.10.2 From e3ed4eaef4932fd3867465784d11a36deaa6d22c Mon Sep 17 00:00:00 2001 From: Dmitry Kravkov Date: Sun, 27 Oct 2013 13:07:00 +0200 Subject: bnx2x: prevent FW assert on low mem during unload Buffers for FW statistics were allocated at an inappropriate time; In a machine where the driver encounters problems allocating all of its queues, the driver would still create FW requests for the statistics of the non-existing queues. The wrong order of memory allocation could lead to zeroed statistics messages being sent, leading to fw assert in case function 0 was down. This changes the order of allocations, guaranteeing that statistic requests will only be generated for actual queues. 
Signed-off-by: Dmitry Kravkov Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 4ab4c89..74d6486 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2545,10 +2545,6 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } } - /* Allocated memory for FW statistics */ - if (bnx2x_alloc_fw_stats_mem(bp)) - LOAD_ERROR_EXIT(bp, load_error0); - /* need to be done after alloc mem, since it's self adjusting to amount * of memory available for RSS queues */ @@ -2558,6 +2554,10 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) LOAD_ERROR_EXIT(bp, load_error0); } + /* Allocated memory for FW statistics */ + if (bnx2x_alloc_fw_stats_mem(bp)) + LOAD_ERROR_EXIT(bp, load_error0); + /* request pf to initialize status blocks */ if (IS_VF(bp)) { rc = bnx2x_vfpf_init(bp); @@ -2812,8 +2812,8 @@ load_error1: if (IS_PF(bp)) bnx2x_clear_pf_load(bp); load_error0: - bnx2x_free_fp_mem(bp); bnx2x_free_fw_stats_mem(bp); + bnx2x_free_fp_mem(bp); bnx2x_free_mem(bp); return rc; -- cgit v0.10.2 From 826cb7b43b5bd8995f84edeacbbf569946a58f7c Mon Sep 17 00:00:00 2001 From: Ariel Elior Date: Sun, 27 Oct 2013 13:07:01 +0200 Subject: bnx2x: Disable VF access on PF removal When the bnx2x driver is rmmoded, if VFs of a given PF will be assigned to a VM then that PF will be unable to call `pci_disable_sriov()'. If for that same PF there would also exist unassigned VFs in the hypervisor, the result will be that after the removal there will still be virtual PCI functions on the hypervisor. If the bnx2x module were to be re-inserted, the result will be that the VFs on the hypervisor will be re-probed directly following the PF's probe, even though that in regular loading flow sriov is only enabled once PF is loaded. 
The probed VF will then try to access its bar, causing a PCI error as the HW is not in a state enabling such a request. This patch adds a missing disablement procedure to the PF's removal, one that sets registers viewable to the VF to indicate that the VFs have no permission to access the bar, thus resulting in probe errors instead of PCI errors. Signed-off-by: Ariel Elior Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index bf08ad6..5e07efb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2018,6 +2018,8 @@ failed: void bnx2x_iov_remove_one(struct bnx2x *bp) { + int vf_idx; + /* if SRIOV is not enabled there's nothing to do */ if (!IS_SRIOV(bp)) return; @@ -2026,6 +2028,18 @@ void bnx2x_iov_remove_one(struct bnx2x *bp) pci_disable_sriov(bp->pdev); DP(BNX2X_MSG_IOV, "sriov disabled\n"); + /* disable access to all VFs */ + for (vf_idx = 0; vf_idx < bp->vfdb->sriov.total; vf_idx++) { + bnx2x_pretend_func(bp, + HW_VF_HANDLE(bp, + bp->vfdb->sriov.first_vf_in_pf + + vf_idx)); + DP(BNX2X_MSG_IOV, "disabling internal access for vf %d\n", + bp->vfdb->sriov.first_vf_in_pf + vf_idx); + bnx2x_vf_enable_internal(bp, 0); + bnx2x_pretend_func(bp, BP_ABS_FUNC(bp)); + } + /* free vf database */ __bnx2x_iov_free_vfdb(bp); } @@ -3197,7 +3211,7 @@ int bnx2x_enable_sriov(struct bnx2x *bp) * the "acquire" messages to appear on the VF PF channel. 
*/ DP(BNX2X_MSG_IOV, "about to call enable sriov\n"); - pci_disable_sriov(bp->pdev); + bnx2x_disable_sriov(bp); rc = pci_enable_sriov(bp->pdev, req_vfs); if (rc) { BNX2X_ERR("pci_enable_sriov failed with %d\n", rc); -- cgit v0.10.2 From 262e827fe745642589450ae241b7afd3912c3f25 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 27 Oct 2013 21:02:39 +0000 Subject: cxgb3: Fix length calculation in write_ofld_wr() on 32-bit architectures The length calculation here is now invalid on 32-bit architectures, since sk_buff::tail is a pointer and sk_buff::transport_header is an integer offset: drivers/net/ethernet/chelsio/cxgb3/sge.c: In function 'write_ofld_wr': drivers/net/ethernet/chelsio/cxgb3/sge.c:1603:9: warning: passing argument 4 of 'make_sgl' makes integer from pointer without a cast [enabled by default] adap->pdev); ^ drivers/net/ethernet/chelsio/cxgb3/sge.c:964:28: note: expected 'unsigned int' but argument is of type 'sk_buff_data_t' static inline unsigned int make_sgl(const struct sk_buff *skb, ^ Use the appropriate skb accessor functions. Compile-tested only. Signed-off-by: Ben Hutchings Fixes: 1a37e412a022 ('net: Use 16bits for *_headers fields of struct skbuff') Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 9c89dc8..632b318 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -1599,7 +1599,8 @@ static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb, flits = skb_transport_offset(skb) / 8; sgp = ndesc == 1 ? 
(struct sg_ent *)&d->flit[flits] : sgl; sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb), - skb->tail - skb->transport_header, + skb_tail_pointer(skb) - + skb_transport_header(skb), adap->pdev); if (need_skb_unmap()) { setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits); -- cgit v0.10.2 From 059dfa6a93b779516321e5112db9d7621b1367ba Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Mon, 28 Oct 2013 12:07:57 +0000 Subject: xen-netback: use jiffies_64 value to calculate credit timeout time_after_eq() only works if the delta is < MAX_ULONG/2. For a 32bit Dom0, if netfront sends packets at a very low rate, the time between subsequent calls to tx_credit_exceeded() may exceed MAX_ULONG/2 and the test for time_after_eq() will be incorrect. Credit will not be replenished and the guest may become unable to send packets (e.g., if prior to the long gap, all credit was exhausted). Use jiffies_64 variant to mitigate this problem for 32bit Dom0. Suggested-by: Jan Beulich Signed-off-by: Wei Liu Reviewed-by: David Vrabel Cc: Ian Campbell Cc: Jason Luan Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 5715318..400fea1 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -163,6 +163,7 @@ struct xenvif { unsigned long credit_usec; unsigned long remaining_credit; struct timer_list credit_timeout; + u64 credit_window_start; /* Statistics */ unsigned long rx_gso_checksum_fixup; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 01bb854..459935a 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -312,8 +312,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid, vif->credit_bytes = vif->remaining_credit = ~0UL; vif->credit_usec = 0UL; init_timer(&vif->credit_timeout); - /* Initialize 'expires' now: it's used to track the credit window.
*/ - vif->credit_timeout.expires = jiffies; + vif->credit_window_start = get_jiffies_64(); dev->netdev_ops = &xenvif_netdev_ops; dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f3e591c..900da4b 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1185,9 +1185,8 @@ out: static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) { - unsigned long now = jiffies; - unsigned long next_credit = - vif->credit_timeout.expires + + u64 now = get_jiffies_64(); + u64 next_credit = vif->credit_window_start + msecs_to_jiffies(vif->credit_usec / 1000); /* Timer could already be pending in rare cases. */ @@ -1195,8 +1194,8 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) return true; /* Passed the point where we can replenish credit? */ - if (time_after_eq(now, next_credit)) { - vif->credit_timeout.expires = now; + if (time_after_eq64(now, next_credit)) { + vif->credit_window_start = now; tx_add_credit(vif); } @@ -1208,6 +1207,7 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size) tx_credit_callback; mod_timer(&vif->credit_timeout, next_credit); + vif->credit_window_start = next_credit; return true; } -- cgit v0.10.2 From 1dd49bfa3465756b3ce72214b58a33e4afb67aa3 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 7 Oct 2013 11:28:42 +0100 Subject: mm: numa: Do not account for a hinting fault if we raced If another task handled a hinting fault in parallel then do not double account for it. 
Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Srikar Dronamraju Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1381141781-10992-5-git-send-email-mgorman@suse.de Signed-off-by: Ingo Molnar diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 610e3df..33ee637 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1325,8 +1325,11 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, check_same: spin_lock(&mm->page_table_lock); - if (unlikely(!pmd_same(pmd, *pmdp))) + if (unlikely(!pmd_same(pmd, *pmdp))) { + /* Someone else took our fault */ + current_nid = -1; goto out_unlock; + } clear_pmdnuma: pmd = pmd_mknonnuma(pmd); set_pmd_at(mm, haddr, pmdp, pmd); -- cgit v0.10.2 From 42836f5f8baa33085f547098b74aa98991ee9216 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 7 Oct 2013 11:28:43 +0100 Subject: mm: Wait for THP migrations to complete during NUMA hinting faults The locking for migrating THP is unusual. While normal page migration prevents parallel accesses using a migration PTE, THP migration relies on a combination of the page_table_lock, the page lock and the existence of the NUMA hinting PTE to guarantee safety but there is a bug in the scheme. If a THP page is currently being migrated and another thread traps a fault on the same page it checks if the page is misplaced. If it is not, then pmd_numa is cleared. The problem is that it checks if the page is misplaced without holding the page lock meaning that the racing thread can be migrating the THP when the second thread clears the NUMA bit and faults a stale page. This patch checks if the page is potentially being migrated and stalls using the lock_page if it is potentially being migrated before checking if the page is misplaced or not.
Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Srikar Dronamraju Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1381141781-10992-6-git-send-email-mgorman@suse.de Signed-off-by: Ingo Molnar diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 33ee637..e10d780 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1295,13 +1295,14 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (current_nid == numa_node_id()) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); - target_nid = mpol_misplaced(page, vma, haddr); - if (target_nid == -1) { - put_page(page); - goto clear_pmdnuma; - } + /* + * Acquire the page lock to serialise THP migrations but avoid dropping + * page_table_lock if at all possible + */ + if (trylock_page(page)) + goto got_lock; - /* Acquire the page lock to serialise THP migrations */ + /* Serialise against migrationa and check placement check placement */ spin_unlock(&mm->page_table_lock); lock_page(page); @@ -1312,9 +1313,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, put_page(page); goto out_unlock; } - spin_unlock(&mm->page_table_lock); + +got_lock: + target_nid = mpol_misplaced(page, vma, haddr); + if (target_nid == -1) { + unlock_page(page); + put_page(page); + goto clear_pmdnuma; + } /* Migrate the THP to the requested node */ + spin_unlock(&mm->page_table_lock); migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd, addr, page, target_nid); if (!migrated) -- cgit v0.10.2 From 587fe586f44a48f9691001ba6c45b86c8e4ba21f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 7 Oct 2013 11:28:44 +0100 Subject: mm: Prevent parallel splits during THP migration THP migrations are serialised by the page lock but on its own that does not prevent THP splits. 
If the page is split during THP migration then the pmd_same checks will prevent page table corruption but the unlock page and other fix-ups potentially will cause corruption. This patch takes the anon_vma lock to prevent parallel splits during migration. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Srikar Dronamraju Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1381141781-10992-7-git-send-email-mgorman@suse.de Signed-off-by: Ingo Molnar diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e10d780..d8534b3 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1278,18 +1278,18 @@ out: int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp) { + struct anon_vma *anon_vma = NULL; struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; int target_nid; int current_nid = -1; - bool migrated; + bool migrated, page_locked; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; page = pmd_page(pmd); - get_page(page); current_nid = page_to_nid(page); count_vm_numa_event(NUMA_HINT_FAULTS); if (current_nid == numa_node_id()) @@ -1299,12 +1299,29 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * Acquire the page lock to serialise THP migrations but avoid dropping * page_table_lock if at all possible */ - if (trylock_page(page)) - goto got_lock; + page_locked = trylock_page(page); + target_nid = mpol_misplaced(page, vma, haddr); + if (target_nid == -1) { + /* If the page was locked, there are no parallel migrations */ + if (page_locked) { + unlock_page(page); + goto clear_pmdnuma; + } - /* Serialise against migrationa and check placement check placement */ + /* Otherwise wait for potential migrations and retry fault */ + spin_unlock(&mm->page_table_lock); + wait_on_page_locked(page); + goto out; + } + + /* Page is misplaced, serialise migrations and parallel THP splits */ + 
get_page(page); spin_unlock(&mm->page_table_lock); - lock_page(page); + if (!page_locked) { + lock_page(page); + page_locked = true; + } + anon_vma = page_lock_anon_vma_read(page); /* Confirm the PTE did not while locked */ spin_lock(&mm->page_table_lock); @@ -1314,14 +1331,6 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_unlock; } -got_lock: - target_nid = mpol_misplaced(page, vma, haddr); - if (target_nid == -1) { - unlock_page(page); - put_page(page); - goto clear_pmdnuma; - } - /* Migrate the THP to the requested node */ spin_unlock(&mm->page_table_lock); migrated = migrate_misplaced_transhuge_page(mm, vma, @@ -1330,6 +1339,8 @@ got_lock: goto check_same; task_numa_fault(target_nid, HPAGE_PMD_NR, true); + if (anon_vma) + page_unlock_anon_vma_read(anon_vma); return 0; check_same: @@ -1346,6 +1357,11 @@ clear_pmdnuma: update_mmu_cache_pmd(vma, addr, pmdp); out_unlock: spin_unlock(&mm->page_table_lock); + +out: + if (anon_vma) + page_unlock_anon_vma_read(anon_vma); + if (current_nid != -1) task_numa_fault(current_nid, HPAGE_PMD_NR, false); return 0; -- cgit v0.10.2 From c61109e34f60f6e85bb43c5a1cd51c0e3db40847 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 7 Oct 2013 11:28:45 +0100 Subject: mm: numa: Sanitize task_numa_fault() callsites There are three callers of task_numa_fault(): - do_huge_pmd_numa_page(): Accounts against the current node, not the node where the page resides, unless we migrated, in which case it accounts against the node we migrated to. - do_numa_page(): Accounts against the current node, not the node where the page resides, unless we migrated, in which case it accounts against the node we migrated to. - do_pmd_numa_page(): Accounts not at all when the page isn't migrated, otherwise accounts against the node we migrated towards. This seems wrong to me; all three sites should have the same semantics, furthermore we should account against where the page really is, we already know where the task is.
So modify all three sites to always account; we did after all receive the fault; and always account to where the page is after migration, regardless of success. They all still differ on when they clear the PTE/PMD; ideally that would get sorted too. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Srikar Dronamraju Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1381141781-10992-8-git-send-email-mgorman@suse.de Signed-off-by: Ingo Molnar diff --git a/mm/huge_memory.c b/mm/huge_memory.c index d8534b3..00ddfcd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1281,18 +1281,19 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, struct anon_vma *anon_vma = NULL; struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; + int page_nid = -1, this_nid = numa_node_id(); int target_nid; - int current_nid = -1; - bool migrated, page_locked; + bool page_locked; + bool migrated = false; spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; page = pmd_page(pmd); - current_nid = page_to_nid(page); + page_nid = page_to_nid(page); count_vm_numa_event(NUMA_HINT_FAULTS); - if (current_nid == numa_node_id()) + if (page_nid == this_nid) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); /* @@ -1335,19 +1336,18 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(&mm->page_table_lock); migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd, addr, page, target_nid); - if (!migrated) + if (migrated) + page_nid = target_nid; + else goto check_same; - task_numa_fault(target_nid, HPAGE_PMD_NR, true); - if (anon_vma) - page_unlock_anon_vma_read(anon_vma); - return 0; + goto out; check_same: spin_lock(&mm->page_table_lock); if (unlikely(!pmd_same(pmd, *pmdp))) { /* Someone else took our fault */ - current_nid = -1; + page_nid = -1; goto out_unlock; } clear_pmdnuma: @@ -1362,8 +1362,9 @@ out: if (anon_vma) 
page_unlock_anon_vma_read(anon_vma); - if (current_nid != -1) - task_numa_fault(current_nid, HPAGE_PMD_NR, false); + if (page_nid != -1) + task_numa_fault(page_nid, HPAGE_PMD_NR, migrated); + return 0; } diff --git a/mm/memory.c b/mm/memory.c index 1311f26..d176154 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3521,12 +3521,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, } int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, - unsigned long addr, int current_nid) + unsigned long addr, int page_nid) { get_page(page); count_vm_numa_event(NUMA_HINT_FAULTS); - if (current_nid == numa_node_id()) + if (page_nid == numa_node_id()) count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL); return mpol_misplaced(page, vma, addr); @@ -3537,7 +3537,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, { struct page *page = NULL; spinlock_t *ptl; - int current_nid = -1; + int page_nid = -1; int target_nid; bool migrated = false; @@ -3567,15 +3567,10 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, return 0; } - current_nid = page_to_nid(page); - target_nid = numa_migrate_prep(page, vma, addr, current_nid); + page_nid = page_to_nid(page); + target_nid = numa_migrate_prep(page, vma, addr, page_nid); pte_unmap_unlock(ptep, ptl); if (target_nid == -1) { - /* - * Account for the fault against the current node if it not - * being replaced regardless of where the page is located. 
- */ - current_nid = numa_node_id(); put_page(page); goto out; } @@ -3583,11 +3578,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Migrate to the requested node */ migrated = migrate_misplaced_page(page, target_nid); if (migrated) - current_nid = target_nid; + page_nid = target_nid; out: - if (current_nid != -1) - task_numa_fault(current_nid, 1, migrated); + if (page_nid != -1) + task_numa_fault(page_nid, 1, migrated); return 0; } @@ -3602,7 +3597,6 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long offset; spinlock_t *ptl; bool numa = false; - int local_nid = numa_node_id(); spin_lock(&mm->page_table_lock); pmd = *pmdp; @@ -3625,9 +3619,10 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) { pte_t pteval = *pte; struct page *page; - int curr_nid = local_nid; + int page_nid = -1; int target_nid; - bool migrated; + bool migrated = false; + if (!pte_present(pteval)) continue; if (!pte_numa(pteval)) @@ -3649,25 +3644,19 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(page_mapcount(page) != 1)) continue; - /* - * Note that the NUMA fault is later accounted to either - * the node that is currently running or where the page is - * migrated to. 
- */ - curr_nid = local_nid; - target_nid = numa_migrate_prep(page, vma, addr, - page_to_nid(page)); - if (target_nid == -1) { + page_nid = page_to_nid(page); + target_nid = numa_migrate_prep(page, vma, addr, page_nid); + pte_unmap_unlock(pte, ptl); + if (target_nid != -1) { + migrated = migrate_misplaced_page(page, target_nid); + if (migrated) + page_nid = target_nid; + } else { put_page(page); - continue; } - /* Migrate to the requested node */ - pte_unmap_unlock(pte, ptl); - migrated = migrate_misplaced_page(page, target_nid); - if (migrated) - curr_nid = target_nid; - task_numa_fault(curr_nid, 1, migrated); + if (page_nid != -1) + task_numa_fault(page_nid, 1, migrated); pte = pte_offset_map_lock(mm, pmdp, addr, &ptl); } -- cgit v0.10.2 From 3f926ab945b60a5824369d21add7710622a2eac0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 7 Oct 2013 11:28:46 +0100 Subject: mm: Close races between THP migration and PMD numa clearing THP migration uses the page lock to guard against parallel allocations but there are cases like this still open Task A Task B --------------------- --------------------- do_huge_pmd_numa_page do_huge_pmd_numa_page lock_page mpol_misplaced == -1 unlock_page goto clear_pmdnuma lock_page mpol_misplaced == 2 migrate_misplaced_transhuge pmd = pmd_mknonnuma set_pmd_at During hours of testing, one crashed with weird errors and while I have no direct evidence, I suspect something like the race above happened. This patch extends the page lock to being held until the pmd_numa is cleared to prevent migration starting in parallel while the pmd_numa is being cleared. It also flushes the old pmd entry and orders pagetable insertion before rmap insertion. 
Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Srikar Dronamraju Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1381141781-10992-9-git-send-email-mgorman@suse.de Signed-off-by: Ingo Molnar diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 00ddfcd..cca80d9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1304,24 +1304,25 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, target_nid = mpol_misplaced(page, vma, haddr); if (target_nid == -1) { /* If the page was locked, there are no parallel migrations */ - if (page_locked) { - unlock_page(page); + if (page_locked) goto clear_pmdnuma; - } - /* Otherwise wait for potential migrations and retry fault */ + /* + * Otherwise wait for potential migrations and retry. We do + * relock and check_same as the page may no longer be mapped. + * As the fault is being retried, do not account for it. + */ spin_unlock(&mm->page_table_lock); wait_on_page_locked(page); + page_nid = -1; goto out; } /* Page is misplaced, serialise migrations and parallel THP splits */ get_page(page); spin_unlock(&mm->page_table_lock); - if (!page_locked) { + if (!page_locked) lock_page(page); - page_locked = true; - } anon_vma = page_lock_anon_vma_read(page); /* Confirm the PTE did not while locked */ @@ -1329,32 +1330,28 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!pmd_same(pmd, *pmdp))) { unlock_page(page); put_page(page); + page_nid = -1; goto out_unlock; } - /* Migrate the THP to the requested node */ + /* + * Migrate the THP to the requested node, returns with page unlocked + * and pmd_numa cleared. 
+ */ spin_unlock(&mm->page_table_lock); migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd, addr, page, target_nid); if (migrated) page_nid = target_nid; - else - goto check_same; goto out; - -check_same: - spin_lock(&mm->page_table_lock); - if (unlikely(!pmd_same(pmd, *pmdp))) { - /* Someone else took our fault */ - page_nid = -1; - goto out_unlock; - } clear_pmdnuma: + BUG_ON(!PageLocked(page)); pmd = pmd_mknonnuma(pmd); set_pmd_at(mm, haddr, pmdp, pmd); VM_BUG_ON(pmd_numa(*pmdp)); update_mmu_cache_pmd(vma, addr, pmdp); + unlock_page(page); out_unlock: spin_unlock(&mm->page_table_lock); diff --git a/mm/migrate.c b/mm/migrate.c index 7a7325e..c046927 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1715,12 +1715,12 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, unlock_page(new_page); put_page(new_page); /* Free it */ - unlock_page(page); + /* Retake the callers reference and putback on LRU */ + get_page(page); putback_lru_page(page); - - count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); - isolated = 0; - goto out; + mod_zone_page_state(page_zone(page), + NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); + goto out_fail; } /* @@ -1737,9 +1737,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); entry = pmd_mkhuge(entry); - page_add_new_anon_rmap(new_page, vma, haddr); - + pmdp_clear_flush(vma, haddr, pmd); set_pmd_at(mm, haddr, pmd, entry); + page_add_new_anon_rmap(new_page, vma, haddr); update_mmu_cache_pmd(vma, address, &entry); page_remove_rmap(page); /* @@ -1758,7 +1758,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR); count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR); -out: mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); @@ -1767,6 +1766,10 @@ out: out_fail: count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); out_dropref: + entry = pmd_mknonnuma(entry); + set_pmd_at(mm, haddr, pmd, 
entry); + update_mmu_cache_pmd(vma, address, &entry); + unlock_page(page); put_page(page); return 0; -- cgit v0.10.2 From 0255d491848032f6c601b6410c3b8ebded3a37b1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 7 Oct 2013 11:28:47 +0100 Subject: mm: Account for a THP NUMA hinting update as one PTE update A THP PMD update is accounted for as 512 pages updated in vmstat. This is large difference when estimating the cost of automatic NUMA balancing and can be misleading when comparing results that had collapsed versus split THP. This patch addresses the accounting issue. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Cc: Andrea Arcangeli Cc: Johannes Weiner Cc: Srikar Dronamraju Cc: Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1381141781-10992-10-git-send-email-mgorman@suse.de Signed-off-by: Ingo Molnar diff --git a/mm/mprotect.c b/mm/mprotect.c index a3af058..412ba2b 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -148,7 +148,7 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, split_huge_page_pmd(vma, addr, pmd); else if (change_huge_pmd(vma, pmd, addr, newprot, prot_numa)) { - pages += HPAGE_PMD_NR; + pages++; continue; } /* fall through */ -- cgit v0.10.2 From bf378d341e4873ed928dc3c636252e6895a21f50 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Oct 2013 13:55:29 +0100 Subject: perf: Fix perf ring buffer memory ordering The PPC64 people noticed a missing memory barrier and crufty old comments in the perf ring buffer code. So update all the comments and add the missing barrier. When the architecture implements local_t using atomic_long_t there will be double barriers issued; but short of introducing more conditional barrier primitives this is the best we can do. 
Reported-by: Victor Kaplansky Tested-by: Victor Kaplansky Signed-off-by: Peter Zijlstra Cc: Mathieu Desnoyers Cc: michael@ellerman.id.au Cc: Paul McKenney Cc: Michael Neuling Cc: Frederic Weisbecker Cc: anton@samba.org Cc: benh@kernel.crashing.org Link: http://lkml.kernel.org/r/20131025173749.GG19466@laptop.lan Signed-off-by: Ingo Molnar diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 009a655..2fc1602 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -456,13 +456,15 @@ struct perf_event_mmap_page { /* * Control data for the mmap() data buffer. * - * User-space reading the @data_head value should issue an rmb(), on - * SMP capable platforms, after reading this value -- see - * perf_event_wakeup(). + * User-space reading the @data_head value should issue an smp_rmb(), + * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data. In this case - * the kernel will not over-write unread data. + * written by userspace to reflect the last read data, after issueing + * an smp_mb() to separate the data read from the ->data_tail store. + * In this case the kernel will not over-write unread data. + * + * See perf_output_put_handle() for the data ordering. */ __u64 data_head; /* head in the data section */ __u64 data_tail; /* user-space written tail */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index cd55144..9c2ddfb 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -87,10 +87,31 @@ again: goto out; /* - * Publish the known good head. Rely on the full barrier implied - * by atomic_dec_and_test() order the rb->head read and this - * write. 
+ * Since the mmap() consumer (userspace) can run on a different CPU: + * + * kernel user + * + * READ ->data_tail READ ->data_head + * smp_mb() (A) smp_rmb() (C) + * WRITE $data READ $data + * smp_wmb() (B) smp_mb() (D) + * STORE ->data_head WRITE ->data_tail + * + * Where A pairs with D, and B pairs with C. + * + * I don't think A needs to be a full barrier because we won't in fact + * write data until we see the store from userspace. So we simply don't + * issue the data WRITE until we observe it. Be conservative for now. + * + * OTOH, D needs to be a full barrier since it separates the data READ + * from the tail WRITE. + * + * For B a WMB is sufficient since it separates two WRITEs, and for C + * an RMB is sufficient since it separates two READs. + * + * See perf_output_begin(). */ + smp_wmb(); rb->user_page->data_head = head; /* @@ -154,9 +175,11 @@ int perf_output_begin(struct perf_output_handle *handle, * Userspace could choose to issue a mb() before updating the * tail pointer. So that all reads will be completed before the * write is issued. + * + * See perf_output_put_handle(). */ tail = ACCESS_ONCE(rb->user_page->data_tail); - smp_rmb(); + smp_mb(); offset = head = local_read(&rb->head); head += size; if (unlikely(!perf_output_space(rb, tail, offset, head))) -- cgit v0.10.2 From e8a923cc1fff6e627f906655ad52ee694ef2f6d7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Oct 2013 15:32:10 +0200 Subject: perf/x86: Fix NMI measurements OK, so what I'm actually seeing on my WSM is that sched/clock.c is 'broken' for the purpose we're using it for. What triggered it is that my WSM-EP is broken :-( [ 0.001000] tsc: Fast TSC calibration using PIT [ 0.002000] tsc: Detected 2533.715 MHz processor [ 0.500180] TSC synchronization [CPU#0 -> CPU#6]: [ 0.505197] Measured 3 cycles TSC warp between CPUs, turning off TSC clock. 
[ 0.004000] tsc: Marking TSC unstable due to check_tsc_sync_source failed For some reason it consistently detects TSC skew, even though NHM+ should have a single clock domain for 'reasonable' systems. This marks sched_clock_stable=0, which means that we do fancy stuff to try and get a 'sane' clock. Part of this fancy stuff relies on the tick, clearly that's gone when NOHZ=y. So for idle cpus time gets stuck, until it either wakes up or gets kicked by another cpu. While this is perfectly fine for the scheduler -- it only cares about actually running stuff, and when we're running stuff we're obviously not idle. This does somewhat break down for perf which can trigger events just fine on an otherwise idle cpu. So I've got NMIs that get 'measured' as taking ~1ms, which actually don't last nearly that long: -0 [013] d.h. 886.311970: rcu_nmi_enter <-do_nmi ... -0 [013] d.h. 886.311997: perf_sample_event_took: HERE!!! : 1040990 So ftrace (which uses sched_clock(), not the fancy bits) only sees ~27us, but we measure ~1ms !! Now since all this measurement stuff lives in x86 code, we can actually fix it.
Signed-off-by: Peter Zijlstra Cc: mingo@kernel.org Cc: dave.hansen@linux.intel.com Cc: eranian@google.com Cc: Don Zickus Cc: jmario@redhat.com Cc: acme@infradead.org Link: http://lkml.kernel.org/r/20131017133350.GG3364@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d84491..8a87a32 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1276,16 +1276,16 @@ void perf_events_lapic_init(void) static int __kprobes perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) { - int ret; u64 start_clock; u64 finish_clock; + int ret; if (!atomic_read(&active_events)) return NMI_DONE; - start_clock = local_clock(); + start_clock = sched_clock(); ret = x86_pmu.handle_irq(regs); - finish_clock = local_clock(); + finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ba77ebc..6fcb49c 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -113,10 +113,10 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 u64 before, delta, whole_msecs; int remainder_ns, decimal_msecs, thishandled; - before = local_clock(); + before = sched_clock(); thishandled = a->handler(type, regs); handled += thishandled; - delta = local_clock() - before; + delta = sched_clock() - before; trace_nmi_handler(a->handler, (int)delta, thishandled); if (delta < nmi_longest_ns) -- cgit v0.10.2 From d954777324ffcba0b2f8119c102237426c654eeb Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Mon, 28 Oct 2013 14:42:33 +0100 Subject: netfilter: xt_NFQUEUE: fix --queue-bypass regression V3 of the NFQUEUE target ignores the --queue-bypass flag, causing packets to be dropped when the userspace listener isn't running. Regression is in since 8746ddcf12bb26 ("netfilter: xt_NFQUEUE: introduce CPU fanout"). 
Reported-by: Florian Westphal Signed-off-by: Holger Eitzenberger Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 1e2fae3..ed00fef 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -147,6 +147,7 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_NFQ_info_v3 *info = par->targinfo; u32 queue = info->queuenum; + int ret; if (info->queues_total > 1) { if (info->flags & NFQ_FLAG_CPU_FANOUT) { @@ -157,7 +158,11 @@ nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) queue = nfqueue_hash(skb, par); } - return NF_QUEUE_NR(queue); + ret = NF_QUEUE_NR(queue); + if (info->flags & NFQ_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + return ret; } static struct xt_target nfqueue_tg_reg[] __read_mostly = { -- cgit v0.10.2 From 1fbc0d789d12fec313c91912fc11733fdfbab863 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 29 Oct 2013 12:04:08 +0100 Subject: drm/i915: Fix the PPT fdi lane bifurcate state handling on ivb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally I've thought that this is leftover hw state dirt from the BIOS. But after way too much helpless flailing around on my part I've noticed that the actual bug is when we change the state of an already active pipe. For example when we change the fdi lines from 2 to 3 without switching off outputs in-between we'll never see the crucial on->off transition in the ->modeset_global_resources hook the current logic relies on. Patch version 2 got this right by instead also checking whether the pipe is indeed active. But that in turn broke things when pipes have been turned off through dpms since the bifurcate enabling is done in the ->crtc_mode_set callback. To address this issues discussed with Ville in the patch review move the setting of the bifurcate bit into the ->crtc_enable hook. 
That way we won't wreak havoc with this state when userspace puts all other outputs into dpms off state. This also moves us forward with our overall goal to unify the modeset and dpms on paths (which we need to have to allow runtime pm in the dpms off state). Unfortunately this requires us to move the bifurcate helpers around a bit. Also update the commit message, I've misanalyzed the bug rather badly. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70507 Tested-by: Jan-Michael Brummer Cc: stable@vger.kernel.org Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 725f0be..d78d33f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2327,9 +2327,10 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc) FDI_FE_ERRC_ENABLE); } -static bool pipe_has_enabled_pch(struct intel_crtc *intel_crtc) +static bool pipe_has_enabled_pch(struct intel_crtc *crtc) { - return intel_crtc->base.enabled && intel_crtc->config.has_pch_encoder; + return crtc->base.enabled && crtc->active && + crtc->config.has_pch_encoder; } static void ivb_modeset_global_resources(struct drm_device *dev) @@ -2979,6 +2980,48 @@ static void ironlake_pch_transcoder_set_timings(struct intel_crtc *crtc, I915_READ(VSYNCSHIFT(cpu_transcoder))); } +static void cpt_enable_fdi_bc_bifurcation(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t temp; + + temp = I915_READ(SOUTH_CHICKEN1); + if (temp & FDI_BC_BIFURCATION_SELECT) + return; + + WARN_ON(I915_READ(FDI_RX_CTL(PIPE_B)) & FDI_RX_ENABLE); + WARN_ON(I915_READ(FDI_RX_CTL(PIPE_C)) & FDI_RX_ENABLE); + + temp |= FDI_BC_BIFURCATION_SELECT; + DRM_DEBUG_KMS("enabling fdi C rx\n"); + I915_WRITE(SOUTH_CHICKEN1, temp); + POSTING_READ(SOUTH_CHICKEN1); +} + +static void ivybridge_update_fdi_bc_bifurcation(struct intel_crtc *intel_crtc) +{ + struct drm_device *dev 
= intel_crtc->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + + switch (intel_crtc->pipe) { + case PIPE_A: + break; + case PIPE_B: + if (intel_crtc->config.fdi_lanes > 2) + WARN_ON(I915_READ(SOUTH_CHICKEN1) & FDI_BC_BIFURCATION_SELECT); + else + cpt_enable_fdi_bc_bifurcation(dev); + + break; + case PIPE_C: + cpt_enable_fdi_bc_bifurcation(dev); + + break; + default: + BUG(); + } +} + /* * Enable PCH resources required for PCH ports: * - PCH PLLs @@ -2997,6 +3040,9 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) assert_pch_transcoder_disabled(dev_priv, pipe); + if (IS_IVYBRIDGE(dev)) + ivybridge_update_fdi_bc_bifurcation(intel_crtc); + /* Write the TU size bits before fdi link training, so that error * detection works. */ I915_WRITE(FDI_RX_TUSIZE1(pipe), @@ -5592,48 +5638,6 @@ static bool ironlake_compute_clocks(struct drm_crtc *crtc, return true; } -static void cpt_enable_fdi_bc_bifurcation(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t temp; - - temp = I915_READ(SOUTH_CHICKEN1); - if (temp & FDI_BC_BIFURCATION_SELECT) - return; - - WARN_ON(I915_READ(FDI_RX_CTL(PIPE_B)) & FDI_RX_ENABLE); - WARN_ON(I915_READ(FDI_RX_CTL(PIPE_C)) & FDI_RX_ENABLE); - - temp |= FDI_BC_BIFURCATION_SELECT; - DRM_DEBUG_KMS("enabling fdi C rx\n"); - I915_WRITE(SOUTH_CHICKEN1, temp); - POSTING_READ(SOUTH_CHICKEN1); -} - -static void ivybridge_update_fdi_bc_bifurcation(struct intel_crtc *intel_crtc) -{ - struct drm_device *dev = intel_crtc->base.dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - switch (intel_crtc->pipe) { - case PIPE_A: - break; - case PIPE_B: - if (intel_crtc->config.fdi_lanes > 2) - WARN_ON(I915_READ(SOUTH_CHICKEN1) & FDI_BC_BIFURCATION_SELECT); - else - cpt_enable_fdi_bc_bifurcation(dev); - - break; - case PIPE_C: - cpt_enable_fdi_bc_bifurcation(dev); - - break; - default: - BUG(); - } -} - int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp) { /* @@ -5827,9 
+5831,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, &intel_crtc->config.fdi_m_n); } - if (IS_IVYBRIDGE(dev)) - ivybridge_update_fdi_bc_bifurcation(intel_crtc); - ironlake_set_pipeconf(crtc); /* Set up the display plane register */ -- cgit v0.10.2 From 2a999aa0a10f4d0d9a57a06974df620f8a856239 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 29 Oct 2013 08:33:36 -0700 Subject: Kconfig: make KOBJECT_RELEASE debugging require timer debugging Without the timer debugging, the delayed kobject release will just result in undebuggable oopses if it triggers any latent bugs. That doesn't actually help debugging at all. So make DEBUG_KOBJECT_RELEASE depend on DEBUG_OBJECTS_TIMERS to avoid having people enable one without the other. Signed-off-by: Linus Torvalds diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 06344d9..094f315 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -983,7 +983,7 @@ config DEBUG_KOBJECT config DEBUG_KOBJECT_RELEASE bool "kobject release debugging" - depends on DEBUG_KERNEL + depends on DEBUG_OBJECTS_TIMERS help kobjects are reference counted objects. This means that their last reference count put is not predictable, and the kobject can -- cgit v0.10.2 From 7314e613d5ff9f0934f7a0f74ed7973b903315d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 29 Oct 2013 10:21:34 -0700 Subject: Fix a few incorrectly checked [io_]remap_pfn_range() calls Nico Golde reports a few straggling uses of [io_]remap_pfn_range() that really should use the vm_iomap_memory() helper. This trivially converts two of them to the helper, and comments about why the third one really needs to continue to use remap_pfn_range(), and adds the missing size check. 
Reported-by: Nico Golde Cc: stable@kernel.org Signed-off-by: Linus Torvalds vm_private_data; int mi = uio_find_mem_index(vma); + struct uio_mem *mem; if (mi < 0) return -EINVAL; + mem = idev->info->mem + mi; - vma->vm_ops = &uio_physical_vm_ops; + if (vma->vm_end - vma->vm_start > mem->size) + return -EINVAL; + vma->vm_ops = &uio_physical_vm_ops; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + /* + * We cannot use the vm_iomap_memory() helper here, + * because vma->vm_pgoff is the map index we looked + * up above in uio_find_mem_index(), rather than an + * actual page offset into the mmap. + * + * So we just do the physical mmap without a page + * offset. + */ return remap_pfn_range(vma, vma->vm_start, - idev->info->mem[mi].addr >> PAGE_SHIFT, + mem->addr >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); } diff --git a/drivers/video/au1100fb.c b/drivers/video/au1100fb.c index a54ccdc..22ad8524 100644 --- a/drivers/video/au1100fb.c +++ b/drivers/video/au1100fb.c @@ -361,37 +361,13 @@ void au1100fb_fb_rotate(struct fb_info *fbi, int angle) int au1100fb_fb_mmap(struct fb_info *fbi, struct vm_area_struct *vma) { struct au1100fb_device *fbdev; - unsigned int len; - unsigned long start=0, off; fbdev = to_au1100fb_device(fbi); - if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) { - return -EINVAL; - } - - start = fbdev->fb_phys & PAGE_MASK; - len = PAGE_ALIGN((start & ~PAGE_MASK) + fbdev->fb_len); - - off = vma->vm_pgoff << PAGE_SHIFT; - - if ((vma->vm_end - vma->vm_start + off) > len) { - return -EINVAL; - } - - off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pgprot_val(vma->vm_page_prot) |= (6 << 9); //CCA=6 - if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) { - return -EAGAIN; - } - - return 0; + return vm_iomap_memory(vma, fbdev->fb_phys, fbdev->fb_len); } static struct fb_ops au1100fb_ops = diff --git 
a/drivers/video/au1200fb.c b/drivers/video/au1200fb.c index 301224e..1d02897 100644 --- a/drivers/video/au1200fb.c +++ b/drivers/video/au1200fb.c @@ -1233,34 +1233,13 @@ static int au1200fb_fb_blank(int blank_mode, struct fb_info *fbi) * method mainly to allow the use of the TLB streaming flag (CCA=6) */ static int au1200fb_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) - { - unsigned int len; - unsigned long start=0, off; struct au1200fb_device *fbdev = info->par; - if (vma->vm_pgoff > (~0UL >> PAGE_SHIFT)) { - return -EINVAL; - } - - start = fbdev->fb_phys & PAGE_MASK; - len = PAGE_ALIGN((start & ~PAGE_MASK) + fbdev->fb_len); - - off = vma->vm_pgoff << PAGE_SHIFT; - - if ((vma->vm_end - vma->vm_start + off) > len) { - return -EINVAL; - } - - off += start; - vma->vm_pgoff = off >> PAGE_SHIFT; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); pgprot_val(vma->vm_page_prot) |= _CACHE_MASK; /* CCA=7 */ - return io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT, - vma->vm_end - vma->vm_start, - vma->vm_page_prot); + return vm_iomap_memory(vma, fbdev->fb_phys, fbdev->fb_len); } static void set_global(u_int cmd, struct au1200_lcd_global_regs_t *pdata) -- cgit v0.10.2 From 7f081f175502373673c015a4d0fa1d5cc264758a Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Tue, 8 Oct 2013 16:17:48 +0100 Subject: MIPS: Perf: Fix 74K cache map According to Software User's Manual, the event of last-level-cache read/write misses is mapped to even counters. Odd counters of that event number count miss cycles. 
Signed-off-by: Deng-Cheng Zhu Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6036/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 45f1ffc..24cdf64 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -971,11 +971,11 @@ static const struct mips_perf_event mipsxx74Kcore_cache_map [C(LL)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = { 0x1c, CNTR_ODD, P }, - [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN | CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN, P }, }, [C(OP_WRITE)] = { [C(RESULT_ACCESS)] = { 0x1c, CNTR_ODD, P }, - [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN | CNTR_ODD, P }, + [C(RESULT_MISS)] = { 0x1d, CNTR_EVEN, P }, }, }, [C(ITLB)] = { -- cgit v0.10.2 From 706e282b6975305285980ddd903bcd246d2e4fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20Dr=C3=BCing?= Date: Mon, 28 Oct 2013 18:33:12 +0100 Subject: net: x25: Fix dead URLs in Kconfig MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the URLs in the Kconfig file to the new pages at sangoma.com and cisco.com Signed-off-by: Michael Drüing Signed-off-by: David S. Miller diff --git a/net/x25/Kconfig b/net/x25/Kconfig index c959312c..e2fa133 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -16,8 +16,8 @@ config X25 if you want that) and the lower level data link layer protocol LAPB (say Y to "LAPB Data Link Driver" below if you want that). - You can read more about X.25 at and - . + You can read more about X.25 at and + . Information about X.25 for Linux is contained in the files and . -- cgit v0.10.2 From 06499098a02b9ed906a7b6060f2c60fb813918d4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 28 Oct 2013 15:45:07 -0400 Subject: bridge: pass correct vlan id to multicast code Currently multicast code attempts to extract the vlan id from the skb even when vlan filtering is disabled.
This can lead to mdb entries being created with the wrong vlan id. Pass the already extracted vlan id to the multicast filtering code to make sure the correct id is used in creation as well as lookup. Signed-off-by: Vlad Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index ca04163..e6b7fec 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -64,7 +64,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_flood_deliver(br, skb, false); goto out; } - if (br_multicast_rcv(br, NULL, skb)) { + if (br_multicast_rcv(br, NULL, skb, vid)) { kfree_skb(skb); goto out; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index a2fd37e..7e73c32 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -80,7 +80,7 @@ int br_handle_frame_finish(struct sk_buff *skb) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && - br_multicast_rcv(br, p, skb)) + br_multicast_rcv(br, p, skb, vid)) goto drop; if (p->state == BR_STATE_LEARNING) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8b0b610..686284f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -947,7 +947,8 @@ void br_multicast_disable_port(struct net_bridge_port *port) static int br_ip4_multicast_igmp3_report(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct igmpv3_report *ih; struct igmpv3_grec *grec; @@ -957,12 +958,10 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, int type; int err = 0; __be32 group; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ih))) return -EINVAL; - br_vlan_get_tag(skb, &vid); ih = igmpv3_report_hdr(skb); num = ntohs(ih->ngrec); len = sizeof(*ih); @@ -1005,7 +1004,8 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br, #if IS_ENABLED(CONFIG_IPV6) static int
br_ip6_multicast_mld2_report(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct icmp6hdr *icmp6h; struct mld2_grec *grec; @@ -1013,12 +1013,10 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, int len; int num; int err = 0; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*icmp6h))) return -EINVAL; - br_vlan_get_tag(skb, &vid); icmp6h = icmp6_hdr(skb); num = ntohs(icmp6h->icmp6_dataun.un_data16[1]); len = sizeof(*icmp6h); @@ -1141,7 +1139,8 @@ static void br_multicast_query_received(struct net_bridge *br, static int br_ip4_multicast_query(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { const struct iphdr *iph = ip_hdr(skb); struct igmphdr *ih = igmp_hdr(skb); @@ -1153,7 +1152,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, unsigned long now = jiffies; __be32 group; int err = 0; - u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1189,7 +1187,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!group) goto out; - br_vlan_get_tag(skb, &vid); mp = br_mdb_ip4_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1219,7 +1216,8 @@ out: #if IS_ENABLED(CONFIG_IPV6) static int br_ip6_multicast_query(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct mld_msg *mld; @@ -1231,7 +1229,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, unsigned long now = jiffies; const struct in6_addr *group = NULL; int err = 0; - u16 vid = 0; spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || @@ -1265,7 +1262,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!group) goto out; - br_vlan_get_tag(skb, &vid); mp = br_mdb_ip6_get(mlock_dereference(br->mdb, br), group, vid); if (!mp) goto out; @@ -1439,7 +1435,8 @@ static void 
br_ip6_multicast_leave_group(struct net_bridge *br, static int br_multicast_ipv4_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct sk_buff *skb2 = skb; const struct iphdr *iph; @@ -1447,7 +1444,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, unsigned int len; unsigned int offset; int err; - u16 vid = 0; /* We treat OOM as packet loss for now. */ if (!pskb_may_pull(skb, sizeof(*iph))) @@ -1508,7 +1504,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = 0; - br_vlan_get_tag(skb2, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; ih = igmp_hdr(skb2); @@ -1519,10 +1514,10 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, err = br_ip4_multicast_add_group(br, port, ih->group, vid); break; case IGMPV3_HOST_MEMBERSHIP_REPORT: - err = br_ip4_multicast_igmp3_report(br, port, skb2); + err = br_ip4_multicast_igmp3_report(br, port, skb2, vid); break; case IGMP_HOST_MEMBERSHIP_QUERY: - err = br_ip4_multicast_query(br, port, skb2); + err = br_ip4_multicast_query(br, port, skb2, vid); break; case IGMP_HOST_LEAVE_MESSAGE: br_ip4_multicast_leave_group(br, port, ih->group, vid); @@ -1540,7 +1535,8 @@ err_out: #if IS_ENABLED(CONFIG_IPV6) static int br_multicast_ipv6_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { struct sk_buff *skb2; const struct ipv6hdr *ip6h; @@ -1550,7 +1546,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, unsigned int len; int offset; int err; - u16 vid = 0; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -EINVAL; @@ -1640,7 +1635,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = 0; - br_vlan_get_tag(skb, &vid); BR_INPUT_SKB_CB(skb)->igmp = 1; switch (icmp6_type) { @@ -1657,10 +1651,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, break; } case ICMPV6_MLD2_REPORT: - err = br_ip6_multicast_mld2_report(br, port, skb2); + err = br_ip6_multicast_mld2_report(br, 
port, skb2, vid); break; case ICMPV6_MGM_QUERY: - err = br_ip6_multicast_query(br, port, skb2); + err = br_ip6_multicast_query(br, port, skb2, vid); break; case ICMPV6_MGM_REDUCTION: { @@ -1681,7 +1675,7 @@ out: #endif int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, u16 vid) { BR_INPUT_SKB_CB(skb)->igmp = 0; BR_INPUT_SKB_CB(skb)->mrouters_only = 0; @@ -1691,10 +1685,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, switch (skb->protocol) { case htons(ETH_P_IP): - return br_multicast_ipv4_rcv(br, port, skb); + return br_multicast_ipv4_rcv(br, port, skb, vid); #if IS_ENABLED(CONFIG_IPV6) case htons(ETH_P_IPV6): - return br_multicast_ipv6_rcv(br, port, skb); + return br_multicast_ipv6_rcv(br, port, skb, vid); #endif } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e14c33b..2e8244e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -451,7 +451,8 @@ extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __us extern unsigned int br_mdb_rehash_seq; extern int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb); + struct sk_buff *skb, + u16 vid); extern struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br, struct sk_buff *skb, u16 vid); extern void br_multicast_add_port(struct net_bridge_port *port); @@ -522,7 +523,8 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, - struct sk_buff *skb) + struct sk_buff *skb, + u16 vid) { return 0; } -- cgit v0.10.2 From ec9debbd9a88d8ea86c488d6ffcac419ee7d46d9 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 29 Oct 2013 15:11:07 +0800 Subject: virtio-net: correctly handle cpu hotplug notifier during resuming commit 3ab098df35f8b98b6553edc2e40234af512ba877 (virtio-net: don't respond to cpu hotplug notifier if 
we're not ready) tries to bypass the cpu hotplug notifier by checking the config_enable and does nothing if it was false. So it needs to try to hold the config_lock mutex, which may happen in an atomic environment and leads to the following warnings: [ 622.944441] CPU0 attaching NULL sched-domain. [ 622.944446] CPU1 attaching NULL sched-domain. [ 622.944485] CPU0 attaching NULL sched-domain. [ 622.950795] BUG: sleeping function called from invalid context at kernel/mutex.c:616 [ 622.950796] in_atomic(): 1, irqs_disabled(): 1, pid: 10, name: migration/1 [ 622.950796] no locks held by migration/1/10. [ 622.950798] CPU: 1 PID: 10 Comm: migration/1 Not tainted 3.12.0-rc5-wl-01249-gb91e82d #317 [ 622.950799] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 622.950802] 0000000000000000 ffff88001d42dba0 ffffffff81a32f22 ffff88001bfb9c70 [ 622.950803] ffff88001d42dbb0 ffffffff810edb02 ffff88001d42dc38 ffffffff81a396ed [ 622.950805] 0000000000000046 ffff88001d42dbe8 ffffffff810e861d 0000000000000000 [ 622.950805] Call Trace: [ 622.950810] [] dump_stack+0x54/0x74 [ 622.950815] [] __might_sleep+0x112/0x114 [ 622.950817] [] mutex_lock_nested+0x3c/0x3c6 [ 622.950818] [] ? up+0x39/0x3e [ 622.950821] [] ? acpi_os_signal_semaphore+0x21/0x2d [ 622.950824] [] ? acpi_ut_release_mutex+0x5e/0x62 [ 622.950828] [] virtnet_cpu_callback+0x33/0x87 [ 622.950830] [] notifier_call_chain+0x3c/0x5e [ 622.950832] [] __raw_notifier_call_chain+0xe/0x10 [ 622.950835] [] __cpu_notify+0x20/0x37 [ 622.950836] [] cpu_notify+0x13/0x15 [ 622.950838] [] take_cpu_down+0x27/0x3a [ 622.950841] [] stop_machine_cpu_stop+0x93/0xf1 [ 622.950842] [] cpu_stopper_thread+0xa0/0x12f [ 622.950844] [] ? cpu_stopper_thread+0x12f/0x12f [ 622.950847] [] ? lock_release_holdtime.part.7+0xa3/0xa8 [ 622.950848] [] ? cpu_stop_should_run+0x3f/0x47 [ 622.950850] [] smpboot_thread_fn+0x1c5/0x1e3 [ 622.950852] [] ? lg_global_unlock+0x67/0x67 [ 622.950854] [] kthread+0xd8/0xe0 [ 622.950857] [] ?
wait_for_common+0x12f/0x164 [ 622.950859] [] ? kthread_create_on_node+0x124/0x124 [ 622.950861] [] ret_from_fork+0x7c/0xb0 [ 622.950862] [] ? kthread_create_on_node+0x124/0x124 [ 622.950876] smpboot: CPU 1 is now offline [ 623.194556] SMP alternatives: lockdep: fixing up alternatives [ 623.194559] smpboot: Booting Node 0 Processor 1 APIC 0x1 ... A correct fix is to unregister the hotcpu notifier during freeze and register it again during restore. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Cc: Wanlong Gao Cc: Rusty Russell Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Reviewed-by: Wanlong Gao Signed-off-by: David S. Miller diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 9fbdfcd..bbc9cb8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1118,11 +1118,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, { struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb); - mutex_lock(&vi->config_lock); - - if (!vi->config_enable) - goto done; - switch(action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: case CPU_DOWN_FAILED: @@ -1136,8 +1131,6 @@ static int virtnet_cpu_callback(struct notifier_block *nfb, break; } -done: - mutex_unlock(&vi->config_lock); return NOTIFY_OK; } @@ -1699,6 +1692,8 @@ static int virtnet_freeze(struct virtio_device *vdev) struct virtnet_info *vi = vdev->priv; int i; + unregister_hotcpu_notifier(&vi->nb); + /* Prevent config work handler from accessing the device */ mutex_lock(&vi->config_lock); vi->config_enable = false; @@ -1747,6 +1742,10 @@ static int virtnet_restore(struct virtio_device *vdev) virtnet_set_queues(vi, vi->curr_queue_pairs); rtnl_unlock(); + err = register_hotcpu_notifier(&vi->nb); + if (err) + return err; + return 0; } #endif -- cgit v0.10.2 From b4dfd326c29c241c2bb8463167217eb2438b7c3d Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 30 Oct 2013 10:50:37 +1100 Subject: ibm emac: Don't call napi_complete if napi_reschedule
failed This patch fixes a bug which would trigger the BUG_ON() at net/core/dev.c:4156. It was found that this was due to continuing processing in the current poll call even when the call to napi_reschedule failed, indicating the device was already on the polling list. This resulted in an extra call to napi_complete which triggered the BUG_ON(). This patch ensures that we only continue processing rotting packets in the current mal_poll call if we are not already on the polling list. Signed-off-by: Alistair Popple Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index dac564c..909f9b6 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -442,15 +442,11 @@ static int mal_poll(struct napi_struct *napi, int budget) if (unlikely(mc->ops->peek_rx(mc->dev) || test_bit(MAL_COMMAC_RX_STOPPED, &mc->flags))) { MAL_DBG2(mal, "rotting packet" NL); - if (napi_reschedule(napi)) - mal_disable_eob_irq(mal); - else - MAL_DBG2(mal, "already in poll list" NL); - - if (budget > 0) - goto again; - else + if (!napi_reschedule(napi)) goto more_work; + + mal_disable_eob_irq(mal); + goto again; } mc->ops->poll_tx(mc->dev); } -- cgit v0.10.2 From 32663b8b8948cc05f812ab82c1c7db2db3ddf717 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 30 Oct 2013 10:50:38 +1100 Subject: ibm emac: Fix locking for enable/disable eob irq Calls to mal_enable_eob_irq perform a read-modify-write of a dcr to enable device irqs which is protected by a spin lock. However calls to mal_disable_eob_irq do not take the corresponding lock. This patch resolves the problem by ensuring that calls to mal_disable_eob_irq also take the lock. Signed-off-by: Alistair Popple Signed-off-by: David S.
Miller diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index 909f9b6..e784751 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -263,7 +263,9 @@ static inline void mal_schedule_poll(struct mal_instance *mal) { if (likely(napi_schedule_prep(&mal->napi))) { MAL_DBG2(mal, "schedule_poll" NL); + spin_lock(&mal->lock); mal_disable_eob_irq(mal); + spin_unlock(&mal->lock); __napi_schedule(&mal->napi); } else MAL_DBG2(mal, "already in poll" NL); @@ -445,7 +447,9 @@ static int mal_poll(struct napi_struct *napi, int budget) if (!napi_reschedule(napi)) goto more_work; + spin_lock_irqsave(&mal->lock, flags); mal_disable_eob_irq(mal); + spin_unlock_irqrestore(&mal->lock, flags); goto again; } mc->ops->poll_tx(mc->dev); -- cgit v0.10.2 From b757a62e9f0df5c997c666dca4ad81197b5d8917 Mon Sep 17 00:00:00 2001 From: Nathan Hintz Date: Tue, 29 Oct 2013 19:32:01 -0700 Subject: bgmac: don't update slot on skb alloc/dma mapping error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Don't update the slot in "bgmac_dma_rx_skb_for_slot" unless both the skb alloc and dma mapping are successful; and free the newly allocated skb if a dma mapping error occurs. This relieves the caller of the need to deduce/execute the appropriate cleanup action required when an error occurs. Signed-off-by: Nathan Hintz Acked-by: Rafał Miłecki Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 249468f..9e8a3e0 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -244,25 +244,33 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac, struct bgmac_slot_info *slot) { struct device *dma_dev = bgmac->core->dma_dev; + struct sk_buff *skb; + dma_addr_t dma_addr; struct bgmac_rx_header *rx; /* Alloc skb */ - slot->skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE); - if (!slot->skb) + skb = netdev_alloc_skb(bgmac->net_dev, BGMAC_RX_BUF_SIZE); + if (!skb) return -ENOMEM; /* Poison - if everything goes fine, hardware will overwrite it */ - rx = (struct bgmac_rx_header *)slot->skb->data; + rx = (struct bgmac_rx_header *)skb->data; rx->len = cpu_to_le16(0xdead); rx->flags = cpu_to_le16(0xbeef); /* Map skb for the DMA */ - slot->dma_addr = dma_map_single(dma_dev, slot->skb->data, - BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dma_dev, slot->dma_addr)) { + dma_addr = dma_map_single(dma_dev, skb->data, + BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dma_dev, dma_addr)) { bgmac_err(bgmac, "DMA mapping error\n"); + dev_kfree_skb(skb); return -ENOMEM; } + + /* Update the slot */ + slot->skb = skb; + slot->dma_addr = dma_addr; + if (slot->dma_addr & 0xC0000000) bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n"); -- cgit v0.10.2 From 3d3b78c06c827bfc072a11056d7eb70aeb90e449 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 28 Oct 2013 10:55:49 +0100 Subject: drm: allow DRM_IOCTL_VERSION on render-nodes DRM_IOCTL_VERSION is a reliable way to get the driver-name and version information. It's not related to the interface-version (SET_VERSION ioctl) so we can safely enable it on render-nodes. Note that gbm uses udev-BUSID to load the correct mesa driver. 
However, the VERSION ioctl should be the more reliable way to do this (in case we add new DRM-bus drivers which have no BUSID or similar). Signed-off-by: David Herrmann Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 05ad9ba..fe58d08 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -61,7 +61,7 @@ static int drm_version(struct drm_device *dev, void *data, /** Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { - DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0), DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY), -- cgit v0.10.2 From d780a31271b2f455cb4b83eb018ecfb1c28ef5c1 Mon Sep 17 00:00:00 2001 From: Tim Gardner Date: Tue, 29 Oct 2013 09:13:54 -0600 Subject: KVM: Fix modprobe failure for kvm_intel/kvm_amd The x86 specific kvm init creates a new conflicting debugfs directory which causes modprobe issues with kvm_intel and kvm_amd. For example, sudo modprobe kvm_amd modprobe: ERROR: could not insert 'kvm_amd': Bad address The simplest fix is to just rename the directory. The following KVM config options are set: CONFIG_KVM_GUEST=y CONFIG_KVM_DEBUG_FS=y CONFIG_HAVE_KVM=y CONFIG_HAVE_KVM_IRQCHIP=y CONFIG_HAVE_KVM_IRQ_ROUTING=y CONFIG_HAVE_KVM_EVENTFD=y CONFIG_KVM_APIC_ARCHITECTURE=y CONFIG_KVM_MMIO=y CONFIG_KVM_ASYNC_PF=y CONFIG_HAVE_KVM_MSI=y CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT=y CONFIG_KVM=m CONFIG_KVM_INTEL=m CONFIG_KVM_AMD=m CONFIG_KVM_DEVICE_ASSIGNMENT=y Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Gleb Natapov Cc: Raghavendra K T Cc: Marcelo Tosatti Signed-off-by: Tim Gardner [Change debugfs directory name. 
- Paolo] Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index a0e2a8a..b2046e4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -609,7 +609,7 @@ static struct dentry *d_kvm_debug; struct dentry *kvm_init_debugfs(void) { - d_kvm_debug = debugfs_create_dir("kvm", NULL); + d_kvm_debug = debugfs_create_dir("kvm-guest", NULL); if (!d_kvm_debug) printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n"); -- cgit v0.10.2 From 0c8eb04a6241da28deb108181213b791c378123b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 30 Oct 2013 12:12:13 +0100 Subject: KVM: use a more sensible error number when debugfs directory creation fails I don't know if this was due to cut and paste, or somebody was really using a D20 to pick the error code for kvm_init_debugfs as suggested by Linus (EFAULT is 14, so the possibility cannot be entirely ruled out). In any case, this patch fixes it. Reported-by: Tim Gardner Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a9dd682..1cf9ccb 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3091,7 +3091,7 @@ static const struct file_operations *stat_fops[] = { static int kvm_init_debug(void) { - int r = -EFAULT; + int r = -EEXIST; struct kvm_stats_debugfs_item *p; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); -- cgit v0.10.2 From 6fc16e58adf50c0f1e4478538983fb5ff6f453d4 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Oct 2013 12:29:40 +0100 Subject: ALSA: hda - Add a fixup for ASUS N76VZ ASUS N76VZ needs the same fixup as N56VZ for supporting the boost speaker. 
Bugzilla: https://bugzilla.novell.com/show_bug.cgi?id=846529 Cc: Signed-off-by: Takashi Iwai diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index bf313be..8ad5543 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4623,6 +4623,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800), SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_ASUS_MODE4), + SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_ASUS_MODE4), SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT), SND_PCI_QUIRK(0x105b, 0x0cd6, "Foxconn", ALC662_FIXUP_ASUS_MODE2), SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), -- cgit v0.10.2 From c511851de162e8ec03d62e7d7feecbdf590d881d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Oct 2013 13:12:56 +0100 Subject: Revert "epoll: use freezable blocking call" This reverts commit 1c441e921201 (epoll: use freezable blocking call) which is reported to cause user space memory corruption to happen after suspend to RAM. Since it appears to be extremely difficult to root cause this problem, it is best to revert the offending commit and try to address the original issue in a better way later. References: https://bugzilla.kernel.org/show_bug.cgi?id=61781 Reported-by: Natrio Reported-by: Jeff Pohlmeyer Bisected-by: Leo Wolf Fixes: 1c441e921201 (epoll: use freezable blocking call) Signed-off-by: Rafael J. 
Wysocki Cc: 3.11+ # 3.11+ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 473e09d..810c28f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -1605,8 +1604,7 @@ fetch_events: } spin_unlock_irqrestore(&ep->lock, flags); - if (!freezable_schedule_hrtimeout_range(to, slack, - HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); -- cgit v0.10.2 From 59612d187912750f416fbffe0c00bc0811c54ab5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 29 Oct 2013 23:43:08 +0100 Subject: Revert "select: use freezable blocking call" This reverts commit 9745cdb36da8 (select: use freezable blocking call) that triggers problems during resume from suspend to RAM on Paul Bolle's 32-bit x86 machines. Paul says: Ever since I tried running (release candidates of) v3.11 on the two working i686s I still have lying around I ran into issues on resuming from suspend. Reverting 9745cdb36da8 (select: use freezable blocking call) resolves those issues. Resuming from suspend on i686 on (release candidates of) v3.11 and later triggers issues like: traps: systemd[1] general protection ip:b738e490 sp:bf882fc0 error:0 in libc-2.16.so[b731c000+1b0000] and traps: rtkit-daemon[552] general protection ip:804d6e5 sp:b6cb32f0 error:0 in rtkit-daemon[8048000+d000] Once I hit the systemd error I can only get out of the mess that the system is at that point by power cycling it. Since we are reverting another freezer-related change causing similar problems to happen, this one should be reverted as well. References: https://lkml.org/lkml/2013/10/29/583 Reported-by: Paul Bolle Fixes: 9745cdb36da8 (select: use freezable blocking call) Signed-off-by: Rafael J. 
Wysocki Cc: 3.11+ # 3.11+ diff --git a/fs/select.c b/fs/select.c index 35d4adc7..dfd5cb1 100644 --- a/fs/select.c +++ b/fs/select.c @@ -238,8 +238,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = freezable_schedule_hrtimeout_range(expires, slack, - HRTIMER_MODE_ABS); + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* -- cgit v0.10.2 From ab1225901da2d4cd2dcbae6840e93abbef417064 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 30 Oct 2013 14:40:36 +0200 Subject: Revert "ACPI / hotplug / PCI: Avoid doing too much for spurious notifies" Commit 2dc4128 (ACPI / hotplug / PCI: Avoid doing too much for spurious notifies) changed the enable_slot() to check return value of pci_scan_slot() and if it is zero return early from the function. It means that there were no new devices in this particular slot. However, if a device appeared deeper in the hierarchy the code now ignores it causing things like Thunderbolt chaining fail to recognize new devices. The problem with Alex Williamson's machine was solved with commit a47d8c8 (ACPI / hotplug / PCI: Avoid parent bus rescans on spurious device checks) and hence we should be able to restore the original functionality that we always rescan on bus check notification. On a device check notification we still check what acpiphp_rescan_slot() returns and on zero bail out early. Fixes: 2dc41281b1d1 (ACPI / hotplug / PCI: Avoid doing too much for spurious notifies) Signed-off-by: Mika Westerberg Tested-by: Alex Williamson Signed-off-by: Rafael J. 
Wysocki diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index be12fbf..1ea7523 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -552,9 +552,8 @@ static void __ref enable_slot(struct acpiphp_slot *slot) struct acpiphp_func *func; int max, pass; LIST_HEAD(add_list); - int nr_found; - nr_found = acpiphp_rescan_slot(slot); + acpiphp_rescan_slot(slot); max = acpiphp_max_busnr(bus); for (pass = 0; pass < 2; pass++) { list_for_each_entry(dev, &bus->devices, bus_list) { @@ -574,9 +573,6 @@ static void __ref enable_slot(struct acpiphp_slot *slot) } } __pci_bus_assign_resources(bus, &add_list, NULL); - /* Nothing more to do here if there are no new devices on this bus. */ - if (!nr_found && (slot->flags & SLOT_ENABLED)) - return; acpiphp_sanitize_bus(bus); acpiphp_set_hpp_values(bus); -- cgit v0.10.2 From 13b7ea6377fb23f02784a38e894f8fad49816376 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 30 Oct 2013 14:27:48 +0000 Subject: MIPS: malta: Fix GIC interrupt offsets The GIC interrupt offsets are calculated based on the value of NR_CPUS. However, this is wrong because NR_CPUS may or may not contain the real number of the actual cpus present in the system. We fix that by using the 'nr_cpu_ids' variable which contains the real number of cpus in the system. 
Previously, an MT core (eg with 8 VPEs) will fail to boot if NR_CPUS was > 8 with the following errors: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/irq/chip.c:670 __irq_set_handler+0x15c/0x164() Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 3.12.0-rc5-00087-gced5633 5 Stack : 00000006 00000004 00000000 00000000 00000000 00000000 807a4f36 00000053 807a0000 00000000 80173218 80565aa8 00000000 00000000 00000000 0000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 0000000 00000000 00000000 00000000 8054fd00 8054fd94 80500514 805657a7 8016eb4 807a0000 80500514 00000000 00000000 80565aa8 8079a5d8 80565766 8054fd0 ... Call Trace: [<801098c0>] show_stack+0x64/0x7c [<8049c6b0>] dump_stack+0x64/0x84 [<8012efc4>] warn_slowpath_common+0x84/0xb4 [<8012f00c>] warn_slowpath_null+0x18/0x24 [<80173218>] __irq_set_handler+0x15c/0x164 [<80587cf4>] arch_init_ipiirq+0x2c/0x3c [<805880c8>] arch_init_irq+0x3c4/0x4bc [<80588e28>] init_IRQ+0x3c/0x50 [<805847e8>] start_kernel+0x230/0x3d8 ---[ end trace 4eaa2a86a8e2da26 ]--- This is now fixed and the Malta board can boot with any NR_CPUS value which also helps supporting more processors in a single kernel binary. 
Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6091/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index c69da37..5b28e81 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -473,7 +473,7 @@ static void __init fill_ipi_map(void) { int cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { fill_ipi_map1(gic_resched_int_base, cpu, GIC_CPU_INT1); fill_ipi_map1(gic_call_int_base, cpu, GIC_CPU_INT2); } @@ -574,8 +574,9 @@ void __init arch_init_irq(void) /* FIXME */ int i; #if defined(CONFIG_MIPS_MT_SMP) - gic_call_int_base = GIC_NUM_INTRS - NR_CPUS; - gic_resched_int_base = gic_call_int_base - NR_CPUS; + gic_call_int_base = GIC_NUM_INTRS - + (NR_CPUS - nr_cpu_ids) * 2 - nr_cpu_ids; + gic_resched_int_base = gic_call_int_base - nr_cpu_ids; fill_ipi_map(); #endif gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, gic_intr_map, @@ -599,7 +600,7 @@ void __init arch_init_irq(void) printk("CPU%d: status register now %08x\n", smp_processor_id(), read_c0_status()); write_c0_status(0x1100dc00); printk("CPU%d: status register frc %08x\n", smp_processor_id(), read_c0_status()); - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { arch_init_ipiirq(MIPS_GIC_IRQ_BASE + GIC_RESCHED_INT(i), &irq_resched); arch_init_ipiirq(MIPS_GIC_IRQ_BASE + -- cgit v0.10.2 From 268ff14525edba31da29a12a9dd693cdd6a7872e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 30 Oct 2013 08:35:02 +0100 Subject: ASoC: wm_hubs: Add missing break in hp_supply_event() Spotted by coverity CID 115170. 
Signed-off-by: Takashi Iwai Signed-off-by: Mark Brown Cc: stable@vger.kernel.org diff --git a/sound/soc/codecs/wm_hubs.c b/sound/soc/codecs/wm_hubs.c index 8b50e59..01daf65 100644 --- a/sound/soc/codecs/wm_hubs.c +++ b/sound/soc/codecs/wm_hubs.c @@ -530,6 +530,7 @@ static int hp_supply_event(struct snd_soc_dapm_widget *w, hubs->hp_startup_mode); break; } + break; case SND_SOC_DAPM_PRE_PMD: snd_soc_update_bits(codec, WM8993_CHARGE_PUMP_1, -- cgit v0.10.2 From 201f99f170df14ba52ea4c52847779042b7a623b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 Oct 2013 22:06:04 +0300 Subject: uml: check length in exitcode_proc_write() We don't cap the size of buffer from the user so we could write past the end of the array here. Only root can write to this file. Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Signed-off-by: Dan Carpenter Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/arch/um/kernel/exitcode.c b/arch/um/kernel/exitcode.c index 829df49..41ebbfe 100644 --- a/arch/um/kernel/exitcode.c +++ b/arch/um/kernel/exitcode.c @@ -40,9 +40,11 @@ static ssize_t exitcode_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { char *end, buf[sizeof("nnnnn\0")]; + size_t size; int tmp; - if (copy_from_user(buf, buffer, count)) + size = min(count, sizeof(buf)); + if (copy_from_user(buf, buffer, size)) return -EFAULT; tmp = simple_strtol(buf, &end, 0); -- cgit v0.10.2 From c2c65cd2e14ada6de44cb527e7f1990bede24e15 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 Oct 2013 22:07:47 +0300 Subject: staging: ozwpan: prevent overflow in oz_cdev_write() We need to check "count" so we don't overflow the ei->data buffer. 
Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Signed-off-by: Dan Carpenter Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/drivers/staging/ozwpan/ozcdev.c b/drivers/staging/ozwpan/ozcdev.c index 6ccb64f..6ce0af9 100644 --- a/drivers/staging/ozwpan/ozcdev.c +++ b/drivers/staging/ozwpan/ozcdev.c @@ -155,6 +155,9 @@ static ssize_t oz_cdev_write(struct file *filp, const char __user *buf, struct oz_app_hdr *app_hdr; struct oz_serial_ctx *ctx; + if (count > sizeof(ei->data) - sizeof(*elt) - sizeof(*app_hdr)) + return -EINVAL; + spin_lock_bh(&g_cdev.lock); pd = g_cdev.active_pd; if (pd) -- cgit v0.10.2 From f856567b930dfcdbc3323261bf77240ccdde01f5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 Oct 2013 22:11:06 +0300 Subject: aacraid: missing capable() check in compat ioctl In commit d496f94d22d1 ('[SCSI] aacraid: fix security weakness') we added a check on CAP_SYS_RAWIO to the ioctl. The compat ioctls need the check as well. Signed-off-by: Dan Carpenter Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 408a42e..f0d432c 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -771,6 +771,8 @@ static long aac_compat_do_ioctl(struct aac_dev *dev, unsigned cmd, unsigned long static int aac_compat_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) { struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; return aac_compat_do_ioctl(dev, cmd, (unsigned long)arg); } -- cgit v0.10.2 From b5e2f339865fb443107e5b10603e53bbc92dc054 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 Oct 2013 23:00:15 +0300 Subject: staging: wlags49_h2: buffer overflow setting station name We need to check the length parameter before doing the memcpy(). I've actually changed it to strlcpy() as well so that it's NUL terminated. 
You need CAP_NET_ADMIN to trigger these so it's not the end of the world. Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Signed-off-by: Dan Carpenter Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/drivers/staging/wlags49_h2/wl_priv.c b/drivers/staging/wlags49_h2/wl_priv.c index c97e0e1..7e10dcd 100644 --- a/drivers/staging/wlags49_h2/wl_priv.c +++ b/drivers/staging/wlags49_h2/wl_priv.c @@ -570,6 +570,7 @@ int wvlan_uil_put_info(struct uilreq *urq, struct wl_private *lp) ltv_t *pLtv; bool_t ltvAllocated = FALSE; ENCSTRCT sEncryption; + size_t len; #ifdef USE_WDS hcf_16 hcfPort = HCF_PORT_0; @@ -686,7 +687,8 @@ int wvlan_uil_put_info(struct uilreq *urq, struct wl_private *lp) break; case CFG_CNF_OWN_NAME: memset(lp->StationName, 0, sizeof(lp->StationName)); - memcpy((void *)lp->StationName, (void *)&pLtv->u.u8[2], (size_t)pLtv->u.u16[0]); + len = min_t(size_t, pLtv->u.u16[0], sizeof(lp->StationName)); + strlcpy(lp->StationName, &pLtv->u.u8[2], len); pLtv->u.u16[0] = CNV_INT_TO_LITTLE(pLtv->u.u16[0]); break; case CFG_CNF_LOAD_BALANCING: @@ -1783,6 +1785,7 @@ int wvlan_set_station_nickname(struct net_device *dev, { struct wl_private *lp = wl_priv(dev); unsigned long flags; + size_t len; int ret = 0; /*------------------------------------------------------------------------*/ @@ -1793,8 +1796,8 @@ int wvlan_set_station_nickname(struct net_device *dev, wl_lock(lp, &flags); memset(lp->StationName, 0, sizeof(lp->StationName)); - - memcpy(lp->StationName, extra, wrqu->data.length); + len = min_t(size_t, wrqu->data.length, sizeof(lp->StationName)); + strlcpy(lp->StationName, extra, len); /* Commit the adapter parameters */ wl_apply(lp); -- cgit v0.10.2 From 8d1e72250c847fa96498ec029891de4dc638a5ba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 Oct 2013 23:01:11 +0300 Subject: Staging: bcm: info leak in ioctl The DevInfo.u32Reserved[] array isn't initialized so it leaks kernel information to user space. 
Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Signed-off-by: Dan Carpenter Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/drivers/staging/bcm/Bcmchar.c b/drivers/staging/bcm/Bcmchar.c index f91bc1f..639ba96 100644 --- a/drivers/staging/bcm/Bcmchar.c +++ b/drivers/staging/bcm/Bcmchar.c @@ -1960,6 +1960,7 @@ cntrlEnd: BCM_DEBUG_PRINT(Adapter, DBG_TYPE_OTHERS, OSAL_DBG, DBG_LVL_ALL, "Called IOCTL_BCM_GET_DEVICE_DRIVER_INFO\n"); + memset(&DevInfo, 0, sizeof(DevInfo)); DevInfo.MaxRDMBufferSize = BUFFER_4K; DevInfo.u32DSDStartOffset = EEPROM_CALPARAM_START; DevInfo.u32RxAlignmentCorrection = 0; -- cgit v0.10.2 From a8b33654b1e3b0c74d4a1fed041c9aae50b3c427 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 Oct 2013 23:01:43 +0300 Subject: Staging: sb105x: info leak in mp_get_count() The icount.reserved[] array isn't initialized so it leaks stack information to userspace. Reported-by: Nico Golde Reported-by: Fabian Yamaguchi Signed-off-by: Dan Carpenter Cc: stable@kernel.org Signed-off-by: Linus Torvalds diff --git a/drivers/staging/sb105x/sb_pci_mp.c b/drivers/staging/sb105x/sb_pci_mp.c index 23db32f..a10cdb1 100644 --- a/drivers/staging/sb105x/sb_pci_mp.c +++ b/drivers/staging/sb105x/sb_pci_mp.c @@ -1063,7 +1063,7 @@ static int mp_wait_modem_status(struct sb_uart_state *state, unsigned long arg) static int mp_get_count(struct sb_uart_state *state, struct serial_icounter_struct *icnt) { - struct serial_icounter_struct icount; + struct serial_icounter_struct icount = {}; struct sb_uart_icount cnow; struct sb_uart_port *port = state->port; -- cgit v0.10.2 From c56b097af26cb11c1f49a4311ba538c825666fed Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 30 Oct 2013 14:16:16 +0000 Subject: mm: list_lru: fix almost infinite loop causing effective livelock I've seen a fair number of issues with kswapd and other processes appearing to get stuck in v3.12-rc. 
Using sysrq-p many times seems to indicate that it gets stuck somewhere in list_lru_walk_node(), called from prune_icache_sb() and super_cache_scan(). I never seem to be able to trigger a calltrace for functions above that point. So I decided to add the following to super_cache_scan(): @@ -81,10 +81,14 @@ static unsigned long super_cache_scan(struct shrinker *shrink, inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid); dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid); total_objects = dentries + inodes + fs_objects + 1; +printk("%s:%u: %s: dentries %lu inodes %lu total %lu\n", current->comm, current->pid, __func__, dentries, inodes, total_objects); /* proportion the scan between the caches */ dentries = mult_frac(sc->nr_to_scan, dentries, total_objects); inodes = mult_frac(sc->nr_to_scan, inodes, total_objects); +printk("%s:%u: %s: dentries %lu inodes %lu\n", current->comm, current->pid, __func__, dentries, inodes); +BUG_ON(dentries == 0); +BUG_ON(inodes == 0); /* * prune the dcache first as the icache is pinned by it, then @@ -99,7 +103,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink, freed += sb->s_op->free_cached_objects(sb, fs_objects, sc->nid); } - +printk("%s:%u: %s: dentries %lu inodes %lu freed %lu\n", current->comm, current->pid, __func__, dentries, inodes, freed); drop_super(sb); return freed; } and shortly thereafter, having applied some pressure, I got this: update-apt-xapi:1616: super_cache_scan: dentries 25632 inodes 2 total 25635 update-apt-xapi:1616: super_cache_scan: dentries 1023 inodes 0 ------------[ cut here ]------------ Kernel BUG at c0101994 [verbose debug info unavailable] Internal error: Oops - BUG: 0 [#3] SMP ARM Modules linked in: fuse rfcomm bnep bluetooth hid_cypress CPU: 0 PID: 1616 Comm: update-apt-xapi Tainted: G D 3.12.0-rc7+ #154 task: daea1200 ti: c3bf8000 task.ti: c3bf8000 PC is at super_cache_scan+0x1c0/0x278 LR is at trace_hardirqs_on+0x14/0x18 Process update-apt-xapi (pid: 1616, stack limit = 
0xc3bf8240) ... Backtrace: (super_cache_scan) from [] (shrink_slab+0x254/0x4c8) (shrink_slab) from [] (try_to_free_pages+0x3a0/0x5e0) (try_to_free_pages) from [] (__alloc_pages_nodemask+0x5) (__alloc_pages_nodemask) from [] (__pte_alloc+0x2c/0x13) (__pte_alloc) from [] (handle_mm_fault+0x84c/0x914) (handle_mm_fault) from [] (do_page_fault+0x1f0/0x3bc) (do_page_fault) from [] (do_translation_fault+0xac/0xb8) (do_translation_fault) from [] (do_DataAbort+0x38/0xa0) (do_DataAbort) from [] (__dabt_usr+0x38/0x40) Notice that we had a very low number of inodes, which were reduced to zero by mult_frac(). Now, prune_icache_sb() calls list_lru_walk_node() passing that number of inodes (0) into that as the number of objects to scan: long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan, int nid) { LIST_HEAD(freeable); long freed; freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate, &freeable, &nr_to_scan); which does: unsigned long list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk) { struct list_lru_node *nlru = &lru->node[nid]; struct list_head *item, *n; unsigned long isolated = 0; spin_lock(&nlru->lock); restart: list_for_each_safe(item, n, &nlru->list) { enum lru_status ret; /* * decrement nr_to_walk first so that we don't livelock if we * get stuck on large numbesr of LRU_RETRY items */ if (--(*nr_to_walk) == 0) break; So, if *nr_to_walk was zero when this function was entered, that means we're wanting to operate on (~0UL)+1 objects - which might as well be infinite. Clearly this is not correct behaviour. If we think about the behaviour of this function when *nr_to_walk is 1, then clearly it's wrong - we decrement first and then test for zero - which results in us doing nothing at all. A post-decrement would give the desired behaviour - we'd try to walk one object and one object only if *nr_to_walk were one.
It also gives the correct behaviour for zero - we exit at this point. Fixes: 5cedf721a7cd ("list_lru: fix broken LRU_RETRY behaviour") Signed-off-by: Russell King Cc: Dave Chinner Cc: Al Viro Cc: Andrew Morton [ Modified to make sure we never underflow the count: this function gets called in a loop, so the 0 -> ~0ul transition is dangerous - Linus ] Signed-off-by: Linus Torvalds diff --git a/mm/list_lru.c b/mm/list_lru.c index 7246791..72f9dec 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -81,8 +81,9 @@ restart: * decrement nr_to_walk first so that we don't livelock if we * get stuck on large numbesr of LRU_RETRY items */ - if (--(*nr_to_walk) == 0) + if (!*nr_to_walk) break; + --*nr_to_walk; ret = isolate(item, &nlru->lock, cb_arg); switch (ret) { -- cgit v0.10.2 From c17cb8b55b104c549aa20a72fa44141ad2c65ec2 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Wed, 30 Oct 2013 16:46:15 +0900 Subject: doc:net: Fix typo in Documentation/networking Correct spelling typo in Documentation/networking Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: David S. Miller diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index d718bc2..bf5dbe3 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -18,8 +18,8 @@ Introduction Datagram Congestion Control Protocol (DCCP) is an unreliable, connection oriented protocol designed to solve issues present in UDP and TCP, particularly for real-time and multimedia (streaming) traffic. -It divides into a base protocol (RFC 4340) and plugable congestion control -modules called CCIDs. Like plugable TCP congestion control, at least one CCID +It divides into a base protocol (RFC 4340) and pluggable congestion control +modules called CCIDs. Like pluggable TCP congestion control, at least one CCID needs to be enabled in order for the protocol to function properly. In the Linux implementation, this is the TCP-like CCID2 (RFC 4341). 
Additional CCIDs, such as the TCP-friendly CCID3 (RFC 4342), are optional. diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt index 13a3212..f862cf3 100644 --- a/Documentation/networking/e100.txt +++ b/Documentation/networking/e100.txt @@ -103,7 +103,7 @@ Additional Configurations PRO/100 Family of Adapters is e100. As an example, if you install the e100 driver for two PRO/100 adapters - (eth0 and eth1), add the following to a configuraton file in /etc/modprobe.d/ + (eth0 and eth1), add the following to a configuration file in /etc/modprobe.d/ alias eth0 e100 alias eth1 e100 diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt index 09eb573..22bbc72 100644 --- a/Documentation/networking/ieee802154.txt +++ b/Documentation/networking/ieee802154.txt @@ -4,7 +4,7 @@ Introduction ============ -The IEEE 802.15.4 working group focuses on standartization of bottom +The IEEE 802.15.4 working group focuses on standardization of bottom two layers: Medium Access Control (MAC) and Physical (PHY). And there are mainly two options available for upper layers: - ZigBee - proprietary protocol from ZigBee Alliance @@ -66,7 +66,7 @@ net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family code via plain sk_buffs. On skb reception skb->cb must contain additional info as described in the struct ieee802154_mac_cb. During packet transmission the skb->cb is used to provide additional data to device's header_ops->create -function. Be aware, that this data can be overriden later (when socket code +function. Be aware that this data can be overridden later (when socket code submits skb to qdisc), so if you need something from that cb later, you should store info in the skb->data on your own. 
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt index e63fc1f..c74434d 100644 --- a/Documentation/networking/l2tp.txt +++ b/Documentation/networking/l2tp.txt @@ -197,7 +197,7 @@ state information because the file format is subject to change. It is implemented to provide extra debug information to help diagnose problems.) Users should use the netlink API. -/proc/net/pppol2tp is also provided for backwards compaibility with +/proc/net/pppol2tp is also provided for backwards compatibility with the original pppol2tp driver. It lists information about L2TPv2 tunnels and sessions only. Its use is discouraged. diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt index 3a2c586..0fe1c6e 100644 --- a/Documentation/networking/netdev-FAQ.txt +++ b/Documentation/networking/netdev-FAQ.txt @@ -68,7 +68,7 @@ A: To understand this, you need to know a bit of background information The "net" tree continues to collect fixes for the vX.Y content, and is fed back to Linus at regular (~weekly) intervals. Meaning that the - focus for "net" is on stablilization and bugfixes. + focus for "net" is on stabilization and bugfixes. Finally, the vX.Y gets released, and the whole cycle starts over. diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt index 5333788..b261229 100644 --- a/Documentation/networking/netlink_mmap.txt +++ b/Documentation/networking/netlink_mmap.txt @@ -45,7 +45,7 @@ processing. Conversion of the reception path involves calling poll() on the file descriptor, once the socket is readable the frames from the ring are -processsed in order until no more messages are available, as indicated by +processed in order until no more messages are available, as indicated by a status word in the frame header. On kernel side, in order to make use of memory mapped I/O on receive, the @@ -56,7 +56,7 @@ Dumps of kernel databases automatically support memory mapped I/O. 
Conversion of the transmit path involves changing message construction to use memory from the TX ring instead of (usually) a buffer declared on the -stack and setting up the frame header approriately. Optionally poll() can +stack and setting up the frame header appropriately. Optionally poll() can be used to wait for free frames in the TX ring. Structured and definitions for using memory mapped I/O are contained in @@ -231,7 +231,7 @@ Ring setup: if (setsockopt(fd, NETLINK_TX_RING, &req, sizeof(req)) < 0) exit(1) - /* Calculate size of each invididual ring */ + /* Calculate size of each individual ring */ ring_size = req.nm_block_nr * req.nm_block_size; /* Map RX/TX rings. The TX ring is located after the RX ring */ diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt index 9769457..355c6d8 100644 --- a/Documentation/networking/operstates.txt +++ b/Documentation/networking/operstates.txt @@ -89,8 +89,8 @@ packets. The name 'carrier' and the inversion are historical, think of it as lower layer. Note that for certain kind of soft-devices, which are not managing any -real hardware, there is possible to set this bit from userpsace. -One should use TVL IFLA_CARRIER to do so. +real hardware, it is possible to set this bit from userspace. One +should use TVL IFLA_CARRIER to do so. netif_carrier_ok() can be used to query that bit. diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 60d05eb..b89bc82e 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -144,7 +144,7 @@ An overview of the RxRPC protocol: (*) Calls use ACK packets to handle reliability. Data packets are also explicitly sequenced per call. - (*) There are two types of positive acknowledgement: hard-ACKs and soft-ACKs. + (*) There are two types of positive acknowledgment: hard-ACKs and soft-ACKs. 
A hard-ACK indicates to the far side that all the data received to a point has been received and processed; a soft-ACK indicates that the data has been received but may yet be discarded and re-requested. The sender may diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index 457b8bb..cdd916d 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -160,7 +160,7 @@ Where: o pmt: core has the embedded power module (optional). o force_sf_dma_mode: force DMA to use the Store and Forward mode instead of the Threshold. - o force_thresh_dma_mode: force DMA to use the Shreshold mode other than + o force_thresh_dma_mode: force DMA to use the Threshold mode other than the Store and Forward mode. o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode. o fix_mac_speed: this callback is used for modifying some syscfg registers @@ -175,7 +175,7 @@ Where: registers. o custom_cfg/custom_data: this is a custom configuration that can be passed while initializing the resources. - o bsp_priv: another private poiter. + o bsp_priv: another private pointer. For MDIO bus The we have: @@ -271,7 +271,7 @@ reset procedure etc). o dwmac1000_dma.c: dma functions for the GMAC chip; o dwmac1000.h: specific header file for the GMAC; o dwmac100_core: MAC 100 core and dma code; - o dwmac100_dma.c: dma funtions for the MAC chip; + o dwmac100_dma.c: dma functions for the MAC chip; o dwmac1000.h: specific header file for the MAC; o dwmac_lib.c: generic DMA functions shared among chips; o enh_desc.c: functions for handling enhanced descriptors; @@ -364,4 +364,4 @@ Auto-negotiated Link Parter Ability. 10) TODO: o XGMAC is not supported. o Complete the TBI & RTBI support. - o extened VLAN support for 3.70a SYNP GMAC. + o extend VLAN support for 3.70a SYNP GMAC. 
diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt index 9a8041d..97282da 100644 --- a/Documentation/networking/vortex.txt +++ b/Documentation/networking/vortex.txt @@ -68,7 +68,7 @@ Module parameters There are several parameters which may be provided to the driver when its module is loaded. These are usually placed in /etc/modprobe.d/*.conf -configuretion files. Example: +configuration files. Example: options 3c59x debug=3 rx_copybreak=300 @@ -178,7 +178,7 @@ max_interrupt_work=N The driver's interrupt service routine can handle many receive and transmit packets in a single invocation. It does this in a loop. - The value of max_interrupt_work governs how mnay times the interrupt + The value of max_interrupt_work governs how many times the interrupt service routine will loop. The default value is 32 loops. If this is exceeded the interrupt service routine gives up and generates a warning message "eth0: Too much work in interrupt". diff --git a/Documentation/networking/x25-iface.txt b/Documentation/networking/x25-iface.txt index 78f662e..7f213b5 100644 --- a/Documentation/networking/x25-iface.txt +++ b/Documentation/networking/x25-iface.txt @@ -105,7 +105,7 @@ reduced by the following measures or a combination thereof: later. The lapb module interface was modified to support this. Its data_indication() method should now transparently pass the - netif_rx() return value to the (lapb mopdule) caller. + netif_rx() return value to the (lapb module) caller. (2) Drivers for kernel versions 2.2.x should always check the global variable netdev_dropping when a new frame is received. The driver should only call netif_rx() if netdev_dropping is zero. 
Otherwise -- cgit v0.10.2 From 3017f079efd6af199b0852b5c425364513db460e Mon Sep 17 00:00:00 2001 From: Chen LinX Date: Wed, 30 Oct 2013 13:56:18 -0700 Subject: mm/pagewalk.c: fix walk_page_range() access of wrong PTEs When walk_page_range() walks a memory map's page tables, it skips VM_PFNMAP areas; the variable 'next' is then assigned vma->vm_end, which may be larger than 'end'. In the next loop iteration, 'addr' will be larger than 'next'. Then, when reading the /proc/XXXX/pagemap file, 'addr' will keep growing forever in pagemap_pte_range() and pte_to_pagemap_entry() will access the wrong pte. BUG: Bad page map in process procrank pte:8437526f pmd:785de067 addr:9108d000 vm_flags:00200073 anon_vma:f0d99020 mapping: (null) index:9108d CPU: 1 PID: 4974 Comm: procrank Tainted: G B W O 3.10.1+ #1 Call Trace: dump_stack+0x16/0x18 print_bad_pte+0x114/0x1b0 vm_normal_page+0x56/0x60 pagemap_pte_range+0x17a/0x1d0 walk_page_range+0x19e/0x2c0 pagemap_read+0x16e/0x200 vfs_read+0x84/0x150 SyS_read+0x4a/0x80 syscall_call+0x7/0xb Signed-off-by: Liu ShuoX Signed-off-by: Chen LinX Acked-by: Kirill A. Shutemov Reviewed-by: Naoya Horiguchi Cc: [3.10.x+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 5da2cbc..2beeabf 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -242,7 +242,7 @@ int walk_page_range(unsigned long addr, unsigned long end, if (err) break; pgd++; - } while (addr = next, addr != end); + } while (addr = next, addr < end); return err; } -- cgit v0.10.2 From bd09d9a35111b6ffc0c7585d3853d0ec7f9f1eb4 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Wed, 30 Oct 2013 13:56:20 -0700 Subject: percpu: fix this_cpu_sub() subtrahend casting for unsigneds this_cpu_sub() is implemented as negation and addition. This patch casts the adjustment to the counter type before negation to sign extend the adjustment. This helps in cases where the counter type is wider than an unsigned adjustment.
An alternative to this patch is to declare such operations unsupported, but it seemed useful to avoid surprises. This patch specifically helps the following example: unsigned int delta = 1 preempt_disable() this_cpu_write(long_counter, 0) this_cpu_sub(long_counter, delta) preempt_enable() Before this change long_counter on a 64 bit machine ends with value 0xffffffff, rather than 0xffffffffffffffff. This is because this_cpu_sub(pcp, delta) boils down to this_cpu_add(pcp, -delta), which is basically: long_counter = 0 + 0xffffffff Also apply the same cast to: __this_cpu_sub() __this_cpu_sub_return() this_cpu_sub_return() All percpu_test.ko passes, especially the following cases which previously failed: l -= ui_one; __this_cpu_sub(long_counter, ui_one); CHECK(l, long_counter, -1); l -= ui_one; this_cpu_sub(long_counter, ui_one); CHECK(l, long_counter, -1); CHECK(l, long_counter, 0xffffffffffffffff); ul -= ui_one; __this_cpu_sub(ulong_counter, ui_one); CHECK(ul, ulong_counter, -1); CHECK(ul, ulong_counter, 0xffffffffffffffff); ul = this_cpu_sub_return(ulong_counter, ui_one); CHECK(ul, ulong_counter, 2); ul = __this_cpu_sub_return(ulong_counter, ui_one); CHECK(ul, ulong_counter, 1); Signed-off-by: Greg Thelen Acked-by: Tejun Heo Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 0da5200..b3e18f8 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -128,7 +128,8 @@ do { \ do { \ typedef typeof(var) pao_T__; \ const int pao_ID__ = (__builtin_constant_p(val) && \ - ((val) == 1 || (val) == -1)) ? (val) : 0; \ + ((val) == 1 || (val) == -1)) ? 
\ + (int)(val) : 0; \ if (0) { \ pao_T__ pao_tmp__; \ pao_tmp__ = (val); \ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cc88172..c74088a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -332,7 +332,7 @@ do { \ #endif #ifndef this_cpu_sub -# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(val)) +# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) #endif #ifndef this_cpu_inc @@ -418,7 +418,7 @@ do { \ # define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) #endif -#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(val)) +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) @@ -586,7 +586,7 @@ do { \ #endif #ifndef __this_cpu_sub -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(val)) +# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) #endif #ifndef __this_cpu_inc @@ -668,7 +668,7 @@ do { \ __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) #endif -#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(val)) +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) -- cgit v0.10.2 From 5e8cfc3c75b3e43497389896c0ecda62fc311ce9 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Wed, 30 Oct 2013 13:56:21 -0700 Subject: memcg: use __this_cpu_sub() to dec stats to avoid incorrect subtrahend casting As of commit 3ea67d06e467 ("memcg: add per cgroup writeback pages accounting") memcg counter errors are possible when moving charged memory to a different memcg. 
Charge movement occurs when processing writes to memory.force_empty, moving tasks to a memcg with memory.move_charge_at_immigrate=1, or memcg deletion. An example showing error after memory.force_empty: $ cd /sys/fs/cgroup/memory $ mkdir x $ rm /data/tmp/file $ (echo $BASHPID >> x/tasks && exec mmap_writer /data/tmp/file 1M) & [1] 13600 $ grep ^mapped x/memory.stat mapped_file 1048576 $ echo 13600 > tasks $ echo 1 > x/memory.force_empty $ grep ^mapped x/memory.stat mapped_file 4503599627370496 mapped_file should end with 0. 4503599627370496 == 0x10,0000,0000,0000 == 0x100,0000,0000 pages 1048576 == 0x10,0000 == 0x100 pages This issue only affects the source memcg on 64 bit machines; the destination memcg counters are correct. So the rmdir case is not too important because such counters are soon disappearing with the entire memcg. But the memory.force_empty and memory.move_charge_at_immigrate=1 cases are larger problems as the bogus counters are visible for the (possibly long) remaining life of the source memcg. The problem is due to memcg use of __this_cpu_add(.., -nr_pages), which is subtly wrong because it adds the negation of the unsigned int nr_pages (either 1 or 512 for THP) to a signed long percpu counter. When nr_pages=1, -nr_pages=0xffffffff. On 64 bit machines stat->count[idx] is signed 64 bit. So memcg's attempt to simply decrement a count (e.g. from 1 to 0) boils down to: long count = 1 unsigned int nr_pages = 1 count += -nr_pages /* -nr_pages == 0xffff,ffff */ count is now 0x1,0000,0000 instead of 0 The fix is to subtract the unsigned page count rather than adding its negation. This only works once "percpu: fix this_cpu_sub() subtrahend casting for unsigneds" is applied to fix this_cpu_sub().
Signed-off-by: Greg Thelen Acked-by: Tejun Heo Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 34d3ca9..497ec33 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3774,7 +3774,7 @@ void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, /* Update stat data for mem_cgroup */ preempt_disable(); WARN_ON_ONCE(from->stat->count[idx] < nr_pages); - __this_cpu_add(from->stat->count[idx], -nr_pages); + __this_cpu_sub(from->stat->count[idx], nr_pages); __this_cpu_add(to->stat->count[idx], nr_pages); preempt_enable(); } -- cgit v0.10.2 From 5beea882e64121dfe3b33145767d3302afa784d5 Mon Sep 17 00:00:00 2001 From: Yunkang Tang Date: Thu, 31 Oct 2013 00:55:58 -0700 Subject: Input: ALPS - add support for model found on Dell XT2 This patch adds support for touchpad found on Dell XT2. It's a dual device with device ID: 73, 00, 14, that comply with "ALPS_PROTO_V2". Signed-off-by: Yunkang Tang Signed-off-by: Dmitry Torokhov diff --git a/drivers/input/mouse/alps.c b/drivers/input/mouse/alps.c index 7c5d72a..8365847 100644 --- a/drivers/input/mouse/alps.c +++ b/drivers/input/mouse/alps.c @@ -103,6 +103,7 @@ static const struct alps_model_info alps_model_data[] = { /* Dell Latitude E5500, E6400, E6500, Precision M4400 */ { { 0x62, 0x02, 0x14 }, 0x00, ALPS_PROTO_V2, 0xcf, 0xcf, ALPS_PASS | ALPS_DUALPOINT | ALPS_PS2_INTERLEAVED }, + { { 0x73, 0x00, 0x14 }, 0x00, ALPS_PROTO_V2, 0xcf, 0xcf, ALPS_DUALPOINT }, /* Dell XT2 */ { { 0x73, 0x02, 0x50 }, 0x00, ALPS_PROTO_V2, 0xcf, 0xcf, ALPS_FOUR_BUTTONS }, /* Dell Vostro 1400 */ { { 0x52, 0x01, 0x14 }, 0x00, ALPS_PROTO_V2, 0xff, 0xff, ALPS_PASS | ALPS_DUALPOINT | ALPS_PS2_INTERLEAVED }, /* Toshiba Tecra A11-11L */ -- cgit v0.10.2 From cd5d58108e41b0edecc1e7a6468cbe06ce03be3f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 31 Oct 2013 15:51:38 +0800 Subject: MIPS: ralink: fix return value check in rt_timer_probe() In case of error, the 
function devm_request_and_ioremap() returns NULL pointer not ERR_PTR(). Fix it by using devm_ioremap_resource() instead of devm_request_and_ioremap(). Signed-off-by: Wei Yongjun Acked-by: John Crispin Cc: grant.likely@linaro.org Cc: rob.herring@calxeda.com Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/6098/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c index e49241a..2027857 100644 --- a/arch/mips/ralink/timer.c +++ b/arch/mips/ralink/timer.c @@ -126,7 +126,7 @@ static int rt_timer_probe(struct platform_device *pdev) return -ENOENT; } - rt->membase = devm_request_and_ioremap(&pdev->dev, res); + rt->membase = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(rt->membase)) return PTR_ERR(rt->membase); -- cgit v0.10.2 From a4461f41b94cb52e0141af717dcf4ef6558c8e2e Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 31 Oct 2013 15:01:37 +0000 Subject: ALSA: fix oops in snd_pcm_info() caused by ASoC DPCM Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = d5300000 [00000008] *pgd=0d265831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] PREEMPT ARM CPU: 0 PID: 2295 Comm: vlc Not tainted 3.11.0+ #755 task: dee74800 ti: e213c000 task.ti: e213c000 PC is at snd_pcm_info+0xc8/0xd8 LR is at 0x30232065 pc : [] lr : [<30232065>] psr: a0070013 sp : e213dea8 ip : d81cb0d0 fp : c05f7678 r10: c05f7770 r9 : fffffdfd r8 : 00000000 r7 : d8a968a8 r6 : d8a96800 r5 : d8a96200 r4 : d81cb000 r3 : 00000000 r2 : d81cb000 r1 : 00000001 r0 : d8a96200 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 10c5387d Table: 15300019 DAC: 00000015 Process vlc (pid: 2295, stack limit = 0xe213c248) [] (snd_pcm_info) from [] (snd_pcm_info_user+0x34/0x9c) [] (snd_pcm_info_user) from [] (snd_pcm_control_ioctl+0x274/0x280) [] (snd_pcm_control_ioctl) from [] (snd_ctl_ioctl+0xc0/0x55c) [] (snd_ctl_ioctl) from [] (do_vfs_ioctl+0x80/0x31c) [] (do_vfs_ioctl) from [] 
(SyS_ioctl+0x3c/0x60) [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x48) Code: e1a00005 e59530dc e3a01001 e1a02004 (e5933008) ---[ end trace cb3d9bdb8dfefb3c ]--- This is provoked when the ASoC front end is open along with its backend, (which causes the backend to have a runtime assigned to it) and then the SNDRV_CTL_IOCTL_PCM_INFO is requested for the (visible) backend device. Resolve this by ensuring that ASoC internal backend devices are not visible to userspace, just as the commentary for snd_pcm_new_internal() says it should be. Signed-off-by: Russell King Acked-by: Mark Brown Cc: [v3.4+] Signed-off-by: Takashi Iwai diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 17f45e8..e1e9e0c 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -49,6 +49,8 @@ static struct snd_pcm *snd_pcm_get(struct snd_card *card, int device) struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { + if (pcm->internal) + continue; if (pcm->card == card && pcm->device == device) return pcm; } @@ -60,6 +62,8 @@ static int snd_pcm_next(struct snd_card *card, int device) struct snd_pcm *pcm; list_for_each_entry(pcm, &snd_pcm_devices, list) { + if (pcm->internal) + continue; if (pcm->card == card && pcm->device > device) return pcm->device; else if (pcm->card->number > card->number) -- cgit v0.10.2 From 5d0f801a2ccec3b1fdabc3392c8d99ed0413d216 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Mon, 28 Oct 2013 09:54:40 +0100 Subject: can: c_can: Fix RX message handling, handle lost message before EOB If we handle end of block messages with higher priority than a lost message, we can run into an endless interrupt loop. This is reproducible with an am335x processor and "cansequence -r" at 1Mbit. As soon as we lose a packet we can't escape from an interrupt loop. This patch fixes the problem by handling lost packets before EOB packets.
Cc: linux-stable Signed-off-by: Markus Pargmann Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index a668cd4..e3fc07c 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -814,9 +814,6 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota) msg_ctrl_save = priv->read_reg(priv, C_CAN_IFACE(MSGCTRL_REG, 0)); - if (msg_ctrl_save & IF_MCONT_EOB) - return num_rx_pkts; - if (msg_ctrl_save & IF_MCONT_MSGLST) { c_can_handle_lost_msg_obj(dev, 0, msg_obj); num_rx_pkts++; @@ -824,6 +821,9 @@ static int c_can_do_rx_poll(struct net_device *dev, int quota) continue; } + if (msg_ctrl_save & IF_MCONT_EOB) + return num_rx_pkts; + if (!(msg_ctrl_save & IF_MCONT_NEWDAT)) continue; -- cgit v0.10.2 From 896e23bd04ea50a146dffd342e2f96180f0812a5 Mon Sep 17 00:00:00 2001 From: Olivier Sobrie Date: Sun, 27 Oct 2013 22:07:53 +0100 Subject: can: kvaser_usb: fix usb endpoints detection Some devices, like the Kvaser Memorator Professional, have several bulk in endpoints. Only the first one found must be used by the driver. The same holds for the bulk out endpoint. The official Kvaser driver (leaf) was used as reference for this patch. 
Cc: linux-stable Signed-off-by: Olivier Sobrie Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 3b95465..4b2d5ed 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1544,9 +1544,9 @@ static int kvaser_usb_init_one(struct usb_interface *intf, return 0; } -static void kvaser_usb_get_endpoints(const struct usb_interface *intf, - struct usb_endpoint_descriptor **in, - struct usb_endpoint_descriptor **out) +static int kvaser_usb_get_endpoints(const struct usb_interface *intf, + struct usb_endpoint_descriptor **in, + struct usb_endpoint_descriptor **out) { const struct usb_host_interface *iface_desc; struct usb_endpoint_descriptor *endpoint; @@ -1557,12 +1557,18 @@ static void kvaser_usb_get_endpoints(const struct usb_interface *intf, for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; - if (usb_endpoint_is_bulk_in(endpoint)) + if (!*in && usb_endpoint_is_bulk_in(endpoint)) *in = endpoint; - if (usb_endpoint_is_bulk_out(endpoint)) + if (!*out && usb_endpoint_is_bulk_out(endpoint)) *out = endpoint; + + /* use first bulk endpoint for in and out */ + if (*in && *out) + return 0; } + + return -ENODEV; } static int kvaser_usb_probe(struct usb_interface *intf, @@ -1576,8 +1582,8 @@ static int kvaser_usb_probe(struct usb_interface *intf, if (!dev) return -ENOMEM; - kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out); - if (!dev->bulk_in || !dev->bulk_out) { + err = kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out); + if (err) { dev_err(&intf->dev, "Cannot get usb endpoint(s)"); return err; } -- cgit v0.10.2 From 0baab4fd6de4beb3393e173b392038d01da54bec Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 31 Oct 2013 15:28:23 -0700 Subject: i915: fix compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last i915 drm update brought with it this 
annoying warning drivers/gpu/drm/i915/intel_crt.c: In function ‘intel_crt_get_config’: drivers/gpu/drm/i915/intel_crt.c:110:21: warning: unused variable ‘dev’ [-Wunused-variable] struct drm_device *dev = encoder->base.dev; ^ introduced by commit 7195a50b5c7e ("drm/i915: Add HSW CRT output readout support"). Remove the offending pointless variable. Signed-off-by: Linus Torvalds diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index db59bb9..10d1de5 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -107,8 +107,6 @@ static unsigned int intel_crt_get_flags(struct intel_encoder *encoder) static void intel_crt_get_config(struct intel_encoder *encoder, struct intel_crtc_config *pipe_config) { - struct drm_device *dev = encoder->base.dev; - pipe_config->adjusted_mode.flags |= intel_crt_get_flags(encoder); } -- cgit v0.10.2 From 358eec18243ac025b2eb0317ab52bd247e1b03c6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 31 Oct 2013 15:43:02 -0700 Subject: vfs: decrapify dput(), fix cache behavior under normal load We do not want to dirty the dentry->d_flags cacheline in dput() just to set the DCACHE_REFERENCED flag when it is already set in the common case anyway. This way the first cacheline of the dentry (which contains the RCU lookup information etc) can stay shared among multiple CPU's. This finishes off some of the details of all the scalability patches merged during the merge window. Also don't mark dentry_kill() for inlining, since it's the uncommon path and inlining it just makes the common path slower due to extra function entry/exit overhead. Signed-off-by: Linus Torvalds diff --git a/fs/dcache.c b/fs/dcache.c index 20532cb..ae6ebb8 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -542,7 +542,7 @@ EXPORT_SYMBOL(d_drop); * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. 
*/ -static inline struct dentry * +static struct dentry * dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { @@ -630,7 +630,8 @@ repeat: goto kill_it; } - dentry->d_flags |= DCACHE_REFERENCED; + if (!(dentry->d_flags & DCACHE_REFERENCED)) + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_lockref.count--; -- cgit v0.10.2 From 3168ecbe1c04ec3feb7cb42388a17d7f047fe1a2 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 31 Oct 2013 16:34:13 -0700 Subject: mm: memcg: use proper memcg in limit bypass Commit 84235de394d9 ("fs: buffer: move allocation failure loop into the allocator") allowed __GFP_NOFAIL allocations to bypass the limit if they fail to reclaim enough memory for the charge. But because the main test case was on a 3.2-based system, the patch missed the fact that on newer kernels the charge function needs to return root_mem_cgroup when bypassing the limit, and not NULL. This will corrupt whatever memory is at NULL + percpu pointer offset. Fix this quickly before problems are reported. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 497ec33..623d5c8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2765,10 +2765,10 @@ done: *ptr = memcg; return 0; nomem: - *ptr = NULL; - if (gfp_mask & __GFP_NOFAIL) - return 0; - return -ENOMEM; + if (!(gfp_mask & __GFP_NOFAIL)) { + *ptr = NULL; + return -ENOMEM; + } bypass: *ptr = root_mem_cgroup; return -EINTR; -- cgit v0.10.2 From 0056f4e66a1b8f00245248877e80386af36af14c Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 31 Oct 2013 16:34:14 -0700 Subject: mm: memcg: lockdep annotation for memcg OOM lock The memcg OOM lock is a mutex-type lock that is open-coded due to memcg's special needs. Add annotations for lockdep coverage. 
Signed-off-by: Johannes Weiner Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 623d5c8..7e11cb7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "internal.h" #include #include @@ -2046,6 +2047,12 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, return total; } +#ifdef CONFIG_LOCKDEP +static struct lockdep_map memcg_oom_lock_dep_map = { + .name = "memcg_oom_lock", +}; +#endif + static DEFINE_SPINLOCK(memcg_oom_lock); /* @@ -2083,7 +2090,8 @@ static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg) } iter->oom_lock = false; } - } + } else + mutex_acquire(&memcg_oom_lock_dep_map, 0, 1, _RET_IP_); spin_unlock(&memcg_oom_lock); @@ -2095,6 +2103,7 @@ static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg) struct mem_cgroup *iter; spin_lock(&memcg_oom_lock); + mutex_release(&memcg_oom_lock_dep_map, 1, _RET_IP_); for_each_mem_cgroup_tree(iter, memcg) iter->oom_lock = false; spin_unlock(&memcg_oom_lock); -- cgit v0.10.2 From 696ac172fffa653dca401bb2b0cad91cf2ce453f Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 31 Oct 2013 16:34:15 -0700 Subject: mm: memcg: fix test for child groups When memcg code needs to know whether any given memcg has children, it uses the cgroup child iteration primitives and returns true/false depending on whether the iteration loop is executed at least once or not. Because a cgroup's list of children is RCU protected, these primitives require the RCU read-lock to be held, which is not the case for all memcg callers. This results in the following splat when e.g. 
enabling hierarchy mode: WARNING: CPU: 3 PID: 1 at kernel/cgroup.c:3043 css_next_child+0xa3/0x160() CPU: 3 PID: 1 Comm: systemd Not tainted 3.12.0-rc5-00117-g83f11a9-dirty #18 Hardware name: LENOVO 3680B56/3680B56, BIOS 6QET69WW (1.39 ) 04/26/2012 Call Trace: dump_stack+0x54/0x74 warn_slowpath_common+0x78/0xa0 warn_slowpath_null+0x1a/0x20 css_next_child+0xa3/0x160 mem_cgroup_hierarchy_write+0x5b/0xa0 cgroup_file_write+0x108/0x2a0 vfs_write+0xbd/0x1e0 SyS_write+0x4c/0xa0 system_call_fastpath+0x16/0x1b In the memcg case, we only care about children when we are attempting to modify inheritable attributes interactively. Racing with deletion could mean a spurious -EBUSY, no problem. Racing with addition is handled just fine as well through the memcg_create_mutex: if the child group is not on the list after the mutex is acquired, it won't be initialized from the parent's attributes until after the unlock. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7e11cb7..e632782 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4959,31 +4959,18 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) } while (usage > 0); } -/* - * This mainly exists for tests during the setting of set of use_hierarchy. - * Since this is the very setting we are changing, the current hierarchy value - * is meaningless - */ -static inline bool __memcg_has_children(struct mem_cgroup *memcg) -{ - struct cgroup_subsys_state *pos; - - /* bounce at first found */ - css_for_each_child(pos, &memcg->css) - return true; - return false; -} - -/* - * Must be called with memcg_create_mutex held, unless the cgroup is guaranteed - * to be already dead (as in mem_cgroup_force_empty, for instance). This is - * from mem_cgroup_count_children(), in the sense that we don't really care how - * many children we have; we only need to know if we have any. 
It also counts - * any memcg without hierarchy as infertile. - */ static inline bool memcg_has_children(struct mem_cgroup *memcg) { - return memcg->use_hierarchy && __memcg_has_children(memcg); + lockdep_assert_held(&memcg_create_mutex); + /* + * The lock does not prevent addition or deletion to the list + * of children, but it prevents a new child from being + * initialized based on this parent in css_online(), so it's + * enough to decide whether hierarchically inherited + * attributes can still be changed or not. + */ + return memcg->use_hierarchy && + !list_empty(&memcg->css.cgroup->children); } /* @@ -5063,7 +5050,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, */ if ((!parent_memcg || !parent_memcg->use_hierarchy) && (val == 1 || val == 0)) { - if (!__memcg_has_children(memcg)) + if (list_empty(&memcg->css.cgroup->children)) memcg->use_hierarchy = val; else retval = -EBUSY; -- cgit v0.10.2 From 3d77b50c5874b7e923be946ba793644f82336b75 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 31 Oct 2013 16:34:17 -0700 Subject: lib/scatterlist.c: don't flush_kernel_dcache_page on slab page Commit b1adaf65ba03 ("[SCSI] block: add sg buffer copy helper functions") introduces two sg buffer copy helpers, and calls flush_kernel_dcache_page() on pages in SG list after these pages are written to. Unfortunately, the commit may introduce a potential bug: - Before sending some SCSI commands, kmalloc() buffer may be passed to block layer, so flush_kernel_dcache_page() can see a slab page finally - According to cachetlb.txt, flush_kernel_dcache_page() is only called on "a user page", which surely can't be a slab page. - ARCH's implementation of flush_kernel_dcache_page() may use page mapping information to do optimization so page_mapping() will see the slab page, then VM_BUG_ON() is triggered.
Aaro Koskinen reported the bug on ARM/kirkwood when DEBUG_VM is enabled, and this patch fixes the bug by adding test of '!PageSlab(miter->page)' before calling flush_kernel_dcache_page(). Signed-off-by: Ming Lei Reported-by: Aaro Koskinen Tested-by: Simon Baatz Cc: Russell King - ARM Linux Cc: Will Deacon Cc: Aaro Koskinen Acked-by: Catalin Marinas Cc: FUJITA Tomonori Cc: Tejun Heo Cc: "James E.J. Bottomley" Cc: Jens Axboe Cc: [3.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a685c8a..d16fa29 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -577,7 +577,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter) miter->__offset += miter->consumed; miter->__remaining -= miter->consumed; - if (miter->__flags & SG_MITER_TO_SG) + if ((miter->__flags & SG_MITER_TO_SG) && + !PageSlab(miter->page)) flush_kernel_dcache_page(miter->page); if (miter->__flags & SG_MITER_ATOMIC) { -- cgit v0.10.2 From 84502b5ef9849a9694673b15c31bd3ac693010ae Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 30 Oct 2013 11:16:28 +0100 Subject: xfrm: Fix null pointer dereference when decoding sessions On some codepaths the skb does not have a dst entry when xfrm_decode_session() is called. So check for a valid skb_dst() before dereferencing the device interface index. We use 0 as the device index if there is no valid skb_dst(), or at reverse decoding we use skb_iif as device interface index. Bug was introduced with git commit bafd4bd4dc ("xfrm: Decode sessions with output interface."). 
Reported-by: Meelis Roos Tested-by: Meelis Roos Signed-off-by: Steffen Klassert diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 4764ee4..e1a6393 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -104,10 +104,14 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) const struct iphdr *iph = ip_hdr(skb); u8 *xprth = skb_network_header(skb) + iph->ihl * 4; struct flowi4 *fl4 = &fl->u.ip4; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; - fl4->flowi4_oif = skb_dst(skb)->dev->ifindex; + fl4->flowi4_oif = reverse ? skb->skb_iif : oif; if (!ip_is_fragment(iph)) { switch (iph->protocol) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index dd503a3..5f8e128 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -135,10 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) struct ipv6_opt_hdr *exthdr; const unsigned char *nh = skb_network_header(skb); u8 nexthdr = nh[IP6CB(skb)->nhoff]; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; - fl6->flowi6_oif = skb_dst(skb)->dev->ifindex; + fl6->flowi6_oif = reverse ? skb->skb_iif : oif; fl6->daddr = reverse ? hdr->saddr : hdr->daddr; fl6->saddr = reverse ? hdr->daddr : hdr->saddr; -- cgit v0.10.2 From 09169197c9f5e3b42f0c83c6d7071b3e9c94153e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:12:52 -0700 Subject: Revert "USB: pl2303: distinguish between original and cloned HX chips" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7d26a78f62ff4fb08bc5ba740a8af4aa7ac67da4. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. 
This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index bedf8e4..e7a84f0 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -139,7 +139,6 @@ enum pl2303_type { HX_TA, /* HX(A) / X(A) / TA version */ /* TODO: improve */ HXD_EA_RA_SA, /* HXD / EA / RA / SA version */ /* TODO: improve */ TB, /* TB version */ - HX_CLONE, /* Cheap and less functional clone of the HX chip */ }; /* * NOTE: don't know the difference between type 0 and type 1, @@ -207,23 +206,8 @@ static int pl2303_startup(struct usb_serial *serial) * the device descriptors of the X/HX, HXD, EA, RA, SA, TA, TB */ if (le16_to_cpu(serial->dev->descriptor.bcdDevice) == 0x300) { - /* Check if the device is a clone */ - pl2303_vendor_read(0x9494, 0, serial, buf); - /* - * NOTE: Not sure if this read is really needed. - * The HX returns 0x00, the clone 0x02, but the Windows - * driver seems to ignore the value and continues. - */ - pl2303_vendor_write(0x0606, 0xaa, serial); - pl2303_vendor_read(0x8686, 0, serial, buf); - if (buf[0] != 0xaa) { - type = HX_CLONE; - type_str = "X/HX clone (limited functionality)"; - } else { - type = HX_TA; - type_str = "X/HX/TA"; - } - pl2303_vendor_write(0x0606, 0x00, serial); + type = HX_TA; + type_str = "X/HX/TA"; } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice) == 0x400) { type = HXD_EA_RA_SA; @@ -321,9 +305,8 @@ static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, { /* * NOTE: Only the values defined in baud_sup are supported ! 
- * => if unsupported values are set, the PL2303 uses 9600 baud instead - * => HX clones just don't work at unsupported baud rates < 115200 baud, - * for baud rates > 115200 they run at 115200 baud + * => if unsupported values are set, the PL2303 seems to + * use 9600 baud (at least my PL2303X always does) */ const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600, 4800, 7200, 9600, 14400, 19200, 28800, 38400, @@ -333,14 +316,14 @@ static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, * NOTE: With the exception of type_0/1 devices, the following * additional baud rates are supported (tested with HX rev. 3A only): * 110*, 56000*, 128000, 134400, 161280, 201600, 256000*, 268800, - * 403200, 806400. (*: not HX and HX clones) + * 403200, 806400. (*: not HX) * * Maximum values: HXD, TB: 12000000; HX, TA: 6000000; - * type_0+1: 1228800; RA: 921600; HX clones, SA: 115200 + * type_0+1: 1228800; RA: 921600; SA: 115200 * * As long as we are not using this encoding method for anything else - * than the type_0+1, HX and HX clone chips, there is no point in - * complicating the code to support them. + * than the type_0+1 and HX chips, there is no point in complicating + * the code to support them. */ int i; @@ -364,8 +347,6 @@ static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, baud = min_t(int, baud, 6000000); else if (type == type_0 || type == type_1) baud = min_t(int, baud, 1228800); - else if (type == HX_CLONE) - baud = min_t(int, baud, 115200); /* Direct (standard) baud rate encoding method */ put_unaligned_le32(baud, buf); @@ -378,8 +359,7 @@ static int pl2303_baudrate_encode_divisor(int baud, enum pl2303_type type, /* * Divisor based baud rate encoding method * - * NOTE: HX clones do NOT support this method. - * It's not clear if the type_0/1 chips support it. 
+ * NOTE: it's not clear if the type_0/1 chips support this method * * divisor = 12MHz * 32 / baudrate = 2^A * B * @@ -472,7 +452,7 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, * 1) Direct method: encodes the baud rate value directly * => supported by all chip types * 2) Divisor based method: encodes a divisor to a base value (12MHz*32) - * => not supported by HX clones (and likely type_0/1 chips) + * => supported by HX chips (and likely not by type_0/1 chips) * * NOTE: Although the divisor based baud rate encoding method is much * more flexible, some of the standard baud rate values can not be @@ -480,7 +460,7 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, * the device likely uses the same baud rate generator for both methods * so that there is likley no difference. */ - if (type == type_0 || type == type_1 || type == HX_CLONE) + if (type == type_0 || type == type_1) baud = pl2303_baudrate_encode_direct(baud, type, buf); else baud = pl2303_baudrate_encode_divisor(baud, type, buf); @@ -833,7 +813,6 @@ static void pl2303_break_ctl(struct tty_struct *tty, int break_state) result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), BREAK_REQUEST, BREAK_REQUEST_TYPE, state, 0, NULL, 0, 100); - /* NOTE: HX clones don't support sending breaks, -EPIPE is returned */ if (result) dev_err(&port->dev, "error sending break = %d\n", result); } -- cgit v0.10.2 From e8bbd5c42b65b662756d67290a5c4dcda1abc596 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:16:09 -0700 Subject: Revert "pl2303: improve the chip type detection/distinction" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 034d1527adebd302115c87ef343497a889638275. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. 
This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index e7a84f0..409000a 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -134,17 +134,10 @@ MODULE_DEVICE_TABLE(usb, id_table); enum pl2303_type { - type_0, /* H version ? */ - type_1, /* H version ? */ - HX_TA, /* HX(A) / X(A) / TA version */ /* TODO: improve */ - HXD_EA_RA_SA, /* HXD / EA / RA / SA version */ /* TODO: improve */ - TB, /* TB version */ + type_0, /* don't know the difference between type 0 and */ + type_1, /* type 1, until someone from prolific tells us... */ + HX, /* HX version of the pl2303 chip */ }; -/* - * NOTE: don't know the difference between type 0 and type 1, - * until someone from Prolific tells us... - * TODO: distinguish between X/HX, TA and HXD, EA, RA, SA variants - */ struct pl2303_serial_private { enum pl2303_type type; @@ -201,28 +194,8 @@ static int pl2303_startup(struct usb_serial *serial) type = type_0; type_str = "type_0"; } else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) { - /* - * NOTE: The bcdDevice version is the only difference between - * the device descriptors of the X/HX, HXD, EA, RA, SA, TA, TB - */ - if (le16_to_cpu(serial->dev->descriptor.bcdDevice) == 0x300) { - type = HX_TA; - type_str = "X/HX/TA"; - } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice) - == 0x400) { - type = HXD_EA_RA_SA; - type_str = "HXD/EA/RA/SA"; - } else if (le16_to_cpu(serial->dev->descriptor.bcdDevice) - == 0x500) { - type = TB; - type_str = "TB"; - } else { - dev_info(&serial->interface->dev, - "unknown/unsupported device type\n"); - kfree(spriv); - kfree(buf); - return -ENODEV; - } + type = HX; + type_str = "X/HX"; } else if (serial->dev->descriptor.bDeviceClass == 0x00 || 
serial->dev->descriptor.bDeviceClass == 0xFF) { type = type_1; @@ -243,10 +216,10 @@ static int pl2303_startup(struct usb_serial *serial) pl2303_vendor_read(0x8383, 0, serial, buf); pl2303_vendor_write(0, 1, serial); pl2303_vendor_write(1, 0, serial); - if (type == type_0 || type == type_1) - pl2303_vendor_write(2, 0x24, serial); - else + if (type == HX) pl2303_vendor_write(2, 0x44, serial); + else + pl2303_vendor_write(2, 0x24, serial); kfree(buf); return 0; @@ -311,19 +284,12 @@ static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600, 4800, 7200, 9600, 14400, 19200, 28800, 38400, 57600, 115200, 230400, 460800, 614400, 921600, - 1228800, 2457600, 3000000, 6000000, 12000000 }; + 1228800, 2457600, 3000000, 6000000 }; /* - * NOTE: With the exception of type_0/1 devices, the following - * additional baud rates are supported (tested with HX rev. 3A only): - * 110*, 56000*, 128000, 134400, 161280, 201600, 256000*, 268800, - * 403200, 806400. (*: not HX) - * - * Maximum values: HXD, TB: 12000000; HX, TA: 6000000; - * type_0+1: 1228800; RA: 921600; SA: 115200 - * - * As long as we are not using this encoding method for anything else - * than the type_0+1 and HX chips, there is no point in complicating - * the code to support them. + * NOTE: The PL2303HX (tested with rev. 3A) also supports the following + * baud rates: 128000, 134400, 161280, 201600, 268800, 403200, 806400. + * As long as we are not using this encoding method for them, there is + * no point in complicating the code to support them. */ int i; @@ -338,14 +304,8 @@ static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, baud = baud_sup[i - 1]; else baud = baud_sup[i]; - /* Respect the chip type specific baud rate limits */ - /* - * FIXME: as long as we don't know how to distinguish between the - * HXD, EA, RA, and SA chip variants, allow the max. value of 12M. 
- */ - if (type == HX_TA) - baud = min_t(int, baud, 6000000); - else if (type == type_0 || type == type_1) + /* type_0, type_1 only support up to 1228800 baud */ + if (type != HX) baud = min_t(int, baud, 1228800); /* Direct (standard) baud rate encoding method */ put_unaligned_le32(baud, buf); @@ -384,19 +344,10 @@ static int pl2303_baudrate_encode_divisor(int baud, enum pl2303_type type, * Baud rates smaller than the specified 75 baud are definitely working * fine. */ - if (type == type_0 || type == type_1) - baud = min_t(int, baud, 1228800 * 1.1); - else if (type == HX_TA) + if (type == HX) baud = min_t(int, baud, 6000000 * 1.1); - else if (type == HXD_EA_RA_SA) - /* HXD, EA: 12Mbps; RA: 1Mbps; SA: 115200 bps */ - /* - * FIXME: as long as we don't know how to distinguish between - * these chip variants, allow the max. of these values - */ - baud = min_t(int, baud, 12000000 * 1.1); - else if (type == TB) - baud = min_t(int, baud, 12000000 * 1.1); + else + baud = min_t(int, baud, 1228800 * 1.1); /* Determine factors A and B */ A = 0; B = 12000000 * 32 / baud; /* 12MHz */ @@ -460,7 +411,7 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, * the device likely uses the same baud rate generator for both methods * so that there is likley no difference. 
*/ - if (type == type_0 || type == type_1) + if (type != HX) baud = pl2303_baudrate_encode_direct(baud, type, buf); else baud = pl2303_baudrate_encode_divisor(baud, type, buf); @@ -598,10 +549,10 @@ static void pl2303_set_termios(struct tty_struct *tty, dev_dbg(&port->dev, "0xa1:0x21:0:0 %d - %7ph\n", i, buf); if (C_CRTSCTS(tty)) { - if (spriv->type == type_0 || spriv->type == type_1) - pl2303_vendor_write(0x0, 0x41, serial); - else + if (spriv->type == HX) pl2303_vendor_write(0x0, 0x61, serial); + else + pl2303_vendor_write(0x0, 0x41, serial); } else { pl2303_vendor_write(0x0, 0x0, serial); } @@ -638,7 +589,7 @@ static int pl2303_open(struct tty_struct *tty, struct usb_serial_port *port) struct pl2303_serial_private *spriv = usb_get_serial_data(serial); int result; - if (spriv->type == type_0 || spriv->type == type_1) { + if (spriv->type != HX) { usb_clear_halt(serial->dev, port->write_urb->pipe); usb_clear_halt(serial->dev, port->read_urb->pipe); } else { -- cgit v0.10.2 From b52e111363e366202386f3e67f71681dbbb8e5d9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:17:50 -0700 Subject: Revert "pl2303: improve the chip type information output on startup" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a77a8c23e4db9fb1f776147eda0d85117359c700. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. 
Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 409000a..7efb39c 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -177,7 +177,6 @@ static int pl2303_startup(struct usb_serial *serial) { struct pl2303_serial_private *spriv; enum pl2303_type type = type_0; - char *type_str = "unknown (treating as type_0)"; unsigned char *buf; spriv = kzalloc(sizeof(*spriv), GFP_KERNEL); @@ -190,18 +189,14 @@ static int pl2303_startup(struct usb_serial *serial) return -ENOMEM; } - if (serial->dev->descriptor.bDeviceClass == 0x02) { + if (serial->dev->descriptor.bDeviceClass == 0x02) type = type_0; - type_str = "type_0"; - } else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) { + else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) type = HX; - type_str = "X/HX"; - } else if (serial->dev->descriptor.bDeviceClass == 0x00 - || serial->dev->descriptor.bDeviceClass == 0xFF) { + else if (serial->dev->descriptor.bDeviceClass == 0x00 + || serial->dev->descriptor.bDeviceClass == 0xFF) type = type_1; - type_str = "type_1"; - } - dev_dbg(&serial->interface->dev, "device type: %s\n", type_str); + dev_dbg(&serial->interface->dev, "device type: %d\n", type); spriv->type = type; usb_set_serial_data(serial, spriv); -- cgit v0.10.2 From 281393ad0bcfc309434d2bff38abc15805c2cbc4 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:18:10 -0700 Subject: Revert "pl2303: simplify the else-if contruct for type_1 chips in pl2303_startup()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 73b583af597542329e6adae44524da6f27afed62. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. 
This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 7efb39c..6638c5d 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -193,8 +193,9 @@ static int pl2303_startup(struct usb_serial *serial) type = type_0; else if (serial->dev->descriptor.bMaxPacketSize0 == 0x40) type = HX; - else if (serial->dev->descriptor.bDeviceClass == 0x00 - || serial->dev->descriptor.bDeviceClass == 0xFF) + else if (serial->dev->descriptor.bDeviceClass == 0x00) + type = type_1; + else if (serial->dev->descriptor.bDeviceClass == 0xFF) type = type_1; dev_dbg(&serial->interface->dev, "device type: %d\n", type); -- cgit v0.10.2 From 233c3dda5cd1bb26fa871b94db17627117e51026 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:18:25 -0700 Subject: Revert "usb: pl2303: add two comments concerning the supported baud rates with HX chips" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c23bda365dfbf56aa4d6d4a97f83136c36050e01. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. 
Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 6638c5d..09fb55c 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -281,12 +281,6 @@ static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, 4800, 7200, 9600, 14400, 19200, 28800, 38400, 57600, 115200, 230400, 460800, 614400, 921600, 1228800, 2457600, 3000000, 6000000 }; - /* - * NOTE: The PL2303HX (tested with rev. 3A) also supports the following - * baud rates: 128000, 134400, 161280, 201600, 268800, 403200, 806400. - * As long as we are not using this encoding method for them, there is - * no point in complicating the code to support them. - */ int i; /* Set baudrate to nearest supported value */ @@ -400,12 +394,6 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, * => supported by all chip types * 2) Divisor based method: encodes a divisor to a base value (12MHz*32) * => supported by HX chips (and likely not by type_0/1 chips) - * - * NOTE: Although the divisor based baud rate encoding method is much - * more flexible, some of the standard baud rate values can not be - * realized exactly. But the difference is very small (max. 0.2%) and - * the device likely uses the same baud rate generator for both methods - * so that there is likley no difference. */ if (type != HX) baud = pl2303_baudrate_encode_direct(baud, type, buf); -- cgit v0.10.2 From e2afb1d66644a3c55e3a46ba312e302a065ecac5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:18:38 -0700 Subject: Revert "usb: pl2303: also use the divisor based baud rate encoding method for baud rates < 115200 with HX chips" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 61fa8d694b8547894b57ea0d99d0120a58f6ebf8. 
Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 09fb55c..61c9f9d 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -395,7 +395,7 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, * 2) Divisor based method: encodes a divisor to a base value (12MHz*32) * => supported by HX chips (and likely not by type_0/1 chips) */ - if (type != HX) + if (type != HX || baud <= 115200) baud = pl2303_baudrate_encode_direct(baud, type, buf); else baud = pl2303_baudrate_encode_divisor(baud, type, buf); -- cgit v0.10.2 From 92dfe410880b8bde731ca1a6e7da2dd3b13404e6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:18:47 -0700 Subject: Revert "usb: pl2303: increase the allowed baud rate range for the divisor based encoding method" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b5c16c6a031c52cc4b7dda6c3de46462fbc92eab. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. 
Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 61c9f9d..693ed7e 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -324,20 +324,12 @@ static int pl2303_baudrate_encode_divisor(int baud, enum pl2303_type type, */ unsigned int A, B; - /* - * NOTE: The Windows driver allows maximum baud rates of 110% of the - * specified maximium value. - * Quick tests with early (2004) HX (rev. A) chips suggest, that even - * higher baud rates (up to the maximum of 24M baud !) are working fine, - * but that should really be tested carefully in "real life" scenarios - * before removing the upper limit completely. - * Baud rates smaller than the specified 75 baud are definitely working - * fine. - */ + /* Respect the specified baud rate limits */ + baud = max_t(int, baud, 75); if (type == HX) - baud = min_t(int, baud, 6000000 * 1.1); + baud = min_t(int, baud, 6000000); else - baud = min_t(int, baud, 1228800 * 1.1); + baud = min_t(int, baud, 1228800); /* Determine factors A and B */ A = 0; B = 12000000 * 32 / baud; /* 12MHz */ -- cgit v0.10.2 From 692ed4ddf0010dd643d38d6ef1a15bf64a7fbc6d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:19:03 -0700 Subject: Revert "usb: pl2303: move the two baud rate encoding methods to separate functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e917ba01d69ad705a4cd6a6c77538f55d84f5907. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. 
Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 693ed7e..a0ea92e 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -269,128 +269,115 @@ static int pl2303_set_control_lines(struct usb_serial_port *port, u8 value) return retval; } -static int pl2303_baudrate_encode_direct(int baud, enum pl2303_type type, - u8 buf[4]) -{ - /* - * NOTE: Only the values defined in baud_sup are supported ! - * => if unsupported values are set, the PL2303 seems to - * use 9600 baud (at least my PL2303X always does) - */ - const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600, - 4800, 7200, 9600, 14400, 19200, 28800, 38400, - 57600, 115200, 230400, 460800, 614400, 921600, - 1228800, 2457600, 3000000, 6000000 }; - int i; - - /* Set baudrate to nearest supported value */ - for (i = 0; i < ARRAY_SIZE(baud_sup); ++i) { - if (baud_sup[i] > baud) - break; - } - if (i == ARRAY_SIZE(baud_sup)) - baud = baud_sup[i - 1]; - else if (i > 0 && (baud_sup[i] - baud) > (baud - baud_sup[i - 1])) - baud = baud_sup[i - 1]; - else - baud = baud_sup[i]; - /* type_0, type_1 only support up to 1228800 baud */ - if (type != HX) - baud = min_t(int, baud, 1228800); - /* Direct (standard) baud rate encoding method */ - put_unaligned_le32(baud, buf); - - return baud; -} - -static int pl2303_baudrate_encode_divisor(int baud, enum pl2303_type type, - u8 buf[4]) -{ - /* - * Divisor based baud rate encoding method - * - * NOTE: it's not clear if the type_0/1 chips support this method - * - * divisor = 12MHz * 32 / baudrate = 2^A * B - * - * with - * - * A = buf[1] & 0x0e - * B = buf[0] + (buf[1] & 0x01) << 8 - * - * Special cases: - * => 8 < B < 16: device seems to work not properly - * => B <= 8: device uses the max. 
value B = 512 instead - */ - unsigned int A, B; - - /* Respect the specified baud rate limits */ - baud = max_t(int, baud, 75); - if (type == HX) - baud = min_t(int, baud, 6000000); - else - baud = min_t(int, baud, 1228800); - /* Determine factors A and B */ - A = 0; - B = 12000000 * 32 / baud; /* 12MHz */ - B <<= 1; /* Add one bit for rounding */ - while (B > (512 << 1) && A <= 14) { - A += 2; - B >>= 2; - } - if (A > 14) { /* max. divisor = min. baudrate reached */ - A = 14; - B = 512; - /* => ~45.78 baud */ - } else { - B = (B + 1) >> 1; /* Round the last bit */ - } - /* Handle special cases */ - if (B == 512) - B = 0; /* also: 1 to 8 */ - else if (B < 16) - /* - * NOTE: With the current algorithm this happens - * only for A=0 and means that the min. divisor - * (respectively: the max. baudrate) is reached. - */ - B = 16; /* => 24 MBaud */ - /* Encode the baud rate */ - buf[3] = 0x80; /* Select divisor encoding method */ - buf[2] = 0; - buf[1] = (A & 0x0e); /* A */ - buf[1] |= ((B & 0x100) >> 8); /* MSB of B */ - buf[0] = B & 0xff; /* 8 LSBs of B */ - /* Calculate the actual/resulting baud rate */ - if (B <= 8) - B = 512; - baud = 12000000 * 32 / ((1 << A) * B); - - return baud; -} - static void pl2303_encode_baudrate(struct tty_struct *tty, struct usb_serial_port *port, - enum pl2303_type type, u8 buf[4]) { + struct usb_serial *serial = port->serial; + struct pl2303_serial_private *spriv = usb_get_serial_data(serial); int baud; baud = tty_get_baud_rate(tty); dev_dbg(&port->dev, "baud requested = %d\n", baud); if (!baud) return; - /* - * There are two methods for setting/encoding the baud rate - * 1) Direct method: encodes the baud rate value directly - * => supported by all chip types - * 2) Divisor based method: encodes a divisor to a base value (12MHz*32) - * => supported by HX chips (and likely not by type_0/1 chips) - */ - if (type != HX || baud <= 115200) - baud = pl2303_baudrate_encode_direct(baud, type, buf); - else - baud = 
pl2303_baudrate_encode_divisor(baud, type, buf); + + if (spriv->type != HX || baud <= 115200) { + /* + * NOTE: Only the values defined in baud_sup are supported ! + * => if unsupported values are set, the PL2303 seems to + * use 9600 baud (at least my PL2303X always does) + */ + const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, + 3600, 4800, 7200, 9600, 14400, 19200, + 28800, 38400, 57600, 115200, 230400, + 460800, 614400, 921600, 1228800, + 2457600, 3000000, 6000000 }; + int i; + + /* Set baudrate to nearest supported value */ + for (i = 0; i < ARRAY_SIZE(baud_sup); ++i) { + if (baud_sup[i] > baud) + break; + } + + if (i == ARRAY_SIZE(baud_sup)) + baud = baud_sup[i - 1]; + else if (i > 0 + && (baud_sup[i] - baud) > (baud - baud_sup[i - 1])) + baud = baud_sup[i - 1]; + else + baud = baud_sup[i]; + + /* type_0, type_1 only support up to 1228800 baud */ + if (spriv->type != HX) + baud = min_t(int, baud, 1228800); + + /* Direct (standard) baud rate encoding method */ + put_unaligned_le32(baud, buf); + } else { + /* + * Divisor based baud rate encoding method + * + * NOTE: it's not clear if the type_0/1 chips + * support this method + * + * divisor = 12MHz * 32 / baudrate = 2^A * B + * + * with + * + * A = buf[1] & 0x0e + * B = buf[0] + (buf[1] & 0x01) << 8 + * + * Special cases: + * => 8 < B < 16: device seems to work not properly + * => B <= 8: device uses the max. value B = 512 instead + */ + unsigned int A, B; + + /* Respect the specified baud rate limits */ + baud = max_t(int, baud, 75); + if (spriv->type == HX) + baud = min_t(int, baud, 6000000); + else + baud = min_t(int, baud, 1228800); + /* Determine factors A and B */ + A = 0; + B = 12000000 * 32 / baud; /* 12MHz */ + B <<= 1; /* Add one bit for rounding */ + while (B > (512 << 1) && A <= 14) { + A += 2; + B >>= 2; + } + if (A > 14) { /* max. divisor = min. 
baudrate reached */ + A = 14; + B = 512; + /* => ~45.78 baud */ + } else { + B = (B + 1) >> 1; /* Round the last bit */ + } + /* Handle special cases */ + if (B == 512) + B = 0; /* also: 1 to 8 */ + else if (B < 16) + /* + * NOTE: With the current algorithm this happens + * only for A=0 and means that the min. divisor + * (respectively: the max. baudrate) is reached. + */ + B = 16; /* => 24 MBaud */ + /* Encode the baud rate */ + buf[3] = 0x80; /* Select divisor encoding method */ + buf[2] = 0; + buf[1] = (A & 0x0e); /* A */ + buf[1] |= ((B & 0x100) >> 8); /* MSB of B */ + buf[0] = B & 0xff; /* 8 LSBs of B */ + /* Calculate the actual/resulting baud rate */ + if (B <= 8) + B = 512; + baud = 12000000 * 32 / ((1 << A) * B); + } + /* Save resulting baud rate */ tty_encode_baud_rate(tty, baud, baud); dev_dbg(&port->dev, "baud set = %d\n", baud); @@ -447,8 +434,8 @@ static void pl2303_set_termios(struct tty_struct *tty, dev_dbg(&port->dev, "data bits = %d\n", buf[6]); } - /* For reference: buf[0]:buf[3] baud rate value */ - pl2303_encode_baudrate(tty, port, spriv->type, buf); + /* For reference buf[0]:buf[3] baud rate value */ + pl2303_encode_baudrate(tty, port, &buf[0]); /* For reference buf[4]=0 is 1 stop bits */ /* For reference buf[4]=1 is 1.5 stop bits */ -- cgit v0.10.2 From 336b9daf90d2a1575088ab93d7bfe82dcd10dd8d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:19:24 -0700 Subject: Revert "usb: pl2303: remove 500000 baud from the list of standard baud rates" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b9208c721ce736125fe58d398319513a27850fd8. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. 
Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index a0ea92e..04390df 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -291,8 +291,8 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600, 4800, 7200, 9600, 14400, 19200, 28800, 38400, 57600, 115200, 230400, - 460800, 614400, 921600, 1228800, - 2457600, 3000000, 6000000 }; + 460800, 500000, 614400, 921600, + 1228800, 2457600, 3000000, 6000000 }; int i; /* Set baudrate to nearest supported value */ -- cgit v0.10.2 From 7e12a6fcbf266eb0d5b19761f91b2964ad18e371 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:19:34 -0700 Subject: Revert "usb: pl2303: do not round to the next nearest standard baud rate for the divisor based baud rate encoding method" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 75417d9f99f89ab241de69d7db15af5842b488c4. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. 
Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 04390df..b93b3b3 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -273,46 +273,44 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, struct usb_serial_port *port, u8 buf[4]) { + const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, 3600, + 4800, 7200, 9600, 14400, 19200, 28800, 38400, + 57600, 115200, 230400, 460800, 500000, 614400, + 921600, 1228800, 2457600, 3000000, 6000000 }; + struct usb_serial *serial = port->serial; struct pl2303_serial_private *spriv = usb_get_serial_data(serial); int baud; + int i; + /* + * NOTE: Only the values defined in baud_sup are supported! + * => if unsupported values are set, the PL2303 seems to use + * 9600 baud (at least my PL2303X always does) + */ baud = tty_get_baud_rate(tty); dev_dbg(&port->dev, "baud requested = %d\n", baud); if (!baud) return; - if (spriv->type != HX || baud <= 115200) { - /* - * NOTE: Only the values defined in baud_sup are supported ! 
- * => if unsupported values are set, the PL2303 seems to - * use 9600 baud (at least my PL2303X always does) - */ - const int baud_sup[] = { 75, 150, 300, 600, 1200, 1800, 2400, - 3600, 4800, 7200, 9600, 14400, 19200, - 28800, 38400, 57600, 115200, 230400, - 460800, 500000, 614400, 921600, - 1228800, 2457600, 3000000, 6000000 }; - int i; - - /* Set baudrate to nearest supported value */ - for (i = 0; i < ARRAY_SIZE(baud_sup); ++i) { - if (baud_sup[i] > baud) - break; - } + /* Set baudrate to nearest supported value */ + for (i = 0; i < ARRAY_SIZE(baud_sup); ++i) { + if (baud_sup[i] > baud) + break; + } - if (i == ARRAY_SIZE(baud_sup)) - baud = baud_sup[i - 1]; - else if (i > 0 - && (baud_sup[i] - baud) > (baud - baud_sup[i - 1])) - baud = baud_sup[i - 1]; - else - baud = baud_sup[i]; + if (i == ARRAY_SIZE(baud_sup)) + baud = baud_sup[i - 1]; + else if (i > 0 && (baud_sup[i] - baud) > (baud - baud_sup[i - 1])) + baud = baud_sup[i - 1]; + else + baud = baud_sup[i]; - /* type_0, type_1 only support up to 1228800 baud */ - if (spriv->type != HX) - baud = min_t(int, baud, 1228800); + /* type_0, type_1 only support up to 1228800 baud */ + if (spriv->type != HX) + baud = min_t(int, baud, 1228800); + if (spriv->type != HX || baud <= 115200) { /* Direct (standard) baud rate encoding method */ put_unaligned_le32(baud, buf); } else { @@ -333,17 +331,10 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, * => 8 < B < 16: device seems to work not properly * => B <= 8: device uses the max. 
value B = 512 instead */ - unsigned int A, B; - /* Respect the specified baud rate limits */ - baud = max_t(int, baud, 75); - if (spriv->type == HX) - baud = min_t(int, baud, 6000000); - else - baud = min_t(int, baud, 1228800); /* Determine factors A and B */ - A = 0; - B = 12000000 * 32 / baud; /* 12MHz */ + unsigned int A = 0; + unsigned int B = 12000000 * 32 / baud; /* 12MHz */ B <<= 1; /* Add one bit for rounding */ while (B > (512 << 1) && A <= 14) { A += 2; -- cgit v0.10.2 From 1796a228762cd0b86e14d6d4a3de9ecfe65b3b8d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:19:45 -0700 Subject: Revert "usb: pl2303: fix+improve the divsor based baud rate encoding method" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 57ce61aad748ceaa08c859da04043ad7dae7c15e. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index b93b3b3..2448201 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -4,11 +4,6 @@ * Copyright (C) 2001-2007 Greg Kroah-Hartman (greg@kroah.com) * Copyright (C) 2003 IBM Corp. 
* - * Copyright (C) 2009, 2013 Frank Schäfer - * - fixes, improvements and documentation for the baud rate encoding methods - * Copyright (C) 2013 Reinhard Max - * - fixes and improvements for the divisor based baud rate encoding method - * * Original driver for 2.2.x by anonymous * * This program is free software; you can redistribute it and/or @@ -315,58 +310,21 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, put_unaligned_le32(baud, buf); } else { /* - * Divisor based baud rate encoding method - * * NOTE: it's not clear if the type_0/1 chips * support this method * - * divisor = 12MHz * 32 / baudrate = 2^A * B - * - * with - * - * A = buf[1] & 0x0e - * B = buf[0] + (buf[1] & 0x01) << 8 - * - * Special cases: - * => 8 < B < 16: device seems to work not properly - * => B <= 8: device uses the max. value B = 512 instead + * Apparently the formula for higher speeds is: + * baudrate = 12M * 32 / (2^buf[1]) / buf[0] */ - - /* Determine factors A and B */ - unsigned int A = 0; - unsigned int B = 12000000 * 32 / baud; /* 12MHz */ - B <<= 1; /* Add one bit for rounding */ - while (B > (512 << 1) && A <= 14) { - A += 2; - B >>= 2; - } - if (A > 14) { /* max. divisor = min. baudrate reached */ - A = 14; - B = 512; - /* => ~45.78 baud */ - } else { - B = (B + 1) >> 1; /* Round the last bit */ - } - /* Handle special cases */ - if (B == 512) - B = 0; /* also: 1 to 8 */ - else if (B < 16) - /* - * NOTE: With the current algorithm this happens - * only for A=0 and means that the min. divisor - * (respectively: the max. baudrate) is reached. 
- */ - B = 16; /* => 24 MBaud */ - /* Encode the baud rate */ - buf[3] = 0x80; /* Select divisor encoding method */ + unsigned tmp = 12000000 * 32 / baud; + buf[3] = 0x80; buf[2] = 0; - buf[1] = (A & 0x0e); /* A */ - buf[1] |= ((B & 0x100) >> 8); /* MSB of B */ - buf[0] = B & 0xff; /* 8 LSBs of B */ - /* Calculate the actual/resulting baud rate */ - if (B <= 8) - B = 512; - baud = 12000000 * 32 / ((1 << A) * B); + buf[1] = (tmp >= 256); + while (tmp >= 256) { + tmp >>= 2; + buf[1] <<= 1; + } + buf[0] = tmp; } /* Save resulting baud rate */ -- cgit v0.10.2 From 54dc5792ea933a3ff8c62a1f9ea9e4e6cbdd324a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 1 Nov 2013 09:19:56 -0700 Subject: Revert "USB: pl2303: restrict the divisor based baud rate encoding method to the "HX" chip type" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit b8bdad608213caffa081a97d2e937e5fe08c4046. Revert all of the pl2303 changes that went into 3.12-rc1 and -rc2 as they cause regressions on some versions of the chip. This will all be revisited for later kernel versions when we can figure out how to handle this in a way that does not break working devices. 
Reported-by: Mika Westerberg Cc: Frank Schäfer Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 2448201..1e6de4c 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -305,14 +305,10 @@ static void pl2303_encode_baudrate(struct tty_struct *tty, if (spriv->type != HX) baud = min_t(int, baud, 1228800); - if (spriv->type != HX || baud <= 115200) { - /* Direct (standard) baud rate encoding method */ + if (baud <= 115200) { put_unaligned_le32(baud, buf); } else { /* - * NOTE: it's not clear if the type_0/1 chips - * support this method - * * Apparently the formula for higher speeds is: * baudrate = 12M * 32 / (2^buf[1]) / buf[0] */ -- cgit v0.10.2 From f896b7968b627d71c0a462404392103c79ca8595 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 30 Oct 2013 11:07:31 -0700 Subject: USB: Maintainers change for usb serial drivers Johan has been conned^Wgracious in accepting the maintainership of the USB serial drivers, especially as he's been doing all of the real work for the past few years. At the same time, remove a bunch of old entries for USB serial drivers that don't make sense anymore, given that the developers are no longer around, and individual driver maintainerships for tiny things like this is pretty pointless. 
Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman diff --git a/MAINTAINERS b/MAINTAINERS index 3438384..ffcaf97 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8917,61 +8917,14 @@ W: http://pegasus2.sourceforge.net/ S: Maintained F: drivers/net/usb/rtl8150.c -USB SERIAL BELKIN F5U103 DRIVER -M: William Greathouse +USB SERIAL SUBSYSTEM +M: Johan Hovold L: linux-usb@vger.kernel.org S: Maintained -F: drivers/usb/serial/belkin_sa.* - -USB SERIAL CYPRESS M8 DRIVER -M: Lonnie Mendez -L: linux-usb@vger.kernel.org -S: Maintained -W: http://geocities.com/i0xox0i -W: http://firstlight.net/cvs -F: drivers/usb/serial/cypress_m8.* - -USB SERIAL CYBERJACK DRIVER -M: Matthias Bruestle and Harald Welte -W: http://www.reiner-sct.de/support/treiber_cyberjack.php -S: Maintained -F: drivers/usb/serial/cyberjack.c - -USB SERIAL DIGI ACCELEPORT DRIVER -M: Peter Berger -M: Al Borchers -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/digi_acceleport.c - -USB SERIAL DRIVER -M: Greg Kroah-Hartman -L: linux-usb@vger.kernel.org -S: Supported F: Documentation/usb/usb-serial.txt -F: drivers/usb/serial/generic.c -F: drivers/usb/serial/usb-serial.c +F: drivers/usb/serial/ F: include/linux/usb/serial.h -USB SERIAL EMPEG EMPEG-CAR MARK I/II DRIVER -M: Gary Brubaker -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/empeg.c - -USB SERIAL KEYSPAN DRIVER -M: Greg Kroah-Hartman -L: linux-usb@vger.kernel.org -S: Maintained -F: drivers/usb/serial/*keyspan* - -USB SERIAL WHITEHEAT DRIVER -M: Support Department -L: linux-usb@vger.kernel.org -W: http://www.connecttech.com -S: Supported -F: drivers/usb/serial/whiteheat* - USB SMSC75XX ETHERNET DRIVER M: Steve Glendinning L: netdev@vger.kernel.org -- cgit v0.10.2 From e1466ad5b1aeda303f9282463d55798d2eda218c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B5=D0=B9=20=D0=9A=D1=80?= =?UTF-8?q?=D0=B0=D0=BC=D0=B0=D1=80=D0=B5=D0=BD=D0=BA=D0=BE?= Date: Fri, 1 Nov 2013 17:26:38 +0400 Subject: USB: 
serial: ftdi_sio: add id for Z3X Box device Custom VID/PID for the Z3X Box device, a popular tool for cellphone flashing. Signed-off-by: Alexey E. Kramarenko Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index c45f9c0..b21d553 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -904,6 +904,7 @@ static struct usb_device_id id_table_combined [] = { { USB_DEVICE(FTDI_VID, FTDI_LUMEL_PD12_PID) }, /* Crucible Devices */ { USB_DEVICE(FTDI_VID, FTDI_CT_COMET_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_Z3X_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 1b8af46..a7019d1 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1307,3 +1307,9 @@ * Manufacturer: Crucible Technologies */ #define FTDI_CT_COMET_PID 0x8e08 + +/* + * Product: Z3X Box + * Manufacturer: Smart GSM Team + */ +#define FTDI_Z3X_PID 0x0011 -- cgit v0.10.2 From 6920a1bd037374a632d585de127b6f945199dcb8 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 1 Nov 2013 12:16:59 -0700 Subject: memcg: remove incorrect underflow check When a memcg is deleted mem_cgroup_reparent_charges() moves charged memory to the parent memcg. As of v3.11-9444-g3ea67d0 "memcg: add per cgroup writeback pages accounting" there's a bad pointer read. The goal was to check for counter underflow. The counter is a per cpu counter and there are two problems with the code: (1) per cpu access function isn't used, instead a naked pointer is used which easily causes oops. (2) the check doesn't sum all cpus Test: $ cd /sys/fs/cgroup/memory $ mkdir x $ echo 3 > /proc/sys/vm/drop_caches $ (echo $BASHPID >> x/tasks && exec cat) & [1] 7154 $ grep ^mapped x/memory.stat mapped_file 53248 $ echo 7154 > tasks $ rmdir x The fix is to remove the check.
It's currently dangerous and isn't worth fixing it to use something expensive, such as percpu_counter_sum(), for each reparented page. __this_cpu_read() isn't enough to fix this because there are no guarantees about the current cpu's count. The only guarantee is that the sum of all per-cpu counters is >= nr_pages. Fixes: 3ea67d06e467 ("memcg: add per cgroup writeback pages accounting") Reported-and-tested-by: Flavio Leitner Signed-off-by: Greg Thelen Reviewed-by: Sha Zhengju Acked-by: Johannes Weiner Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e632782..13b9d0f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3782,7 +3782,6 @@ void mem_cgroup_move_account_page_stat(struct mem_cgroup *from, { /* Update stat data for mem_cgroup */ preempt_disable(); - WARN_ON_ONCE(from->stat->count[idx] < nr_pages); __this_cpu_sub(from->stat->count[idx], nr_pages); __this_cpu_add(to->stat->count[idx], nr_pages); preempt_enable(); -- cgit v0.10.2 From f6537f2f0eba4eba3354e48dbe3047db6d8b6254 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 2 Nov 2013 09:11:33 +1030 Subject: scripts/kallsyms: filter symbols not in kernel address space This patch uses CONFIG_PAGE_OFFSET to filter symbols which are not in kernel address space because these symbols are generally for generating code purpose and can't be run at kernel mode, so we needn't keep them in /proc/kallsyms.
For example, on ARM there are some symbols which may be linked in relocatable code section, then perf can't parse symbols any more from /proc/kallsyms, this patch fixes the problem (introduced b9b32bf70f2fb710b07c94e13afbc729afe221da) Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: Michal Marek Signed-off-by: Ming Lei Signed-off-by: Rusty Russell Cc: stable@vger.kernel.org diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 487ac6f..9a11f9f 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -55,6 +55,7 @@ static struct sym_entry *table; static unsigned int table_size, table_cnt; static int all_symbols = 0; static char symbol_prefix_char = '\0'; +static unsigned long long kernel_start_addr = 0; int token_profit[0x10000]; @@ -65,7 +66,10 @@ unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--symbol-prefix=] < in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] " + "[--symbol-prefix=] " + "[--page-offset=] " + "< in.map > out.S\n"); exit(1); } @@ -194,6 +198,9 @@ static int symbol_valid(struct sym_entry *s) int i; int offset = 1; + if (s->addr < kernel_start_addr) + return 0; + /* skip prefix char */ if (symbol_prefix_char && *(s->sym + 1) == symbol_prefix_char) offset++; @@ -646,6 +653,9 @@ int main(int argc, char **argv) if ((*p == '"' && *(p+2) == '"') || (*p == '\'' && *(p+2) == '\'')) p++; symbol_prefix_char = *p; + } else if (strncmp(argv[i], "--page-offset=", 14) == 0) { + const char *p = &argv[i][14]; + kernel_start_addr = strtoull(p, NULL, 16); } else usage(); } diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 0149949..32b10f5 100644 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -82,6 +82,8 @@ kallsyms() kallsymopt="${kallsymopt} --all-symbols" fi + kallsymopt="${kallsymopt} --page-offset=$CONFIG_PAGE_OFFSET" + local aflags="${KBUILD_AFLAGS} ${KBUILD_AFLAGS_KERNEL} \ ${NOSTDINC_FLAGS} ${LINUXINCLUDE} 
${KBUILD_CPPFLAGS}" -- cgit v0.10.2 From 6f092343855a71e03b8d209815d8c45bf3a27fcd Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 1 Nov 2013 15:01:10 +0800 Subject: net: flow_dissector: fail on evil iph->ihl We don't validate iph->ihl which may lead to an infinite loop if we meet an IPIP skb whose iph->ihl is zero. Fix this by failing immediately when iph->ihl is evil (less than 5). This issue was introduced by commit ec5efe7946280d1e84603389a1030ccec0a767ae (rps: support IPIP encapsulation). Cc: Eric Dumazet Cc: Petr Matousek Cc: Michael S. Tsirkin Cc: Daniel Borkmann Signed-off-by: Jason Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 8d7d0dd..143b6fd 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -40,7 +40,7 @@ again: struct iphdr _iph; ip: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); - if (!iph) + if (!iph || iph->ihl < 5) return false; if (ip_is_fragment(iph)) -- cgit v0.10.2 From 9c41f4eeb9d51f3ece20428d35a3ea32cf3b5622 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 2 Nov 2013 17:47:49 +0530 Subject: ARC: Incorrect mm reference used in vmalloc fault handler A vmalloc fault needs to sync up PGD/PTE entry from init_mm to current task's "active_mm". ARC vmalloc fault handler however was using mm. A vmalloc fault for non user task context (actually pre-userland, from init thread's open for /dev/console) caused the handler to deref NULL mm (for mm->pgd) The reasons it worked so far are amazing: 1. By default (!SMP), vmalloc fault handler uses a cached value of PGD. In SMP that MMU register is repurposed hence need for mm pointer deref. 2.
In pre-3.12 SMP kernels, the problem triggering vmalloc didn't exist in pre-userland code path - it was introduced with commit 20bafb3d23d108bc "n_tty: Move buffers into n_tty_data" Signed-off-by: Vineet Gupta Cc: Gilad Ben-Yossef Cc: Noam Camus Cc: stable@vger.kernel.org #3.10 and 3.11 Cc: Peter Hurley Signed-off-by: Linus Torvalds diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index d63f3de..0c14d8a 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -17,7 +17,7 @@ #include #include -static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) +static int handle_vmalloc_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -27,7 +27,7 @@ static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; - pgd = pgd_offset_fast(mm, address); + pgd = pgd_offset_fast(current->active_mm, address); pgd_k = pgd_offset_k(address); if (!pgd_present(*pgd_k)) @@ -72,7 +72,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) * nothing more. */ if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(mm, address); + ret = handle_vmalloc_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else -- cgit v0.10.2 From 9bf76ca325d5e9208eb343f7bd4cc666f703ed30 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sun, 3 Nov 2013 12:36:28 +0100 Subject: ipc, msg: forbid negative values for "msg{max,mnb,mni}" Negative message lengths make no sense -- so don't do negative queue lengths or identifier counts. Prevent them from getting negative. Also change the underlying data types to be unsigned to avoid hairy surprises with sign extensions in cases where those variables get evaluated in unsigned expressions with bigger data types, e.g. size_t. In case a user still wants to have "unlimited" sizes she could just use INT_MAX instead.
Signed-off-by: Mathias Krause Cc: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 19c19a5..f6c82de 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -34,9 +34,9 @@ struct ipc_namespace { int sem_ctls[4]; int used_sems; - int msg_ctlmax; - int msg_ctlmnb; - int msg_ctlmni; + unsigned int msg_ctlmax; + unsigned int msg_ctlmnb; + unsigned int msg_ctlmni; atomic_t msg_bytes; atomic_t msg_hdrs; int auto_msgmni; diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 130dfec..b0e99de 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -62,7 +62,7 @@ static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, return err; } -static int proc_ipc_callback_dointvec(ctl_table *table, int write, +static int proc_ipc_callback_dointvec_minmax(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -72,7 +72,7 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); - rc = proc_dointvec(&ipc_table, write, buffer, lenp, ppos); + rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) /* @@ -152,15 +152,13 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #define proc_ipc_dointvec NULL #define proc_ipc_dointvec_minmax NULL #define proc_ipc_dointvec_minmax_orphans NULL -#define proc_ipc_callback_dointvec NULL +#define proc_ipc_callback_dointvec_minmax NULL #define proc_ipcauto_dointvec_minmax NULL #endif static int zero; static int one = 1; -#ifdef CONFIG_CHECKPOINT_RESTORE static int int_max = INT_MAX; -#endif static struct ctl_table ipc_kern_table[] = { { @@ -198,21 +196,27 @@ static struct ctl_table ipc_kern_table[] = { .data = &init_ipc_ns.msg_ctlmax, .maxlen = sizeof (init_ipc_ns.msg_ctlmax), .mode = 0644, - .proc_handler = proc_ipc_dointvec, 
+ .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmni", .data = &init_ipc_ns.msg_ctlmni, .maxlen = sizeof (init_ipc_ns.msg_ctlmni), .mode = 0644, - .proc_handler = proc_ipc_callback_dointvec, + .proc_handler = proc_ipc_callback_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "msgmnb", .data = &init_ipc_ns.msg_ctlmnb, .maxlen = sizeof (init_ipc_ns.msg_ctlmnb), .mode = 0644, - .proc_handler = proc_ipc_dointvec, + .proc_handler = proc_ipc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &int_max, }, { .procname = "sem", -- cgit v0.10.2 From 5e01dc7b26d9f24f39abace5da98ccbd6a5ceb52 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Nov 2013 15:41:51 -0800 Subject: Linux 3.12 diff --git a/Makefile b/Makefile index 868c0eb..67077ad 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = One Giant Leap for Frogkind # *DOCUMENTATION* -- cgit v0.10.2 From 7926c1d5be0b7cbe5b8d5c788d7d39237e7b212c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Oct 2013 09:13:32 +0100 Subject: net: sctp: do not trigger BUG_ON in sctp_cmd_delete_tcb Introduced in f9e42b853523 ("net: sctp: sideeffect: throw BUG if primary_path is NULL"), we intended to find a buggy assoc that's part of the assoc hash table with a primary_path that is NULL. However, we better remove the BUG_ON for now and find a more suitable place to assert for these things as Mark reports that this also triggers the bug when duplication cookie processing happens, and the assoc is not part of the hash table (so all good in this case). 
Such a situation can for example easily be reproduced by: tc qdisc add dev eth0 root handle 1: prio bands 2 priomap 1 1 1 1 1 1 tc qdisc add dev eth0 parent 1:2 handle 20: netem loss 20% tc filter add dev eth0 protocol ip parent 1: prio 2 u32 match ip \ protocol 132 0xff match u8 0x0b 0xff at 32 flowid 1:2 This drops 20% of COOKIE-ACK packets. After some follow-up discussion with Vlad we came to the conclusion that for now we should still better remove this BUG_ON() assertion, and come up with two follow-ups later on, that is, i) find a more suitable place for this assertion, and possibly ii) have a special allocator/initializer for such kind of temporary assocs. Reported-by: Mark Thomas Signed-off-by: Vlad Yasevich Signed-off-by: Daniel Borkmann Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 666c668..1a6eef3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -860,7 +860,6 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; - BUG_ON(asoc->peer.primary_path == NULL); sctp_unhash_established(asoc); sctp_association_free(asoc); } -- cgit v0.10.2 From c32b7dfbb1dfb3f0a68f250deff65103c8bb704a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 3 Nov 2013 10:04:07 +0200 Subject: net/mlx4_core: Fix call to __mlx4_unregister_mac In function mlx4_master_deactivate_admin_state() __mlx4_unregister_mac was called using the MAC index. It should be called with the value of the MAC itself. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index ea20182..bb11624 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1691,7 +1691,7 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave vp_oper->vlan_idx = NO_INDX; } if (NO_INDX != vp_oper->mac_idx) { - __mlx4_unregister_mac(&priv->dev, port, vp_oper->mac_idx); + __mlx4_unregister_mac(&priv->dev, port, vp_oper->state.mac); vp_oper->mac_idx = NO_INDX; } } -- cgit v0.10.2