From 7e196aa1a011da35a04cb1f161e186563aa8d8db Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Jul 2016 12:18:34 +0000 Subject: clk: sunxi: pll2: Fix return value check in sun4i_pll2_setup() In case of error, the functions clk_register_composite() and clk_register_divider() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Wei Yongjun Signed-off-by: Maxime Ripard diff --git a/drivers/clk/sunxi/clk-a10-pll2.c b/drivers/clk/sunxi/clk-a10-pll2.c index 0ee1f36..d8eab90 100644 --- a/drivers/clk/sunxi/clk-a10-pll2.c +++ b/drivers/clk/sunxi/clk-a10-pll2.c @@ -73,7 +73,7 @@ static void __init sun4i_pll2_setup(struct device_node *node, SUN4I_PLL2_PRE_DIV_WIDTH, CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO, &sun4i_a10_pll2_lock); - if (!prediv_clk) { + if (IS_ERR(prediv_clk)) { pr_err("Couldn't register the prediv clock\n"); goto err_free_array; } @@ -106,7 +106,7 @@ static void __init sun4i_pll2_setup(struct device_node *node, &mult->hw, &clk_multiplier_ops, &gate->hw, &clk_gate_ops, CLK_SET_RATE_PARENT); - if (!base_clk) { + if (IS_ERR(base_clk)) { pr_err("Couldn't register the base multiplier clock\n"); goto err_free_multiplier; } -- cgit v0.10.2 From fbe359f12ce40e3126bea959b135656e4a305897 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Jul 2016 12:21:47 +0000 Subject: clk: sunxi: Fix return value check in sun8i_a23_mbus_setup() In case of error, the function of_io_request_and_map() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Signed-off-by: Wei Yongjun Signed-off-by: Maxime Ripard diff --git a/drivers/clk/sunxi/clk-sun8i-mbus.c b/drivers/clk/sunxi/clk-sun8i-mbus.c index 411d303..b200ebf 100644 --- a/drivers/clk/sunxi/clk-sun8i-mbus.c +++ b/drivers/clk/sunxi/clk-sun8i-mbus.c @@ -48,7 +48,7 @@ static void __init sun8i_a23_mbus_setup(struct device_node *node) return; reg = of_io_request_and_map(node, 0, of_node_full_name(node)); - if (!reg) { + if (IS_ERR(reg)) { pr_err("Could not get registers for sun8i-mbus-clk\n"); goto err_free_parents; } -- cgit v0.10.2 From 156ad0d7eae4825a0d94a4bf68571e97302f2501 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 26 Jul 2016 15:04:23 +0800 Subject: clk: sunxi-ng: Fix inverted test condition in ccu_helper_wait_for_lock The condition passed to read*_poll_timeout() is the break condition, i.e. wait for this condition to happen and return success. The original code assumed the opposite, resulting in a warning when the PLL clock rate was changed but never lost it's lock as far as the readout indicated. This was verified by checking the read out register value. Fixes: 1d80c14248d6 ("clk: sunxi-ng: Add common infrastructure") Signed-off-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Maxime Ripard diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c index fc17b52..51d4bac 100644 --- a/drivers/clk/sunxi-ng/ccu_common.c +++ b/drivers/clk/sunxi-ng/ccu_common.c @@ -31,7 +31,7 @@ void ccu_helper_wait_for_lock(struct ccu_common *common, u32 lock) return; WARN_ON(readl_relaxed_poll_timeout(common->base + common->reg, reg, - !(reg & lock), 100, 70000)); + reg & lock, 100, 70000)); } int sunxi_ccu_probe(struct device_node *node, void __iomem *reg, -- cgit v0.10.2 From 06421a7821f9040467e0db2702309b72aa2b7af4 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 26 Jul 2016 15:04:24 +0800 Subject: clk: sunxi-ng: nk: Make ccu_nk_find_best static make C=2 reports: CHECK drivers/clk/sunxi-ng/ccu_nk.c drivers/clk/sunxi-ng/ccu_nk.c:17:6: warning: symbol 'ccu_nk_find_best' was not declared. Should it be static? ccu_nk_find_best is only used within ccu_nk.c. So make it static to get rid of this warning. Fixes: adbfb0056e03 ("clk: sunxi-ng: Add N-K-factor clock support") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard diff --git a/drivers/clk/sunxi-ng/ccu_nk.c b/drivers/clk/sunxi-ng/ccu_nk.c index 4470ffc..d6fafb3 100644 --- a/drivers/clk/sunxi-ng/ccu_nk.c +++ b/drivers/clk/sunxi-ng/ccu_nk.c @@ -14,9 +14,9 @@ #include "ccu_gate.h" #include "ccu_nk.h" -void ccu_nk_find_best(unsigned long parent, unsigned long rate, - unsigned int max_n, unsigned int max_k, - unsigned int *n, unsigned int *k) +static void ccu_nk_find_best(unsigned long parent, unsigned long rate, + unsigned int max_n, unsigned int max_k, + unsigned int *n, unsigned int *k) { unsigned long best_rate = 0; unsigned int best_k = 0, best_n = 0; -- cgit v0.10.2 From 62148f0930a8e9bd5c5614f8387222f0220d7d47 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 2 Aug 2016 08:11:00 -0300 Subject: [media] cec: rename cec_devnode fhs_lock to just lock This lock will be used to protect more than just the fhs list. So rename it to just 'lock'. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index b2393bb..9dcb784 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -124,10 +124,10 @@ static void cec_queue_event(struct cec_adapter *adap, u64 ts = ktime_get_ns(); struct cec_fh *fh; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) cec_queue_event_fh(fh, ev, ts); - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* @@ -191,12 +191,12 @@ static void cec_queue_msg_monitor(struct cec_adapter *adap, u32 monitor_mode = valid_la ? CEC_MODE_MONITOR : CEC_MODE_MONITOR_ALL; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower >= monitor_mode) cec_queue_msg_fh(fh, msg); } - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* @@ -207,12 +207,12 @@ static void cec_queue_msg_followers(struct cec_adapter *adap, { struct cec_fh *fh; - mutex_lock(&adap->devnode.fhs_lock); + mutex_lock(&adap->devnode.lock); list_for_each_entry(fh, &adap->devnode.fhs, list) { if (fh->mode_follower == CEC_MODE_FOLLOWER) cec_queue_msg_fh(fh, msg); } - mutex_unlock(&adap->devnode.fhs_lock); + mutex_unlock(&adap->devnode.lock); } /* Notify userspace of an adapter state change. */ diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 7be7615..4e2696a 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -508,14 +508,14 @@ static int cec_open(struct inode *inode, struct file *filp) filp->private_data = fh; - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); /* Queue up initial state events */ ev_state.state_change.phys_addr = adap->phys_addr; ev_state.state_change.log_addr_mask = adap->log_addrs.log_addr_mask; cec_queue_event_fh(fh, &ev_state, 0); list_add(&fh->list, &devnode->fhs); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); return 0; } @@ -540,9 +540,9 @@ static int cec_release(struct inode *inode, struct file *filp) cec_monitor_all_cnt_dec(adap); mutex_unlock(&adap->lock); - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); list_del(&fh->list); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); /* Unhook pending transmits from this filehandle. */ mutex_lock(&adap->lock); diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c index 112a5fa..73792d0 100644 --- a/drivers/staging/media/cec/cec-core.c +++ b/drivers/staging/media/cec/cec-core.c @@ -117,7 +117,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, /* Initialization */ INIT_LIST_HEAD(&devnode->fhs); - mutex_init(&devnode->fhs_lock); + mutex_init(&devnode->lock); /* Part 1: Find a free minor number */ mutex_lock(&cec_devnode_lock); @@ -181,10 +181,10 @@ static void cec_devnode_unregister(struct cec_devnode *devnode) if (!devnode->registered || devnode->unregistered) return; - mutex_lock(&devnode->fhs_lock); + mutex_lock(&devnode->lock); list_for_each_entry(fh, &devnode->fhs, list) wake_up_interruptible(&fh->wait); - mutex_unlock(&devnode->fhs_lock); + mutex_unlock(&devnode->lock); devnode->registered = false; devnode->unregistered = true; diff --git a/include/media/cec.h b/include/media/cec.h index dc7854b..fdb5d60 100644 --- a/include/media/cec.h +++ b/include/media/cec.h @@ -57,8 +57,8 @@ struct cec_devnode { int minor; bool registered; bool unregistered; - struct mutex fhs_lock; struct list_head fhs; + struct mutex lock; }; struct cec_adapter; -- cgit v0.10.2 From 2ab25d35a91098ef0f42d478cc37f6a5591a4ab0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 2 Aug 2016 08:13:57 -0300 Subject: [media] cec: improve locking - The global lock was used in cec_get_device when it should have used the devnode lock. - cec_put_device also took the global lock, but since the release function takes that lock as well this could lead to a deadlock. Just don't take the lock here since there is no reason for it. - cec_devnode_register() should take the global lock when clearing the bit in the global bitmap. - In cec_devnode_unregister() place the devnode->(un)register tests and assignments under the devnode lock as well: this has to be in a critical block. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c index 73792d0..3b1e4d2 100644 --- a/drivers/staging/media/cec/cec-core.c +++ b/drivers/staging/media/cec/cec-core.c @@ -51,31 +51,29 @@ int cec_get_device(struct cec_devnode *devnode) { /* * Check if the cec device is available. This needs to be done with - * the cec_devnode_lock held to prevent an open/unregister race: + * the devnode->lock held to prevent an open/unregister race: * without the lock, the device could be unregistered and freed between * the devnode->registered check and get_device() calls, leading to * a crash. */ - mutex_lock(&cec_devnode_lock); + mutex_lock(&devnode->lock); /* * return ENXIO if the cec device has been removed * already or if it is not registered anymore. */ if (!devnode->registered) { - mutex_unlock(&cec_devnode_lock); + mutex_unlock(&devnode->lock); return -ENXIO; } /* and increase the device refcount */ get_device(&devnode->dev); - mutex_unlock(&cec_devnode_lock); + mutex_unlock(&devnode->lock); return 0; } void cec_put_device(struct cec_devnode *devnode) { - mutex_lock(&cec_devnode_lock); put_device(&devnode->dev); - mutex_unlock(&cec_devnode_lock); } /* Called when the last user of the cec device exits. */ @@ -84,11 +82,10 @@ static void cec_devnode_release(struct device *cd) struct cec_devnode *devnode = to_cec_devnode(cd); mutex_lock(&cec_devnode_lock); - /* Mark device node number as free */ clear_bit(devnode->minor, cec_devnode_nums); - mutex_unlock(&cec_devnode_lock); + cec_delete_adapter(to_cec_adapter(devnode)); } @@ -160,7 +157,9 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, cdev_del: cdev_del(&devnode->cdev); clr_bit: + mutex_lock(&cec_devnode_lock); clear_bit(devnode->minor, cec_devnode_nums); + mutex_unlock(&cec_devnode_lock); return ret; } @@ -177,17 +176,21 @@ static void cec_devnode_unregister(struct cec_devnode *devnode) { struct cec_fh *fh; + mutex_lock(&devnode->lock); + /* Check if devnode was never registered or already unregistered */ - if (!devnode->registered || devnode->unregistered) + if (!devnode->registered || devnode->unregistered) { + mutex_unlock(&devnode->lock); return; + } - mutex_lock(&devnode->lock); list_for_each_entry(fh, &devnode->fhs, list) wake_up_interruptible(&fh->wait); - mutex_unlock(&devnode->lock); devnode->registered = false; devnode->unregistered = true; + mutex_unlock(&devnode->lock); + device_del(&devnode->dev); cdev_del(&devnode->cdev); put_device(&devnode->dev); -- cgit v0.10.2 From 9ebf1945d757433a089ab3ee940673503e3e11ec Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 1 Aug 2016 07:29:34 -0300 Subject: [media] cec-funcs.h: fix typo: && should be & Fix typo where logical AND was used instead of bitwise AND. Reported-by: David Binderman Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 82c3d3b..9e054aa 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -227,7 +227,7 @@ static inline void cec_set_digital_service_id(__u8 *msg, if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) { *msg++ = (digital->channel.channel_number_fmt << 2) | (digital->channel.major >> 8); - *msg++ = digital->channel.major && 0xff; + *msg++ = digital->channel.major & 0xff; *msg++ = digital->channel.minor >> 8; *msg++ = digital->channel.minor & 0xff; *msg++ = 0; @@ -1277,7 +1277,7 @@ static inline void cec_msg_user_control_pressed(struct cec_msg *msg, msg->len += 4; msg->msg[3] = (ui_cmd->channel_identifier.channel_number_fmt << 2) | (ui_cmd->channel_identifier.major >> 8); - msg->msg[4] = ui_cmd->channel_identifier.major && 0xff; + msg->msg[4] = ui_cmd->channel_identifier.major & 0xff; msg->msg[5] = ui_cmd->channel_identifier.minor >> 8; msg->msg[6] = ui_cmd->channel_identifier.minor & 0xff; break; -- cgit v0.10.2 From 31dc8b7302f1e48952ec8e90cd49dca843146cd0 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 08:01:38 -0300 Subject: [media] cec-funcs.h: add reply argument for Record On/Off A reply parameter is added to the cec_msg_record_on/off functions in cec-funcs.h. The standard mandates that Record Status shall be replied to Record On, and it may be replied to Record Off. Signed-off-by: Johan Fjeldtvedt Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 9e054aa..8af613e 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -162,10 +162,11 @@ static inline void cec_msg_standby(struct cec_msg *msg) /* One Touch Record Feature */ -static inline void cec_msg_record_off(struct cec_msg *msg) +static inline void cec_msg_record_off(struct cec_msg *msg, bool reply) { msg->len = 2; msg->msg[1] = CEC_MSG_RECORD_OFF; + msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0; } struct cec_op_arib_data { @@ -323,6 +324,7 @@ static inline void cec_msg_record_on_phys_addr(struct cec_msg *msg, } static inline void cec_msg_record_on(struct cec_msg *msg, + bool reply, const struct cec_op_record_src *rec_src) { switch (rec_src->type) { @@ -346,6 +348,7 @@ static inline void cec_msg_record_on(struct cec_msg *msg, rec_src->ext_phys_addr.phys_addr); break; } + msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0; } static inline void cec_ops_record_on(const struct cec_msg *msg, -- cgit v0.10.2 From 277f963cea4ec87144c6713377322fe3bf172a5e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 06:00:53 -0300 Subject: [media] cec: improve dqevent documentation The documentation for the cec_event_state_change struct was incomplete. This patch documents what happens in the corner cases. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst index 7a6d6d0..2e1e739 100644 --- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst +++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst @@ -64,7 +64,8 @@ it is guaranteed that the state did change in between the two events. - ``phys_addr`` - - The current physical address. + - The current physical address. This is ``CEC_PHYS_ADDR_INVALID`` if no + valid physical address is set. - .. row 2 @@ -72,7 +73,10 @@ it is guaranteed that the state did change in between the two events. - ``log_addr_mask`` - - The current set of claimed logical addresses. + - The current set of claimed logical addresses. This is 0 if no logical + addresses are claimed or if ``phys_addr`` is ``CEC_PHYS_ADDR_INVALID``. + If bit 15 is set (``1 << CEC_LOG_ADDR_UNREGISTERED``) then this device + has the unregistered logical address. In that case all other bits are 0. -- cgit v0.10.2 From dcceb1eaf210096831b14471bc87678375b086ed Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Aug 2016 09:24:45 -0300 Subject: [media] cec: add CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK flag Currently if none of the requested logical addresses can be claimed, the framework will fall back to the Unregistered logical address. Add a flag to enable this explicitly. By default it will just go back to the unconfigured state. Usually Unregistered is not something you want since the functionality is very limited. Unless the application has support for this, it will fail to work correctly. So require that the application explicitly requests this. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst index 04ee900..201d483 100644 --- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst +++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst @@ -144,7 +144,7 @@ logical address types are already defined will return with error ``EBUSY``. - ``flags`` - - Flags. No flags are defined yet, so set this to 0. + - Flags. See :ref:`cec-log-addrs-flags` for a list of available flags. - .. row 7 @@ -201,6 +201,25 @@ logical address types are already defined will return with error ``EBUSY``. give the CEC framework more information about the device type, even though the framework won't use it directly in the CEC message. +.. _cec-log-addrs-flags: + +.. flat-table:: Flags for struct cec_log_addrs + :header-rows: 0 + :stub-columns: 0 + :widths: 3 1 4 + + + - .. _`CEC-LOG-ADDRS-FL-ALLOW-UNREG-FALLBACK`: + + - ``CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK`` + + - 1 + + - By default if no logical address of the requested type can be claimed, then + it will go back to the unconfigured state. If this flag is set, then it will + fallback to the Unregistered logical address. Note that if the Unregistered + logical address was explicitly requested, then this flag has no effect. + .. _cec-versions: .. flat-table:: CEC Versions diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 9dcb784..2458a6c 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1047,6 +1047,10 @@ static int cec_config_thread_func(void *arg) dprintk(1, "could not claim LA %d\n", i); } + if (adap->log_addrs.log_addr_mask == 0 && + !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK)) + goto unconfigure; + configured: if (adap->log_addrs.log_addr_mask == 0) { /* Fall back to unregistered */ diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 4e2696a..6f58ee8 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -162,7 +162,7 @@ static long cec_adap_s_log_addrs(struct cec_adapter *adap, struct cec_fh *fh, return -ENOTTY; if (copy_from_user(&log_addrs, parg, sizeof(log_addrs))) return -EFAULT; - log_addrs.flags = 0; + log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK; mutex_lock(&adap->lock); if (!adap->is_configuring && (!log_addrs.num_log_addrs || !adap->is_configured) && diff --git a/include/linux/cec.h b/include/linux/cec.h index b3e2289..851968e 100644 --- a/include/linux/cec.h +++ b/include/linux/cec.h @@ -364,7 +364,7 @@ struct cec_caps { * @num_log_addrs: how many logical addresses should be claimed. Set by the * caller. * @vendor_id: the vendor ID of the device. Set by the caller. - * @flags: set to 0. + * @flags: flags. * @osd_name: the OSD name of the device. Set by the caller. * @primary_device_type: the primary device type for each logical address. * Set by the caller. @@ -389,6 +389,9 @@ struct cec_log_addrs { __u8 features[CEC_MAX_LOG_ADDRS][12]; }; +/* Allow a fallback to unregistered */ +#define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK (1 << 0) + /* Events */ /* Event that occurs when the adapter state changes */ -- cgit v0.10.2 From 0c1d61b0e4ed68d125b21fed375c38b6e3c2a658 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 14 Aug 2016 08:27:09 -0300 Subject: [media] cec: set unclaimed addresses to CEC_LOG_ADDR_INVALID Up to 4 logical addresses can be claimed. Make sure that any unclaimed logical addresses are set to CEC_LOG_ADDR_INVALID as per the documentation. Take special care in the unregistered case: when falling back to unregistered num_log_addrs may be > 1, so mark those as invalid. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 2458a6c..6cc7d79 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1056,6 +1056,8 @@ configured: /* Fall back to unregistered */ las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED; las->log_addr_mask = 1 << las->log_addr[0]; + for (i = 1; i < las->num_log_addrs; i++) + las->log_addr[i] = CEC_LOG_ADDR_INVALID; } adap->is_configured = true; adap->is_configuring = false; @@ -1074,6 +1076,8 @@ configured: cec_report_features(adap, i); cec_report_phys_addr(adap, i); } + for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++) + las->log_addr[i] = CEC_LOG_ADDR_INVALID; mutex_lock(&adap->lock); adap->kthread_config = NULL; mutex_unlock(&adap->lock); -- cgit v0.10.2 From 260ff1144a9dd1afb85cf5da462672d68412cbc4 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 19 Jul 2016 08:44:32 -0300 Subject: [media] cec: add item to TODO Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO index a10d4f8..1322469 100644 --- a/drivers/staging/media/cec/TODO +++ b/drivers/staging/media/cec/TODO @@ -12,6 +12,7 @@ Hopefully this will happen later in 2016. Other TODOs: +- There are two possible replies to CEC_MSG_INITIATE_ARC. How to handle that? - Add a flag to inhibit passing CEC RC messages to the rc subsystem. Applications should be able to choose this when calling S_LOG_ADDRS. - If the reply field of cec_msg is set then when the reply arrives it -- cgit v0.10.2 From 3e92d8b238e48dfb539e8112bb2cc463e35e1b71 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 13:32:07 -0300 Subject: [media] cec: ignore messages when log_addr_mask == 0 Most CEC adapters will still receive broadcast messages, even if no logical addresses are claimed. But those messages should only be passed on for monitoring purposes, but not for processing by either kernel or userspace if userspace didn't call CEC_ADAP_S_LOG_ADDRS first. So if adap->log_addrs.log_addr_mask is 0, then just return before passing the received message on to the processing code. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index 6cc7d79..e980ac9 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -851,6 +851,9 @@ void cec_received_msg(struct cec_adapter *adap, struct cec_msg *msg) if (!valid_la || msg->len <= 1) return; + if (adap->log_addrs.log_addr_mask == 0) + return; + /* * Process the message on the protocol level. If is_reply is true, * then cec_receive_notify() won't pass on the reply to the listener(s) -- cgit v0.10.2 From 73b14977549e4e1214413e7da2d0e97a9947bf8d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 14 Aug 2016 06:45:54 -0300 Subject: [media] mtk-vcodec: add HAS_DMA dependency This fixes this kbuild test robot error: tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 329f4152911c276b074bec75a0443f88821afdb7 commit: c1023ba74fc77dc56dc317bd98f5060aab889ac1 [media] drivers/media/platform/Kconfig: fix VIDEO_MEDIATEK_VCODEC dependency config: m32r-allyesconfig (attached as .config) compiler: m32r-linux-gcc (GCC) 4.9.0 reproduce: wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross git checkout c1023ba74fc77dc56dc317bd98f5060aab889ac1 # save the attached .config to linux build tree make.cross ARCH=m32r All errors (new ones prefixed by >>): drivers/media/v4l2-core/videobuf2-dma-contig.c: In function 'vb2_dc_get_userptr': >> >> drivers/media/v4l2-core/videobuf2-dma-contig.c:486:2: error: implicit declaration of function 'dma_get_cache_alignment' [-Werror=implicit-function-declaration] unsigned long dma_align = dma_get_cache_alignment(); ^ cc1: some warnings being treated as errors This driver depends on HAS_DMA for dma_get_cache_alignment(). Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig index f25344b..552b635 100644 --- a/drivers/media/platform/Kconfig +++ b/drivers/media/platform/Kconfig @@ -169,7 +169,7 @@ config VIDEO_MEDIATEK_VPU config VIDEO_MEDIATEK_VCODEC tristate "Mediatek Video Codec driver" depends on MTK_IOMMU || COMPILE_TEST - depends on VIDEO_DEV && VIDEO_V4L2 + depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA depends on ARCH_MEDIATEK || COMPILE_TEST select VIDEOBUF2_DMA_CONTIG select V4L2_MEM2MEM_DEV -- cgit v0.10.2 From 1e6e97541ab51b65019bd823506af81ebb3730fc Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 06:44:27 -0300 Subject: [media] pulse8-cec: set correct Signal Free Time Don't hardcode the signal free time to 3 bit periods, instead use the value for the signal free time as passed in by the CEC framework. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/pulse8-cec/pulse8-cec.c b/drivers/staging/media/pulse8-cec/pulse8-cec.c index 94f8590..28f853c 100644 --- a/drivers/staging/media/pulse8-cec/pulse8-cec.c +++ b/drivers/staging/media/pulse8-cec/pulse8-cec.c @@ -388,7 +388,7 @@ static int pulse8_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, int err; cmd[0] = MSGCODE_TRANSMIT_IDLETIME; - cmd[1] = 3; + cmd[1] = signal_free_time; err = pulse8_send_and_wait(pulse8, cmd, 2, MSGCODE_COMMAND_ACCEPTED, 1); cmd[0] = MSGCODE_TRANSMIT_ACK_POLARITY; -- cgit v0.10.2 From 31f58e31dc0e170e117a83584103921269b7581b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 12 Aug 2016 06:46:06 -0300 Subject: [media] pulse8-cec: fix error handling Support more error codes and fix a bug where MSGCODE_TRANSMIT_FAILED_LINE was mapped to CEC_TX_STATUS_ARB_LOST, which is wrong. Thanks to Pulse-Eight for providing me with the information needed to handle this correctly (I hope). Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/pulse8-cec/pulse8-cec.c b/drivers/staging/media/pulse8-cec/pulse8-cec.c index 28f853c..ed8bd95 100644 --- a/drivers/staging/media/pulse8-cec/pulse8-cec.c +++ b/drivers/staging/media/pulse8-cec/pulse8-cec.c @@ -114,14 +114,11 @@ static void pulse8_irq_work_handler(struct work_struct *work) cec_transmit_done(pulse8->adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); break; - case MSGCODE_TRANSMIT_FAILED_LINE: - cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ARB_LOST, - 1, 0, 0, 0); - break; case MSGCODE_TRANSMIT_FAILED_ACK: cec_transmit_done(pulse8->adap, CEC_TX_STATUS_NACK, 0, 1, 0, 0); break; + case MSGCODE_TRANSMIT_FAILED_LINE: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ERROR, @@ -170,6 +167,9 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: schedule_work(&pulse8->work); break; + case MSGCODE_HIGH_ERROR: + case MSGCODE_LOW_ERROR: + case MSGCODE_RECEIVE_FAILED: case MSGCODE_TIMEOUT_ERROR: break; case MSGCODE_COMMAND_ACCEPTED: -- cgit v0.10.2 From 8ac6a1a53e9f195e8c4336a7edfba2e102fc14bb Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 18 Aug 2016 04:13:42 -0300 Subject: [media] cec-edid: check for IEEE identifier The cec_get_edid_spa_location() function did not verify that the IEEE identifier in the Vendor Specific Data Block matched the HDMI-LLC identifier. This could result in the wrong VSDB block being returned. For example, for HDMI 2.0 EDIDs there is also a HDMI Forum VSDB. So check the IEEE identifier as well. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/cec-edid.c b/drivers/media/cec-edid.c index 7001824..5719b99 100644 --- a/drivers/media/cec-edid.c +++ b/drivers/media/cec-edid.c @@ -70,7 +70,10 @@ static unsigned int cec_get_edid_spa_location(const u8 *edid, unsigned int size) u8 tag = edid[i] >> 5; u8 len = edid[i] & 0x1f; - if (tag == 3 && len >= 5 && i + len <= end) + if (tag == 3 && len >= 5 && i + len <= end && + edid[i + 1] == 0x03 && + edid[i + 2] == 0x0c && + edid[i + 3] == 0x00) return i + 4; i += len + 1; } while (i < end); -- cgit v0.10.2 From 4808f721627c2a23b5d749f9bbd20d4529ea2b8d Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sat, 20 Aug 2016 07:54:38 -0300 Subject: [media] cec-funcs.h: add missing vendor-specific messages The cec-funcs.h header was missing support for these three vendor-specific messages: CEC_MSG_VENDOR_COMMAND CEC_MSG_VENDOR_COMMAND_WITH_ID CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN Add wrappers for these messages. I originally postponed adding these wrappers due to the fact that the argument is just a byte array which cec-ctl couldn't handle at the time, and then I just forgot to add them once the CEC framework was finalized. It wasn't until an attempt to transmit a vendor specific command was made that I realized that these wrappers were missing. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h index 8af613e..138bbf7 100644 --- a/include/linux/cec-funcs.h +++ b/include/linux/cec-funcs.h @@ -1144,6 +1144,75 @@ static inline void cec_msg_give_device_vendor_id(struct cec_msg *msg, msg->reply = reply ? CEC_MSG_DEVICE_VENDOR_ID : 0; } +static inline void cec_msg_vendor_command(struct cec_msg *msg, + __u8 size, const __u8 *vendor_cmd) +{ + if (size > 14) + size = 14; + msg->len = 2 + size; + msg->msg[1] = CEC_MSG_VENDOR_COMMAND; + memcpy(msg->msg + 2, vendor_cmd, size); +} + +static inline void cec_ops_vendor_command(const struct cec_msg *msg, + __u8 *size, + const __u8 **vendor_cmd) +{ + *size = msg->len - 2; + + if (*size > 14) + *size = 14; + *vendor_cmd = msg->msg + 2; +} + +static inline void cec_msg_vendor_command_with_id(struct cec_msg *msg, + __u32 vendor_id, __u8 size, + const __u8 *vendor_cmd) +{ + if (size > 11) + size = 11; + msg->len = 5 + size; + msg->msg[1] = CEC_MSG_VENDOR_COMMAND_WITH_ID; + msg->msg[2] = vendor_id >> 16; + msg->msg[3] = (vendor_id >> 8) & 0xff; + msg->msg[4] = vendor_id & 0xff; + memcpy(msg->msg + 5, vendor_cmd, size); +} + +static inline void cec_ops_vendor_command_with_id(const struct cec_msg *msg, + __u32 *vendor_id, __u8 *size, + const __u8 **vendor_cmd) +{ + *size = msg->len - 5; + + if (*size > 11) + *size = 11; + *vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4]; + *vendor_cmd = msg->msg + 5; +} + +static inline void cec_msg_vendor_remote_button_down(struct cec_msg *msg, + __u8 size, + const __u8 *rc_code) +{ + if (size > 14) + size = 14; + msg->len = 2 + size; + msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN; + memcpy(msg->msg + 2, rc_code, size); +} + +static inline void cec_ops_vendor_remote_button_down(const struct cec_msg *msg, + __u8 *size, + const __u8 **rc_code) +{ + *size = msg->len - 2; + + if (*size > 14) + *size = 14; + *rc_code = msg->msg + 2; +} + static inline void cec_msg_vendor_remote_button_up(struct cec_msg *msg) { msg->len = 2; -- cgit v0.10.2 From 0d06108c65e572085b2d1f7c8273f417cad68734 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Sun, 14 Aug 2016 23:31:13 -0300 Subject: [media] vcodec:mediatek:code refine for v4l2 Encoder driver This patch remove unused header and define from haeder files Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h index 94f0a42..3a8e695 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h @@ -23,7 +23,6 @@ #include #include -#include "mtk_vcodec_util.h" #define MTK_VCODEC_DRV_NAME "mtk_vcodec_drv" #define MTK_VCODEC_ENC_NAME "mtk-vcodec-enc" diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h index 33e890f..1213185 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h @@ -16,7 +16,6 @@ #define _MTK_VCODEC_INTR_H_ #define MTK_INST_IRQ_RECEIVED 0x1 -#define MTK_INST_WORK_THREAD_ABORT_DONE 0x2 struct mtk_vcodec_ctx; -- cgit v0.10.2 From ad34f5412d2a04a894b2cd2912538ae2e5d64e76 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Sun, 14 Aug 2016 23:47:20 -0300 Subject: [media] vcodec:mediatek: Fix fops_vcodec_release flow for V4L2 Encoder This patch fix that mtk_vcodec_venc_release should be called after v4l2_m2m_ctx_release Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 3ed3f2d..3b0691f 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -1288,5 +1288,10 @@ int mtk_venc_lock(struct mtk_vcodec_ctx *ctx) void mtk_vcodec_enc_release(struct mtk_vcodec_ctx *ctx) { - venc_if_deinit(ctx); + int ret = venc_if_deinit(ctx); + + if (ret) + mtk_v4l2_err("venc_if_deinit failed=%d", ret); + + ctx->state = MTK_STATE_FREE; } diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c index c7806ec..5cd2151 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c @@ -218,11 +218,15 @@ static int fops_vcodec_release(struct file *file) mtk_v4l2_debug(1, "[%d] encoder", ctx->id); mutex_lock(&dev->dev_mutex); + /* + * Call v4l2_m2m_ctx_release to make sure the worker thread is not + * running after venc_if_deinit. + */ + v4l2_m2m_ctx_release(ctx->m2m_ctx); mtk_vcodec_enc_release(ctx); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); v4l2_ctrl_handler_free(&ctx->ctrl_hdl); - v4l2_m2m_ctx_release(ctx->m2m_ctx); list_del_init(&ctx->list); dev->num_instances--; -- cgit v0.10.2 From 91ae0e1ec6ec91cd297933886b424f9a4a8acbd4 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:08:03 -0300 Subject: [media] vcodec:mediatek: Fix visible_height larger than coded_height issue in s_fmt_out The original code add extra 32 line to visible_height. It is incorrect, 32 line should be add to coded_height. The purpose is that user space could calcuate real buffer size needed by using coded_width * coded_height. But this method will make v4l2-compliance test fail, since g_fmt != s_fmt(g_fmt) So remove extend visible_height or coded_height, user space should just use sizeimage to get real buffer size needed Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 3b0691f..9b0187e 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -487,7 +487,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv, struct mtk_q_data *q_data; int ret, i; struct mtk_video_fmt *fmt; - unsigned int pitch_w_div16; struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp; vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type); @@ -530,15 +529,6 @@ static int vidioc_venc_s_fmt_out(struct file *file, void *priv, q_data->coded_width = f->fmt.pix_mp.width; q_data->coded_height = f->fmt.pix_mp.height; - pitch_w_div16 = DIV_ROUND_UP(q_data->visible_width, 16); - if (pitch_w_div16 % 8 != 0) { - /* Adjust returned width/height, so application could correctly - * allocate hw required memory - */ - q_data->visible_height += 32; - vidioc_try_fmt(f, q_data->fmt); - } - q_data->field = f->fmt.pix_mp.field; ctx->colorspace = f->fmt.pix_mp.colorspace; ctx->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc; -- cgit v0.10.2 From 16060f7ef660a11f282909b01fb6096e21cf5389 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:15:44 -0300 Subject: [media] vcodec:mediatek: Add timestamp and timecode copy for V4L2 Encoder This patch add copying timestamp and timecode from src buffer to dst buffer Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 9b0187e..0ca230e 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -868,7 +868,8 @@ static int mtk_venc_encode_header(void *priv) { struct mtk_vcodec_ctx *ctx = priv; int ret; - struct vb2_buffer *dst_buf; + struct vb2_buffer *src_buf, *dst_buf; + struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2; struct mtk_vcodec_mem bs_buf; struct venc_done_result enc_result; @@ -901,6 +902,15 @@ static int mtk_venc_encode_header(void *priv) mtk_v4l2_err("venc_if_encode failed=%d", ret); return -EINVAL; } + src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx); + if (src_buf) { + src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf); + dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf); + dst_buf->timestamp = src_buf->timestamp; + dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode; + } else { + mtk_v4l2_err("No timestamp for the header buffer."); + } ctx->state = MTK_STATE_HEADER; dst_buf->planes[0].bytesused = enc_result.bs_size; @@ -993,7 +1003,7 @@ static void mtk_venc_worker(struct work_struct *work) struct mtk_vcodec_mem bs_buf; struct venc_done_result enc_result; int ret, i; - struct vb2_v4l2_buffer *vb2_v4l2; + struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2; /* check dst_buf, dst_buf may be removed in device_run * to stored encdoe header so we need check dst_buf and @@ -1033,9 +1043,14 @@ static void mtk_venc_worker(struct work_struct *work) ret = venc_if_encode(ctx, VENC_START_OPT_ENCODE_FRAME, &frm_buf, &bs_buf, &enc_result); - vb2_v4l2 = container_of(dst_buf, struct vb2_v4l2_buffer, vb2_buf); + src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf); + dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf); + + dst_buf->timestamp = src_buf->timestamp; + dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode; + if (enc_result.is_key_frm) - vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME; + dst_vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME; if (ret) { v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf), -- cgit v0.10.2 From 158d6071bc0aad6663109d2fe9249c3cf570d423 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:26:02 -0300 Subject: [media] vcodec:mediatek: change H264 profile default to profile high This patch change default H264 profile from V4L2_MPEG_VIDEO_H264_PROFILE_MAIN to V4L2_MPEG_VIDEO_H264_PROFILE_HIGH Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c index 0ca230e..2c5719a 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c @@ -1222,7 +1222,7 @@ int mtk_vcodec_enc_ctrls_setup(struct mtk_vcodec_ctx *ctx) 0, V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE); v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH, - 0, V4L2_MPEG_VIDEO_H264_PROFILE_MAIN); + 0, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH); v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL, V4L2_MPEG_VIDEO_H264_LEVEL_4_2, 0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0); -- cgit v0.10.2 From 2d683b6dad73b5636297ac4978f73f2c638a0b19 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:33:32 -0300 Subject: [media] vcodec:mediatek: Refine H264 encoder driver This patch : 1. remove field and function that unused anymore 2. add support V4L2_MPEG_VIDEO_H264_LEVEL_4_2 Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c index 9a60052..63d4be4 100644 --- a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c +++ b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c @@ -61,6 +61,8 @@ enum venc_h264_bs_mode { /* * struct venc_h264_vpu_config - Structure for h264 encoder configuration + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @input_fourcc: input fourcc * @bitrate: target bitrate (in bps) * @pic_w: picture width. Picture size is visible stream resolution, in pixels, @@ -94,13 +96,13 @@ struct venc_h264_vpu_config { /* * struct venc_h264_vpu_buf - Structure for buffer information - * @align: buffer alignment (in bytes) + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @iova: IO virtual address * @vpua: VPU side memory addr which is used by RC_CODE * @size: buffer size (in bytes) */ struct venc_h264_vpu_buf { - u32 align; u32 iova; u32 vpua; u32 size; @@ -108,6 +110,8 @@ struct venc_h264_vpu_buf { /* * struct venc_h264_vsi - Structure for VPU driver control and info share + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * This structure is allocated in VPU side and shared to AP side. * @config: h264 encoder configuration * @work_bufs: working buffer information in VPU side @@ -150,12 +154,6 @@ struct venc_h264_inst { struct mtk_vcodec_ctx *ctx; }; -static inline void h264_write_reg(struct venc_h264_inst *inst, u32 addr, - u32 val) -{ - writel(val, inst->hw_base + addr); -} - static inline u32 h264_read_reg(struct venc_h264_inst *inst, u32 addr) { return readl(inst->hw_base + addr); @@ -214,6 +212,8 @@ static unsigned int h264_get_level(struct venc_h264_inst *inst, return 40; case V4L2_MPEG_VIDEO_H264_LEVEL_4_1: return 41; + case V4L2_MPEG_VIDEO_H264_LEVEL_4_2: + return 42; default: mtk_vcodec_debug(inst, "unsupported level %d", level); return 31; -- cgit v0.10.2 From 19d6837a52f1683cf448265952d559a44a7df924 Mon Sep 17 00:00:00 2001 From: Tiffany Lin Date: Mon, 15 Aug 2016 00:37:19 -0300 Subject: [media] vcodec:mediatek: Refine VP8 encoder driver This patch remove field and function that unused anymore Signed-off-by: Tiffany Lin Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c index 60bbcd2..6d97584 100644 --- a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c +++ b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c @@ -56,6 +56,8 @@ enum venc_vp8_vpu_work_buf { /* * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @input_fourcc: input fourcc * @bitrate: target bitrate (in bps) * @pic_w: picture width. Picture size is visible stream resolution, in pixels, @@ -83,14 +85,14 @@ struct venc_vp8_vpu_config { }; /* - * struct venc_vp8_vpu_buf -Structure for buffer information - * @align: buffer alignment (in bytes) + * struct venc_vp8_vpu_buf - Structure for buffer information + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * @iova: IO virtual address * @vpua: VPU side memory addr which is used by RC_CODE * @size: buffer size (in bytes) */ struct venc_vp8_vpu_buf { - u32 align; u32 iova; u32 vpua; u32 size; @@ -98,6 +100,8 @@ struct venc_vp8_vpu_buf { /* * struct venc_vp8_vsi - Structure for VPU driver control and info share + * AP-W/R : AP is writer/reader on this item + * VPU-W/R: VPU is write/reader on this item * This structure is allocated in VPU side and shared to AP side. * @config: vp8 encoder configuration * @work_bufs: working buffer information in VPU side @@ -138,12 +142,6 @@ struct venc_vp8_inst { struct mtk_vcodec_ctx *ctx; }; -static inline void vp8_enc_write_reg(struct venc_vp8_inst *inst, u32 addr, - u32 val) -{ - writel(val, inst->hw_base + addr); -} - static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr) { return readl(inst->hw_base + addr); -- cgit v0.10.2 From a7d4b8f2565ad0dfdff9a222d1d87990c73b36e8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 16 Aug 2016 14:38:24 +0200 Subject: KVM: s390: don't use current->thread.fpu.* when accessing registers As the meaning of these variables and pointers seems to change more frequently, let's directly access our save area, instead of going via current->thread. Right now, this is broken for set/get_fpu. They simply overwrite the host registers, as the pointers to the current save area were turned into the static host save area. Cc: stable@vger.kernel.org # 4.7 Fixes: 3f6813b9a5e0 ("s390/fpu: allocate 'struct fpu' with the task_struct") Reported-by: Hao QingFeng Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f142215..607ec91 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2231,9 +2231,10 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return -EINVAL; current->thread.fpu.fpc = fpu->fpc; if (MACHINE_HAS_VX) - convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs); + convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs, + (freg_t *) fpu->fprs); else - memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs)); + memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs)); return 0; } @@ -2242,9 +2243,10 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) /* make sure we have the latest values */ save_fpu_regs(); if (MACHINE_HAS_VX) - convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs); + convert_vx_to_fp((freg_t *) fpu->fprs, + (__vector128 *) vcpu->run->s.regs.vrs); else - memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs)); + memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs)); fpu->fpc = current->thread.fpu.fpc; return 0; } -- cgit v0.10.2 From 936523441bb64cdc9a5b263e8fd2782e70313a57 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 6 Aug 2016 15:50:52 +0200 Subject: batman-adv: Add missing refcnt for last_candidate batadv_find_router dereferences last_bonding_candidate from orig_node without making sure that it has a valid reference. This reference has to be retrieved by increasing the reference counter while holding neigh_list_lock. The lock is required to avoid that batadv_last_bonding_replace removes the current last_bonding_candidate, reduces the reference counter and maybe destroys the object in this process. Fixes: f3b3d9018975 ("batman-adv: add bonding again") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Simon Wunderlich diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7602c00..3d19947 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -470,6 +470,29 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, } /** + * batadv_last_bonding_get - Get last_bonding_candidate of orig_node + * @orig_node: originator node whose last bonding candidate should be retrieved + * + * Return: last bonding candidate of router or NULL if not found + * + * The object is returned with refcounter increased by 1. + */ +static struct batadv_orig_ifinfo * +batadv_last_bonding_get(struct batadv_orig_node *orig_node) +{ + struct batadv_orig_ifinfo *last_bonding_candidate; + + spin_lock_bh(&orig_node->neigh_list_lock); + last_bonding_candidate = orig_node->last_bonding_candidate; + + if (last_bonding_candidate) + kref_get(&last_bonding_candidate->refcount); + spin_unlock_bh(&orig_node->neigh_list_lock); + + return last_bonding_candidate; +} + +/** * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node * @orig_node: originator node whose bonding candidates should be replaced * @new_candidate: new bonding candidate or NULL @@ -539,7 +562,7 @@ batadv_find_router(struct batadv_priv *bat_priv, * router - obviously there are no other candidates. */ rcu_read_lock(); - last_candidate = orig_node->last_bonding_candidate; + last_candidate = batadv_last_bonding_get(orig_node); if (last_candidate) last_cand_router = rcu_dereference(last_candidate->router); @@ -631,6 +654,9 @@ next: batadv_orig_ifinfo_put(next_candidate); } + if (last_candidate) + batadv_orig_ifinfo_put(last_candidate); + return router; } -- cgit v0.10.2 From 1e5d343b8f23770e8ac5d31f5c439826bdb35148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Tue, 23 Aug 2016 03:13:03 +0200 Subject: batman-adv: fix elp packet data reservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The skb_reserve() call only reserved headroom for the mac header, but not the elp packet header itself. Fixing this by using skb_put()'ing towards the skb tail instead of skb_push()'ing towards the skb head. Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure") Signed-off-by: Linus Lüssing Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 7d17001..ee08540 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -335,7 +335,7 @@ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface) goto out; skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN); - elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); + elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN); elp_packet = (struct batadv_elp_packet *)elp_buff; memset(elp_packet, 0, BATADV_ELP_HLEN); -- cgit v0.10.2 From 29617e1cfc2aa869154f5ed2580b756ec2c3cb28 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 25 Aug 2016 10:56:48 -0400 Subject: MAINTAINERS: Add myself as reviewer for Samsung Exynos support I've been helping reviewing and testing Exynos SoC support patches for the last couple of years. But it would be easier for me if I'm cc'ed for patches, so I'm adding myself as reviewer for this entry. Signed-off-by: Javier Martinez Canillas Acked-by: Kukjin Kim Acked-by: Sylwester Nawrocki Signed-off-by: Krzysztof Kozlowski diff --git a/MAINTAINERS b/MAINTAINERS index c9cd8d3..8a8a485 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1614,6 +1614,7 @@ N: rockchip ARM/SAMSUNG EXYNOS ARM ARCHITECTURES M: Kukjin Kim M: Krzysztof Kozlowski +R: Javier Martinez Canillas L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained -- cgit v0.10.2 From 6654674cb7b5953ac04fc9d7f5f511676ae97e29 Mon Sep 17 00:00:00 2001 From: Jorik Jonker Date: Sat, 27 Aug 2016 21:04:33 +0200 Subject: clk: sunxi-ng: Fix wrong reset register offsets The reset register offsets for UART*, I2C* and SCR were off by a few bytes. Signed-off-by: Jorik Jonker Signed-off-by: Maxime Ripard diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 9af35954..267f995 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -783,14 +783,14 @@ static struct ccu_reset_map sun8i_h3_ccu_resets[] = { [RST_BUS_I2S1] = { 0x2d0, BIT(13) }, [RST_BUS_I2S2] = { 0x2d0, BIT(14) }, - [RST_BUS_I2C0] = { 0x2d4, BIT(0) }, - [RST_BUS_I2C1] = { 0x2d4, BIT(1) }, - [RST_BUS_I2C2] = { 0x2d4, BIT(2) }, - [RST_BUS_UART0] = { 0x2d4, BIT(16) }, - [RST_BUS_UART1] = { 0x2d4, BIT(17) }, - [RST_BUS_UART2] = { 0x2d4, BIT(18) }, - [RST_BUS_UART3] = { 0x2d4, BIT(19) }, - [RST_BUS_SCR] = { 0x2d4, BIT(20) }, + [RST_BUS_I2C0] = { 0x2d8, BIT(0) }, + [RST_BUS_I2C1] = { 0x2d8, BIT(1) }, + [RST_BUS_I2C2] = { 0x2d8, BIT(2) }, + [RST_BUS_UART0] = { 0x2d8, BIT(16) }, + [RST_BUS_UART1] = { 0x2d8, BIT(17) }, + [RST_BUS_UART2] = { 0x2d8, BIT(18) }, + [RST_BUS_UART3] = { 0x2d8, BIT(19) }, + [RST_BUS_SCR] = { 0x2d8, BIT(20) }, }; static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = { -- cgit v0.10.2 From 8a07fed44b126f48020f122b9e6bf05d8c48f281 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 23 Aug 2016 10:25:58 +0100 Subject: drm/i915/dvo: Remove dangling call to drm_encoder_cleanup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we hit the error path, we have never called drm_encoder_init() and so have nothing to cleanup. Doing so hits a null dereference: [ 10.066261] BUG: unable to handle kernel NULL pointer dereference at 00000104 [ 10.066273] IP: [] mutex_lock+0xa/0x15 [ 10.066287] *pde = 00000000 [ 10.066295] Oops: 0002 [#1] [ 10.066302] Modules linked in: i915(+) video i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm iTCO_wdt iTCO_vendor_support ppdev evdev snd_intel8x0 snd_ac97_codec ac97_bus psmouse snd_pcm snd_timer snd pcspkr uhci_hcd ehci_pci soundcore sr_mod ehci_hcd serio_raw i2c_i801 usbcore i2c_smbus cdrom lpc_ich mfd_core rng_core e100 mii floppy parport_pc parport acpi_cpufreq button processor usb_common eeprom lm85 hwmon_vid autofs4 [ 10.066378] CPU: 0 PID: 132 Comm: systemd-udevd Not tainted 4.8.0-rc3-00013-gef0e1ea #34 [ 10.066389] Hardware name: MicroLink /D865GLC , BIOS BF86510A.86A.0077.P25.0508040031 08/04/2005 [ 10.066401] task: f62db800 task.stack: f5970000 [ 10.066409] EIP: 0060:[] EFLAGS: 00010286 CPU: 0 [ 10.066417] EIP is at mutex_lock+0xa/0x15 [ 10.066424] EAX: 00000104 EBX: 00000104 ECX: 00000000 EDX: 80000000 [ 10.066432] ESI: 00000000 EDI: 00000104 EBP: f5be8000 ESP: f5971b58 [ 10.066439] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 [ 10.066446] CR0: 80050033 CR2: 00000104 CR3: 35945000 CR4: 000006d0 [ 10.066453] Stack: [ 10.066459] f503d740 f824dddf 00000000 f61170c0 f61170c0 f82371ae f850f40e 00000001 [ 10.066476] f61170c0 f5971bcc f5be8000 f9c2d401 00000001 f8236fcc 00000001 00000000 [ 10.066491] f5144014 f5be8104 00000008 f9c5267c 00000007 f61170c0 f5144400 f9c4ff00 [ 10.066507] Call Trace: [ 10.066526] [] ? drm_modeset_lock_all+0x27/0xb3 [drm] [ 10.066545] [] ? drm_encoder_cleanup+0x1a/0x132 [drm] [ 10.066559] [] ? drm_atomic_helper_connector_reset+0x3f/0x5c [drm_kms_helper] [ 10.066644] [] ? intel_dvo_init+0x569/0x788 [i915] [ 10.066663] [] ? drm_encoder_init+0x43/0x20b [drm] [ 10.066734] [] ? intel_modeset_init+0x1436/0x17dd [i915] [ 10.066791] [] ? i915_driver_load+0x85a/0x15d3 [i915] [ 10.066846] [] ? i915_driver_open+0x5/0x5 [i915] [ 10.066857] [] ? firmware_map_add_entry.part.2+0xc/0xc [ 10.066868] [] ? pci_device_probe+0x8e/0x11c [ 10.066878] [] ? driver_probe_device+0x1db/0x62e [ 10.066888] [] ? kernfs_new_node+0x29/0x9c [ 10.066897] [] ? pci_match_device+0xd9/0x161 [ 10.066905] [] ? kernfs_create_dir_ns+0x42/0x88 [ 10.066914] [] ? __driver_attach+0xe6/0x11b [ 10.066924] [] ? kobject_add_internal+0x1bb/0x44f [ 10.066933] [] ? driver_probe_device+0x62e/0x62e [ 10.066941] [] ? bus_for_each_dev+0x46/0x7f [ 10.066950] [] ? driver_attach+0x1a/0x34 [ 10.066958] [] ? driver_probe_device+0x62e/0x62e [ 10.066966] [] ? bus_add_driver+0x217/0x32a [ 10.066975] [] ? 0xf8403000 [ 10.066982] [] ? driver_register+0x5f/0x108 [ 10.066991] [] ? do_one_initcall+0x49/0x1f6 [ 10.067000] [] ? pick_next_task_fair+0x14b/0x2a3 [ 10.067008] [] ? __schedule+0x15c/0x4fe [ 10.067016] [] ? preempt_schedule_common+0x19/0x3c [ 10.067027] [] ? do_init_module+0x17/0x230 [ 10.067035] [] ? _cond_resched+0x12/0x1a [ 10.067044] [] ? kmem_cache_alloc+0x8f/0x11f [ 10.067052] [] ? do_init_module+0x17/0x230 [ 10.067060] [] ? kfree+0x137/0x203 [ 10.067068] [] ? do_init_module+0x76/0x230 [ 10.067078] [] ? load_module+0x2a39/0x333f [ 10.067087] [] ? SyS_finit_module+0x96/0xd5 [ 10.067096] [] ? vm_mmap_pgoff+0x79/0xa0 [ 10.067105] [] ? do_fast_syscall_32+0xb5/0x1b0 [ 10.067114] [] ? sysenter_past_esp+0x47/0x75 [ 10.067121] Code: c8 f7 76 c1 e8 8e cc d2 ff e9 45 fe ff ff 66 90 66 90 66 90 66 90 90 ff 00 7f 05 e8 4e 0c 00 00 c3 53 89 c3 e8 75 ec ff ff 89 d8 08 79 05 e8 fa 0a 00 00 5b c3 53 89 c3 85 c0 74 1b 8b 03 83 [ 10.067180] EIP: [] mutex_lock+0xa/0x15 SS:ESP 0068:f5971b58 [ 10.067190] CR2: 0000000000000104 [ 10.067222] ---[ end trace 049f1f09da45a856 ]--- Reported-by: Meelis Roos Fixes: 580d8ed522e0 ("drm/i915: Give encoders useful names") Reviewed-by: David Weinehall Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160823092558.14931-1-chris@chris-wilson.co.uk (cherry picked from commit 8f76aa0ebe0b7787afe768d9df80031e832d2520) diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 47bdf9d..b9e5a63 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -554,7 +554,6 @@ void intel_dvo_init(struct drm_device *dev) return; } - drm_encoder_cleanup(&intel_encoder->base); kfree(intel_dvo); kfree(intel_connector); } -- cgit v0.10.2 From b030485220caf862c71db6fb8b8ad016ce7f7565 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sun, 21 Aug 2016 03:27:45 -0400 Subject: ARM: EXYNOS: Clear OF_POPULATED flag from PMU node in IRQ init callback The Exynos PMU node is an interrupt, clock and PMU (Power Management Unit) controller, and these functionalities are supported by different drivers that matches the same compatible strings. Since commit 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") the OF core flags interrupt controllers registered with the IRQCHIP_DECLARE() macro as OF_POPULATED, so platform devices with the same compatible string as the interrupt controller will not be registered. This prevents the PMU platform device to be registered so the Exynos PMU driver is never probed. This breaks (among other things) Suspend-to-RAM. Fix this by clearing the OF_POPULATED flag in the PMU IRQ init callback, to allow the Exynos PMU platform driver to be probed. The patch is based on Philipp Zabel's "ARM: imx6: mark GPC node as not populated after irq init to probe pm domain driver". Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Signed-off-by: Javier Martinez Canillas Signed-off-by: Krzysztof Kozlowski diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c index 3750575..06332f6 100644 --- a/arch/arm/mach-exynos/suspend.c +++ b/arch/arm/mach-exynos/suspend.c @@ -255,6 +255,12 @@ static int __init exynos_pmu_irq_init(struct device_node *node, return -ENOMEM; } + /* + * Clear the OF_POPULATED flag set in of_irq_init so that + * later the Exynos PMU platform device won't be skipped. + */ + of_node_clear_flag(node, OF_POPULATED); + return 0; } -- cgit v0.10.2 From 313a61d30761217ce4383018de1cc0d5d503a376 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 30 Aug 2016 13:57:38 -0700 Subject: drm/vc4: Allow some more signals to be packed with uniform resets. The intent was to make sure people don't sneak in a small immediate or something to change the interpretation of the uniform update args, but these signals are just fine. Fixes a validation failure in the current X server on some Render operation. Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 46527e9..2543cf5 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -309,8 +309,14 @@ validate_uniform_address_write(struct vc4_validated_shader_info *validated_shade * of uniforms on each side. However, this scheme is easy to * validate so it's all we allow for now. */ - - if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) { + switch (QPU_GET_FIELD(inst, QPU_SIG)) { + case QPU_SIG_NONE: + case QPU_SIG_SCOREBOARD_UNLOCK: + case QPU_SIG_COLOR_LOAD: + case QPU_SIG_LOAD_TMU0: + case QPU_SIG_LOAD_TMU1: + break; + default: DRM_ERROR("uniforms address change must be " "normal math\n"); return false; -- cgit v0.10.2 From 8e018c21da3febb558586b48c8db0d6d66cb6593 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 25 Aug 2016 10:09:39 -0700 Subject: raid5-cache: fix a deadlock in superblock write There is a potential deadlock in superblock write. Discard could zero data, so before discard we must make sure superblock is updated to new log tail. Updating superblock (either directly call md_update_sb() or depend on md thread) must hold reconfig mutex. On the other hand, raid5_quiesce is called with reconfig_mutex hold. The first step of raid5_quiesce() is waitting for all IO finish, hence waitting for reclaim thread, while reclaim thread is calling this function and waitting for reconfig mutex. So there is a deadlock. We workaround this issue with a trylock. The downside of the solution is we could miss discard if we can't take reconfig mutex. But this should happen rarely (mainly in raid array stop), so miss discard shouldn't be a big problem. Cc: NeilBrown Signed-off-by: Shaohua Li diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 51f76dd..1b1ab4a 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -96,7 +96,6 @@ struct r5l_log { spinlock_t no_space_stripes_lock; bool need_cache_flush; - bool in_teardown; }; /* @@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, mddev = log->rdev->mddev; /* - * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and - * wait for this thread to finish. This thread waits for - * MD_CHANGE_PENDING clear, which is supposed to be done in - * md_check_recovery(). md_check_recovery() tries to get - * reconfig_mutex. Since r5l_quiesce already holds the mutex, - * md_check_recovery() fails, so the PENDING never get cleared. The - * in_teardown check workaround this issue. + * Discard could zero data, so before discard we must make sure + * superblock is updated to new log tail. Updating superblock (either + * directly call md_update_sb() or depend on md thread) must hold + * reconfig mutex. On the other hand, raid5_quiesce is called with + * reconfig_mutex hold. The first step of raid5_quiesce() is waitting + * for all IO finish, hence waitting for reclaim thread, while reclaim + * thread is calling this function and waitting for reconfig mutex. So + * there is a deadlock. We workaround this issue with a trylock. + * FIXME: we could miss discard if we can't take reconfig mutex */ - if (!log->in_teardown) { - set_mask_bits(&mddev->flags, 0, - BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); - md_wakeup_thread(mddev->thread); - wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_PENDING, &mddev->flags) || - log->in_teardown); - /* - * r5l_quiesce could run after in_teardown check and hold - * mutex first. Superblock might get updated twice. - */ - if (log->in_teardown) - md_update_sb(mddev, 1); - } else { - WARN_ON(!mddev_is_locked(mddev)); - md_update_sb(mddev, 1); - } + set_mask_bits(&mddev->flags, 0, + BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); + if (!mddev_trylock(mddev)) + return; + md_update_sb(mddev, 1); + mddev_unlock(mddev); /* discard IO error really doesn't matter, ignore it */ if (log->last_checkpoint < end) { @@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state) if (!log || state == 2) return; if (state == 0) { - log->in_teardown = 0; /* * This is a special case for hotadd. In suspend, the array has * no journal. In resume, journal is initialized as well as the @@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state) log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); } else if (state == 1) { - /* - * at this point all stripes are finished, so io_unit is at - * least in STRIPE_END state - */ - log->in_teardown = 1; /* make sure r5l_write_super_and_discard_space exits */ mddev = log->rdev->mddev; wake_up(&mddev->sb_wait); -- cgit v0.10.2 From ad5b0f7685dbfc4730987cd16af3c5ebe8133f10 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 30 Aug 2016 10:29:33 -0700 Subject: raid5: guarantee enough stripes to avoid reshape hang If there aren't enough stripes, reshape will hang. We have a check for this in new reshape, but miss it for reshape resume, hence we could see hang in reshape resume. This patch forces enough stripes existed if reshape resumes. Reviewed-by: NeilBrown Signed-off-by: Shaohua Li diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index da583bb..b95c54c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6639,6 +6639,16 @@ static struct r5conf *setup_conf(struct mddev *mddev) } conf->min_nr_stripes = NR_STRIPES; + if (mddev->reshape_position != MaxSector) { + int stripes = max_t(int, + ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4, + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4); + conf->min_nr_stripes = max(NR_STRIPES, stripes); + if (conf->min_nr_stripes != NR_STRIPES) + printk(KERN_INFO + "md/raid:%s: force stripe size %d for reshape\n", + mdname(mddev), conf->min_nr_stripes); + } memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); -- cgit v0.10.2 From 6af7e4f77259ee946103387372cb159f2e99a6d4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 1 Sep 2016 08:52:29 -0500 Subject: PCI: Mark Haswell Power Control Unit as having non-compliant BARs The Haswell Power Control Unit has a non-PCI register (CONFIG_TDP_NOMINAL) where BAR 0 is supposed to be. This is erratum HSE43 in the spec update referenced below: The PCIe* Base Specification indicates that Configuration Space Headers have a base address register at offset 0x10. Due to this erratum, the Power Control Unit's CONFIG_TDP_NOMINAL CSR (Bus 1; Device 30; Function 3; Offset 0x10) is located where a base register is expected. Mark the PCU as having non-compliant BARs so we don't try to probe any of them. There are no other BARs on this device. Rename the quirk so it's not Broadwell-specific. Link: http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html Link: http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-datasheet-vol-2.html (section 5.4, Device 30 Function 3) Link: https://bugzilla.kernel.org/show_bug.cgi?id=153881 Reported-by: Paul Menzel Tested-by: Prarit Bhargava Signed-off-by: Bjorn Helgaas Acked-by: Myron Stowe diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 837ea36..6d52b94 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -553,15 +553,21 @@ static void twinhead_reserve_killing_zone(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone); /* - * Broadwell EP Home Agent BARs erroneously return non-zero values when read. + * Device [8086:2fc0] + * Erratum HSE43 + * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html * - * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html - * entry BDF2. + * Devices [8086:6f60,6fa0,6fc0] + * Erratum BDF2 + * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration + * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html */ -static void pci_bdwep_bar(struct pci_dev *dev) +static void pci_invalid_bar(struct pci_dev *dev) { dev->non_compliant_bars = 1; } -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar); -DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar); -- cgit v0.10.2 From c2f321126e31cd69365e65ecd4a7c774e4fc71d2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 30 Aug 2016 21:50:22 +0200 Subject: ARM: shmobile: fix regulator quirk for Gen2 The current implementation only works if the da9xxx devices are added before their drivers are registered. Only then it can apply the fixes to both devices. Otherwise, the driver for the first device gets probed before the fix for the second device can be applied. This is what fails when using the IP core switcher or when having the i2c master driver as a module. So, we need to disable both da9xxx once we detected one of them. We now use i2c_transfer with hardcoded i2c_messages and device addresses, so we don't need the da9xxx client devices to be instantiated. Because the fixup is used on specific boards only, the addresses are not going to change. Fixes: 663fbb52159cca ("ARM: shmobile: R-Car Gen2: Add da9063/da9210 regulator quirk") Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven (r8a7791/koelsch) Tested-by: Kuninori Morimoto Signed-off-by: Simon Horman diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index 62437b5..73e3adb 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -41,39 +41,26 @@ #define REGULATOR_IRQ_MASK BIT(2) /* IRQ2, active low */ -static void __iomem *irqc; - -static const u8 da9063_mask_regs[] = { - DA9063_REG_IRQ_MASK_A, - DA9063_REG_IRQ_MASK_B, - DA9063_REG_IRQ_MASK_C, - DA9063_REG_IRQ_MASK_D, -}; - -/* DA9210 System Control and Event Registers */ +/* start of DA9210 System Control and Event Registers */ #define DA9210_REG_MASK_A 0x54 -#define DA9210_REG_MASK_B 0x55 - -static const u8 da9210_mask_regs[] = { - DA9210_REG_MASK_A, - DA9210_REG_MASK_B, -}; - -static void da9xxx_mask_irqs(struct i2c_client *client, const u8 regs[], - unsigned int nregs) -{ - unsigned int i; - dev_info(&client->dev, "Masking %s interrupt sources\n", client->name); +static void __iomem *irqc; - for (i = 0; i < nregs; i++) { - int error = i2c_smbus_write_byte_data(client, regs[i], ~0); - if (error) { - dev_err(&client->dev, "i2c error %d\n", error); - return; - } - } -} +/* first byte sets the memory pointer, following are consecutive reg values */ +static u8 da9063_irq_clr[] = { DA9063_REG_IRQ_MASK_A, 0xff, 0xff, 0xff, 0xff }; +static u8 da9210_irq_clr[] = { DA9210_REG_MASK_A, 0xff, 0xff }; + +static struct i2c_msg da9xxx_msgs[2] = { + { + .addr = 0x58, + .len = ARRAY_SIZE(da9063_irq_clr), + .buf = da9063_irq_clr, + }, { + .addr = 0x68, + .len = ARRAY_SIZE(da9210_irq_clr), + .buf = da9210_irq_clr, + }, +}; static int regulator_quirk_notify(struct notifier_block *nb, unsigned long action, void *data) @@ -93,12 +80,15 @@ static int regulator_quirk_notify(struct notifier_block *nb, client = to_i2c_client(dev); dev_dbg(dev, "Detected %s\n", client->name); - if ((client->addr == 0x58 && !strcmp(client->name, "da9063"))) - da9xxx_mask_irqs(client, da9063_mask_regs, - ARRAY_SIZE(da9063_mask_regs)); - else if (client->addr == 0x68 && !strcmp(client->name, "da9210")) - da9xxx_mask_irqs(client, da9210_mask_regs, - ARRAY_SIZE(da9210_mask_regs)); + if ((client->addr == 0x58 && !strcmp(client->name, "da9063")) || + (client->addr == 0x68 && !strcmp(client->name, "da9210"))) { + int ret; + + dev_info(&client->dev, "clearing da9063/da9210 interrupts\n"); + ret = i2c_transfer(client->adapter, da9xxx_msgs, ARRAY_SIZE(da9xxx_msgs)); + if (ret != ARRAY_SIZE(da9xxx_msgs)) + dev_err(&client->dev, "i2c error %d\n", ret); + } mon = ioread32(irqc + IRQC_MONITOR); if (mon & REGULATOR_IRQ_MASK) -- cgit v0.10.2 From a41bd25ae67d3e4052c7f00ee9f2b4ba9219309e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 25 Aug 2016 18:42:35 +0200 Subject: sunrpc: fix UDP memory accounting The commit f9b2ee714c5c ("SUNRPC: Move UDP receive data path into a workqueue context"), as a side effect, moved the skb_free_datagram() call outside the scope of the related socket lock, but UDP sockets require such lock to be held for proper memory accounting. Fix it by replacing skb_free_datagram() with skb_free_datagram_locked(). Fixes: f9b2ee714c5c ("SUNRPC: Move UDP receive data path into a workqueue context") Reported-and-tested-by: Jan Stancek Signed-off-by: Paolo Abeni Cc: stable@vger.kernel.org # 4.4+ Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ede3bc..bf16883 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1074,7 +1074,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport) skb = skb_recv_datagram(sk, 0, 1, &err); if (skb != NULL) { xs_udp_data_read_skb(&transport->xprt, sk, skb); - skb_free_datagram(sk, skb); + skb_free_datagram_locked(sk, skb); continue; } if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) -- cgit v0.10.2 From c49edecd513693ea7530ab18efbd7d6d5b7cbf90 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 12:05:31 -0400 Subject: NFS: Fix error reporting in nfs_file_write() When doing O_DSYNC writes, the actual write errors are reported through generic_write_sync(), so we must test the result. Reported-by: J. R. Okajima Fixes: 18290650b1c8 ("NFS: Move buffered I/O locking into nfs_file_write()") Signed-off-by: Trond Myklebust diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7d62097..ca699dd 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -657,7 +657,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) if (result <= 0) goto out; - written = generic_write_sync(iocb, result); + result = generic_write_sync(iocb, result); + if (result < 0) + goto out; + written = result; iocb->ki_pos += written; /* Return error values */ -- cgit v0.10.2 From bf0291dd2267a2b9a4cd74d65249553d11bb45d6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 10:39:51 -0400 Subject: pNFS: Ensure LAYOUTGET and LAYOUTRETURN are properly serialised According to RFC5661, the client is responsible for serialising LAYOUTGET and LAYOUTRETURN to avoid ambiguity. Consider the case where we send both in parallel. Client Server ====== ====== LAYOUTGET(seqid=X) LAYOUTRETURN(seqid=X) LAYOUTGET return seqid=X+1 LAYOUTRETURN return seqid=X+2 Process LAYOUTRETURN Forget layout stateid Process LAYOUTGET Set seqid=X+1 The client processes the layoutget/layoutreturn in the wrong order, and since the result of the layoutreturn was to clear the only existing layout segment, the client forgets the layout stateid. When the LAYOUTGET comes in, it is treated as having a completely new stateid, and so the client sets the wrong sequence id... Fix is to check if there are outstanding LAYOUTGET requests before we send the LAYOUTRETURN (note that LAYOUGET will already wait if it sees an outstanding LAYOUTRETURN). Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6daf034..519ad32 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -899,6 +899,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, enum pnfs_iomode *iomode) { + /* Serialise LAYOUTGET/LAYOUTRETURN */ + if (atomic_read(&lo->plh_outstanding) != 0) + return false; if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) return false; pnfs_get_layout_hdr(lo); -- cgit v0.10.2 From 2a59a0411671ef9daf17ba21da57809c696f4119 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 11:20:04 -0400 Subject: pNFS: Fix pnfs_set_layout_stateid() to clear NFS_LAYOUT_INVALID_STID If the layout was marked as invalid, we want to ensure to initialise the layout header fields correctly. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 519ad32..cd8b5fc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -768,17 +768,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_destroy_layouts_byclid(clp, false); } +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ + lo->plh_return_iomode = 0; + lo->plh_return_seq = 0; + clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} + /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, bool update_barrier) { u32 oldseq, newseq, new_barrier = 0; - bool invalid = !pnfs_layout_is_valid(lo); oldseq = be32_to_cpu(lo->plh_stateid.seqid); newseq = be32_to_cpu(new->seqid); - if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) { + + if (!pnfs_layout_is_valid(lo)) { + nfs4_stateid_copy(&lo->plh_stateid, new); + lo->plh_barrier = newseq; + pnfs_clear_layoutreturn_info(lo); + clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + return; + } + if (pnfs_seqid_is_newer(newseq, oldseq)) { nfs4_stateid_copy(&lo->plh_stateid, new); /* * Because of wraparound, we want to keep the barrier @@ -790,7 +805,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, new_barrier = be32_to_cpu(new->seqid); else if (new_barrier == 0) return; - if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) + if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier)) lo->plh_barrier = new_barrier; } @@ -886,14 +901,6 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); } -static void -pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) -{ - lo->plh_return_iomode = 0; - lo->plh_return_seq = 0; - clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -} - static bool pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, @@ -1801,16 +1808,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) */ pnfs_mark_layout_stateid_invalid(lo, &free_me); - nfs4_stateid_copy(&lo->plh_stateid, &res->stateid); - lo->plh_barrier = be32_to_cpu(res->stateid.seqid); + pnfs_set_layout_stateid(lo, &res->stateid, true); } pnfs_get_lseg(lseg); pnfs_layout_insert_lseg(lo, lseg, &free_me); - if (!pnfs_layout_is_valid(lo)) { - pnfs_clear_layoutreturn_info(lo); - clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - } if (res->return_on_close) -- cgit v0.10.2 From 52ec7be2e27392201adf77892ba883f68df88c99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Sep 2016 11:05:28 -0400 Subject: pNFS: Clear out all layout segments if the server unsets lrp->res.lrs_present If the server fails to set lrp->res.lrs_present in the LAYOUTRETURN reply, then that means it believes the client holds no more layout state for that file, and that the layout stateid is now invalid. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f5aecaa..c380d2e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8190,10 +8190,13 @@ static void nfs4_layoutreturn_release(void *calldata) dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range, - be32_to_cpu(lrp->args.stateid.seqid)); - if (lrp->res.lrs_present && pnfs_layout_is_valid(lo)) + if (lrp->res.lrs_present) { + pnfs_mark_matching_lsegs_invalid(lo, &freeme, + &lrp->args.range, + be32_to_cpu(lrp->args.stateid.seqid)); pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + } else + pnfs_mark_layout_stateid_invalid(lo, &freeme); pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&lo->plh_inode->i_lock); nfs4_sequence_free_slot(&lrp->res.seq_res); -- cgit v0.10.2 From 609e941a6bcd7ceb1cbb561941c997f6465e8698 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 1 Sep 2016 06:43:46 -0700 Subject: iw_cxgb4: call dev_put() on l2t allocation failure Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a3a6721..57b5bb5 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2077,8 +2077,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) + if (!ep->l2t) { + dev_put(pdev); goto out; + } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, -- cgit v0.10.2 From 37eb816c0867b1b0db273d22b530780a0a083980 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 1 Sep 2016 06:44:52 -0700 Subject: iw_cxgb4: block module unload until all ep resources are released Otherwise an endpoint can be still closing down causing a touch after free crash. Also WARN_ON if ulps have failed to destroy various resources during device removal. Fixes: ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting the last deref") Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 57b5bb5..5621270 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -314,6 +314,8 @@ static void remove_ep_tid(struct c4iw_ep *ep) spin_lock_irqsave(&ep->com.dev->lock, flags); _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); + if (idr_is_empty(&ep->com.dev->hwtid_idr)) + wake_up(&ep->com.dev->wait); spin_unlock_irqrestore(&ep->com.dev->lock, flags); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ae2e8b2..63561aa 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) static void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); idr_destroy(&ctx->dev->cqidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); idr_destroy(&ctx->dev->qpidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); idr_destroy(&ctx->dev->mmidr); + wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); @@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); + init_waitqueue_head(&devp->wait); devp->avail_ird = devp->rdev.lldi.max_ird_adapter; if (c4iw_debugfs_root) { diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index f6f34a7..0e9cd44 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -263,6 +263,7 @@ struct c4iw_dev { struct idr stid_idr; struct list_head db_fc_list; u32 avail_ird; + wait_queue_head_t wait; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) -- cgit v0.10.2 From cdbecc8d24b642b67ae79a0acc2ff18d3d0e677e Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 1 Sep 2016 09:12:25 -0700 Subject: nvme_rdma: keep a ref on the ctrl during delete/flush Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ab545fb..15b0c1d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1351,9 +1351,15 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) ret = 1; } - /* Queue controller deletion */ + /* + * Queue controller deletion. Keep a reference until all + * work is flushed since delete_work will free the ctrl mem + */ + kref_get(&ctrl->ctrl.kref); queue_work(nvme_rdma_wq, &ctrl->delete_work); flush_work(&ctrl->delete_work); + nvme_put_ctrl(&ctrl->ctrl); + return ret; } @@ -1700,15 +1706,19 @@ static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - int ret; + int ret = 0; + /* + * Keep a reference until all work is flushed since + * __nvme_rdma_del_ctrl can free the ctrl mem + */ + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; ret = __nvme_rdma_del_ctrl(ctrl); - if (ret) - return ret; - - flush_work(&ctrl->delete_work); - - return 0; + if (!ret) + flush_work(&ctrl->delete_work); + nvme_put_ctrl(&ctrl->ctrl); + return ret; } static void nvme_rdma_remove_ctrl_work(struct work_struct *work) -- cgit v0.10.2 From f361e5a01ed35c0f9a00816d76a910d8a5cb4547 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 Sep 2016 09:01:27 -0700 Subject: nvme-rdma: destroy nvme queue rdma resources on connect failure After address resolution, the nvme_rdma_queue rdma resources are allocated. If rdma route resolution or the connect fails, or the controller reconnect times out and gives up, then the rdma resources need to be freed. Otherwise, rdma resources are leaked. Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 15b0c1d..a9d43f0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -82,6 +82,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), + NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), }; struct nvme_rdma_queue { @@ -480,9 +481,14 @@ out_err: static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) { - struct nvme_rdma_device *dev = queue->device; - struct ib_device *ibdev = dev->dev; + struct nvme_rdma_device *dev; + struct ib_device *ibdev; + if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags)) + return; + + dev = queue->device; + ibdev = dev->dev; rdma_destroy_qp(queue->cm_id); ib_free_cq(queue->ib_cq); @@ -533,6 +539,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue, ret = -ENOMEM; goto out_destroy_qp; } + set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags); return 0; @@ -590,6 +597,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, return 0; out_destroy_cm_id: + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); return ret; } @@ -652,7 +660,7 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) return 0; out_free_queues: - for (; i >= 1; i--) + for (i--; i >= 1; i--) nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); return ret; -- cgit v0.10.2 From 334a8f37115bf35e38617315a360a91ac4f2b2c6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Sep 2016 12:46:35 -0400 Subject: pNFS: Don't forget the layout stateid if there are outstanding LAYOUTGETs If there are outstanding LAYOUTGET rpc calls, then we want to ensure that we keep the layout stateid around so we that don't inadvertently pick up an old/misordered sequence id. The race is as follows: Client Server ====== ====== LAYOUTGET(seqid) LAYOUTGET(seqid) return LAYOUTGET(seqid+1) return LAYOUTGET(seqid+2) process LAYOUTGET(seqid+2) forget layout process LAYOUTGET(seqid+1) If it forgets the layout stateid before processing seqid+1, then the client will not check the layout->plh_barrier, and so will set the stateid with seqid+1. Signed-off-by: Trond Myklebust diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cd8b5fc..2c93a85 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -365,7 +365,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); if (list_empty(&lo->plh_segs)) { - set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + if (atomic_read(&lo->plh_outstanding) == 0) + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); -- cgit v0.10.2 From cc2187a6e037bc64404f63c6d650ff263c2200c0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 4 Sep 2016 11:37:36 +0200 Subject: x86/microcode/AMD: Fix load of builtin microcode with randomized memory We do not need to add the randomization offset when the microcode is built in. Reported-and-tested-by: Emanuel Czirai Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20160904093736.GA11939@pd.tnic Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index b816971..620ab06 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -54,6 +54,7 @@ static LIST_HEAD(pcache); */ static u8 *container; static size_t container_size; +static bool ucode_builtin; static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; @@ -281,18 +282,22 @@ static bool __init load_builtin_amd_microcode(struct cpio_data *cp, void __init load_ucode_amd_bsp(unsigned int family) { struct cpio_data cp; + bool *builtin; void **data; size_t *size; #ifdef CONFIG_X86_32 data = (void **)__pa_nodebug(&ucode_cpio.data); size = (size_t *)__pa_nodebug(&ucode_cpio.size); + builtin = (bool *)__pa_nodebug(&ucode_builtin); #else data = &ucode_cpio.data; size = &ucode_cpio.size; + builtin = &ucode_builtin; #endif - if (!load_builtin_amd_microcode(&cp, family)) + *builtin = load_builtin_amd_microcode(&cp, family); + if (!*builtin) cp = find_ucode_in_initrd(); if (!(cp.data && cp.size)) @@ -373,7 +378,8 @@ void load_ucode_amd_ap(void) return; /* Add CONFIG_RANDOMIZE_MEMORY offset. */ - cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; + if (!ucode_builtin) + cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; eax = cpuid_eax(0x00000001); eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ); @@ -439,7 +445,8 @@ int __init save_microcode_in_initrd_amd(void) container = cont_va; /* Add CONFIG_RANDOMIZE_MEMORY offset. */ - container += PAGE_OFFSET - __PAGE_OFFSET_BASE; + if (!ucode_builtin) + container += PAGE_OFFSET - __PAGE_OFFSET_BASE; eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); -- cgit v0.10.2 From d7127b5e5fa0551be21b86640f1648b224e36d43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Aug 2016 08:16:00 +0200 Subject: locking/barriers: Don't use sizeof(void) in lockless_dereference() My previous commit: 112dc0c8069e ("locking/barriers: Suppress sparse warnings in lockless_dereference()") caused sparse to complain that (in radix-tree.h) we use sizeof(void) since that rcu_dereference()s a void *. Really, all we need is to have the expression *p in here somewhere to make sure p is a pointer type, and sizeof(*p) was the thing that came to my mind first to make sure that's done without really doing anything at runtime. Another thing I had considered was using typeof(*p), but obviously we can't just declare a typeof(*p) variable either, since that may end up being void. Declaring a variable as typeof(*p)* gets around that, and still checks that typeof(*p) is valid, so do that. This type construction can't be done for _________p1 because that will actually be used and causes sparse address space warnings, so keep a separate unused variable for it. Reported-by: Fengguang Wu Signed-off-by: Johannes Berg Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kbuild-all@01.org Fixes: 112dc0c8069e ("locking/barriers: Suppress sparse warnings in lockless_dereference()") Link: http://lkml.kernel.org/r/1472192160-4049-1-git-send-email-johannes@sipsolutions.net Signed-off-by: Ingo Molnar diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 436aa4e..6685698 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -527,13 +527,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. * - * The seemingly unused size_t variable is to validate @p is indeed a pointer - * type by making sure it can be dereferenced. + * The seemingly unused variable ___typecheck_p validates that @p is + * indeed a pointer type by using a pointer to typeof(*p) as the type. + * Taking a pointer to typeof(*p) again is needed in case p is void *. */ #define lockless_dereference(p) \ ({ \ typeof(p) _________p1 = READ_ONCE(p); \ - size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \ + typeof(*(p)) *___typecheck_p __maybe_unused; \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) -- cgit v0.10.2 From 58763148758057ffc447bf990321d3ea86d199a0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Aug 2016 10:15:03 +0200 Subject: perf/core: Remove WARN from perf_event_read() This effectively reverts commit: 71e7bc2bab77 ("perf/core: Check return value of the perf_event_read() IPI") ... and puts in a comment explaining why we ignore the return value. Reported-by: Vegard Nossum Signed-off-by: Peter Zijlstra (Intel) Cc: David Carrillo-Cisneros Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 71e7bc2bab77 ("perf/core: Check return value of the perf_event_read() IPI") Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 3cfabdf..07ac859 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3549,10 +3549,18 @@ static int perf_event_read(struct perf_event *event, bool group) .group = group, .ret = 0, }; - ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); - /* The event must have been read from an online CPU: */ - WARN_ON_ONCE(ret); - ret = ret ? : data.ret; + /* + * Purposely ignore the smp_call_function_single() return + * value. + * + * If event->oncpu isn't a valid CPU it means the event got + * scheduled out and that will have updated the event count. + * + * Therefore, either way, we'll have an up-to-date event count + * after this. + */ + (void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); + ret = data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; -- cgit v0.10.2 From 135e8c9250dd5c8c9aae5984fde6f230d0cbfeaf Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Mon, 5 Sep 2016 13:16:40 +1000 Subject: sched/core: Fix a race between try_to_wake_up() and a woken up task The origin of the issue I've seen is related to a missing memory barrier between check for task->state and the check for task->on_rq. The task being woken up is already awake from a schedule() and is doing the following: do { schedule() set_current_state(TASK_(UN)INTERRUPTIBLE); } while (!cond); The waker, actually gets stuck doing the following in try_to_wake_up(): while (p->on_cpu) cpu_relax(); Analysis: The instance I've seen involves the following race: CPU1 CPU2 while () { if (cond) break; do { schedule(); set_current_state(TASK_UN..) } while (!cond); wakeup_routine() spin_lock_irqsave(wait_lock) raw_spin_lock_irqsave(wait_lock) wake_up_process() } try_to_wake_up() set_current_state(TASK_RUNNING); .. list_del(&waiter.list); CPU2 wakes up CPU1, but before it can get the wait_lock and set current state to TASK_RUNNING the following occurs: CPU3 wakeup_routine() raw_spin_lock_irqsave(wait_lock) if (!list_empty) wake_up_process() try_to_wake_up() raw_spin_lock_irqsave(p->pi_lock) .. if (p->on_rq && ttwu_wakeup()) .. while (p->on_cpu) cpu_relax() .. CPU3 tries to wake up the task on CPU1 again since it finds it on the wait_queue, CPU1 is spinning on wait_lock, but immediately after CPU2, CPU3 got it. CPU3 checks the state of p on CPU1, it is TASK_UNINTERRUPTIBLE and the task is spinning on the wait_lock. Interestingly since p->on_rq is checked under pi_lock, I've noticed that try_to_wake_up() finds p->on_rq to be 0. This was the most confusing bit of the analysis, but p->on_rq is changed under runqueue lock, rq_lock, the p->on_rq check is not reliable without this fix IMHO. The race is visible (based on the analysis) only when ttwu_queue() does a remote wakeup via ttwu_queue_remote. In which case the p->on_rq change is not done uder the pi_lock. The result is that after a while the entire system locks up on the raw_spin_irqlock_save(wait_lock) and the holder spins infintely Reproduction of the issue: The issue can be reproduced after a long run on my system with 80 threads and having to tweak available memory to very low and running memory stress-ng mmapfork test. It usually takes a long time to reproduce. I am trying to work on a test case that can reproduce the issue faster, but thats work in progress. I am still testing the changes on my still in a loop and the tests seem OK thus far. Big thanks to Benjamin and Nick for helping debug this as well. Ben helped catch the missing barrier, Nick caught every missing bit in my theory. Signed-off-by: Balbir Singh [ Updated comment to clarify matching barriers. Many architectures do not have a full barrier in switch_to() so that cannot be relied upon. ] Signed-off-by: Peter Zijlstra (Intel) Acked-by: Benjamin Herrenschmidt Cc: Alexey Kardashevskiy Cc: Linus Torvalds Cc: Nicholas Piggin Cc: Nicholas Piggin Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Link: http://lkml.kernel.org/r/e02cce7b-d9ca-1ad0-7a61-ea97c7582b37@gmail.com Signed-off-by: Ingo Molnar diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2a906f2..44817c6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2016,6 +2016,28 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) success = 1; /* we're going to change ->state */ cpu = task_cpu(p); + /* + * Ensure we load p->on_rq _after_ p->state, otherwise it would + * be possible to, falsely, observe p->on_rq == 0 and get stuck + * in smp_cond_load_acquire() below. + * + * sched_ttwu_pending() try_to_wake_up() + * [S] p->on_rq = 1; [L] P->state + * UNLOCK rq->lock -----. + * \ + * +--- RMB + * schedule() / + * LOCK rq->lock -----' + * UNLOCK rq->lock + * + * [task p] + * [S] p->state = UNINTERRUPTIBLE [L] p->on_rq + * + * Pairs with the UNLOCK+LOCK on rq->lock from the + * last wakeup of our task and the schedule that got our task + * current. + */ + smp_rmb(); if (p->on_rq && ttwu_remote(p, wake_flags)) goto stat; -- cgit v0.10.2 From d4c4fed08f31f3746000c46cb1b20bed2959547a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 15 Aug 2016 09:05:45 -0600 Subject: efi: Make for_each_efi_memory_desc_in_map() cope with running on Xen While commit 55f1ea15216 ("efi: Fix for_each_efi_memory_desc_in_map() for empty memmaps") made an attempt to deal with empty memory maps, it didn't address the case where the map field never gets set, as is apparently the case when running under Xen. Reported-by: Tested-by: Cc: Vitaly Kuznetsov Cc: Jiri Slaby Cc: Mark Rutland Cc: # v4.7+ Signed-off-by: Jan Beulich [ Guard the loop with a NULL check instead of pointer underflow ] Signed-off-by: Matt Fleming diff --git a/include/linux/efi.h b/include/linux/efi.h index 7f5a582..23cd3ce 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -946,7 +946,7 @@ extern int efi_memattr_apply_permissions(struct mm_struct *mm, /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc_in_map(m, md) \ for ((md) = (m)->map; \ - ((void *)(md) + (m)->desc_size) <= (m)->map_end; \ + (md) && ((void *)(md) + (m)->desc_size) <= (m)->map_end; \ (md) = (void *)(md) + (m)->desc_size) /** -- cgit v0.10.2 From 4af9ed578a50cd331a725322cfd9d555251ce788 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 30 Aug 2016 12:41:37 +0200 Subject: efi: Fix handling error value in fdt_find_uefi_params of_get_flat_dt_subnode_by_name can return negative value in case of error. Assigning the result to unsigned variable and checking if the variable is lesser than zero is incorrect and always false. The patch fixes it by using signed variable to check the result. The problem has been detected using semantic patch scripts/coccinelle/tests/unsigned_lesser_than_zero.cocci Signed-off-by: Andrzej Hajda Cc: Bartlomiej Zolnierkiewicz Cc: Marek Szyprowski Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Shawn Lin Cc: Mark Rutland Cc: Signed-off-by: Matt Fleming diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 5a2631a..7dd2e2d 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -657,9 +657,12 @@ static int __init fdt_find_uefi_params(unsigned long node, const char *uname, } if (subnode) { - node = of_get_flat_dt_subnode_by_name(node, subnode); - if (node < 0) + int err = of_get_flat_dt_subnode_by_name(node, subnode); + + if (err < 0) return 0; + + node = err; } return __find_uefi_params(node, info, dt_params[i].params); -- cgit v0.10.2 From dadb57abc37499f565b23933dbf49b435c3ba8af Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:51 -0600 Subject: efi/libstub: Allocate headspace in efi_get_memory_map() efi_get_memory_map() allocates a buffer to store the memory map that it retrieves. This buffer may need to be reused by the client after ExitBootServices() is called, at which point allocations are not longer permitted. To support this usecase, provide the allocated buffer size back to the client, and allocate some additional headroom to account for any reasonable growth in the map that is likely to happen between the call to efi_get_memory_map() and the client reusing the buffer. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ff574da..c5b7c7b 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1008,7 +1008,7 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle, bool is64) { struct efi_info *efi = &boot_params->efi_info; - unsigned long map_sz, key, desc_size; + unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; const char *signature; @@ -1019,14 +1019,20 @@ static efi_status_t exit_boot(struct boot_params *boot_params, bool called_exit = false; u8 nr_entries; int i; - - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; + struct efi_boot_memmap map; + + nr_desc = 0; + e820ext = NULL; + e820ext_size = 0; + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; get_map: - status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size, - &desc_version, &key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) return status; diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 3bd127f9..29368ac 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -41,6 +41,8 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; #define EFI_ALLOC_ALIGN EFI_PAGE_SIZE #endif +#define EFI_MMAP_NR_SLACK_SLOTS 8 + struct file_info { efi_file_handle_t *handle; u64 size; @@ -63,49 +65,62 @@ void efi_printk(efi_system_table_t *sys_table_arg, char *str) } } +static inline bool mmap_has_headroom(unsigned long buff_size, + unsigned long map_size, + unsigned long desc_size) +{ + unsigned long slack = buff_size - map_size; + + return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS; +} + efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - efi_memory_desc_t **map, - unsigned long *map_size, - unsigned long *desc_size, - u32 *desc_ver, - unsigned long *key_ptr) + struct efi_boot_memmap *map) { efi_memory_desc_t *m = NULL; efi_status_t status; unsigned long key; u32 desc_version; - *map_size = sizeof(*m) * 32; + *map->desc_size = sizeof(*m); + *map->map_size = *map->desc_size * 32; + *map->buff_size = *map->map_size; again: - /* - * Add an additional efi_memory_desc_t because we're doing an - * allocation which may be in a new descriptor region. - */ - *map_size += sizeof(*m); status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - *map_size, (void **)&m); + *map->map_size, (void **)&m); if (status != EFI_SUCCESS) goto fail; - *desc_size = 0; + *map->desc_size = 0; key = 0; - status = efi_call_early(get_memory_map, map_size, m, - &key, desc_size, &desc_version); - if (status == EFI_BUFFER_TOO_SMALL) { + status = efi_call_early(get_memory_map, map->map_size, m, + &key, map->desc_size, &desc_version); + if (status == EFI_BUFFER_TOO_SMALL || + !mmap_has_headroom(*map->buff_size, *map->map_size, + *map->desc_size)) { efi_call_early(free_pool, m); + /* + * Make sure there is some entries of headroom so that the + * buffer can be reused for a new map after allocations are + * no longer permitted. Its unlikely that the map will grow to + * exceed this headroom once we are ready to trigger + * ExitBootServices() + */ + *map->map_size += *map->desc_size * EFI_MMAP_NR_SLACK_SLOTS; + *map->buff_size = *map->map_size; goto again; } if (status != EFI_SUCCESS) efi_call_early(free_pool, m); - if (key_ptr && status == EFI_SUCCESS) - *key_ptr = key; - if (desc_ver && status == EFI_SUCCESS) - *desc_ver = desc_version; + if (map->key_ptr && status == EFI_SUCCESS) + *map->key_ptr = key; + if (map->desc_ver && status == EFI_SUCCESS) + *map->desc_ver = desc_version; fail: - *map = m; + *map->map = m; return status; } @@ -113,13 +128,20 @@ fail: unsigned long get_dram_base(efi_system_table_t *sys_table_arg) { efi_status_t status; - unsigned long map_size; + unsigned long map_size, buff_size; unsigned long membase = EFI_ERROR; struct efi_memory_map map; efi_memory_desc_t *md; + struct efi_boot_memmap boot_map; - status = efi_get_memory_map(sys_table_arg, (efi_memory_desc_t **)&map.map, - &map_size, &map.desc_size, NULL, NULL); + boot_map.map = (efi_memory_desc_t **)&map.map; + boot_map.map_size = &map_size; + boot_map.desc_size = &map.desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) return membase; @@ -144,15 +166,22 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr, unsigned long max) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; efi_memory_desc_t *map; efi_status_t status; unsigned long nr_pages; u64 max_addr = 0; int i; + struct efi_boot_memmap boot_map; + + boot_map.map = ↦ + boot_map.map_size = &map_size; + boot_map.desc_size = &desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size, - NULL, NULL); + status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) goto fail; @@ -230,14 +259,21 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, unsigned long *addr) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; efi_memory_desc_t *map; efi_status_t status; unsigned long nr_pages; int i; + struct efi_boot_memmap boot_map; + + boot_map.map = ↦ + boot_map.map_size = &map_size; + boot_map.desc_size = &desc_size; + boot_map.desc_ver = NULL; + boot_map.key_ptr = NULL; + boot_map.buff_size = &buff_size; - status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size, - NULL, NULL); + status = efi_get_memory_map(sys_table_arg, &boot_map); if (status != EFI_SUCCESS) goto fail; diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index e58abfa..bec0fa8 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -175,13 +175,21 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, unsigned long fdt_addr, unsigned long fdt_size) { - unsigned long map_size, desc_size; + unsigned long map_size, desc_size, buff_size; u32 desc_ver; unsigned long mmap_key; efi_memory_desc_t *memory_map, *runtime_map; unsigned long new_fdt_size; efi_status_t status; int runtime_entry_count = 0; + struct efi_boot_memmap map; + + map.map = &runtime_map; + map.map_size = &map_size; + map.desc_size = &desc_size; + map.desc_ver = &desc_ver; + map.key_ptr = &mmap_key; + map.buff_size = &buff_size; /* * Get a copy of the current memory map that we will use to prepare @@ -189,8 +197,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, * subsequent allocations adding entries, since they could not affect * the number of EFI_MEMORY_RUNTIME regions. */ - status = efi_get_memory_map(sys_table, &runtime_map, &map_size, - &desc_size, &desc_ver, &mmap_key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) { pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n"); return status; @@ -199,6 +206,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, pr_efi(sys_table, "Exiting boot services and installing virtual address map...\n"); + map.map = &memory_map; /* * Estimate size of new FDT, and allocate memory for it. We * will allocate a bigger buffer if this ends up being too @@ -218,8 +226,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, * we can get the memory map key needed for * exit_boot_services(). */ - status = efi_get_memory_map(sys_table, &memory_map, &map_size, - &desc_size, &desc_ver, &mmap_key); + status = efi_get_memory_map(sys_table, &map); if (status != EFI_SUCCESS) goto fail_free_new_fdt; diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 53f6d3f..0c9f58c 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -73,12 +73,20 @@ efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg, unsigned long random_seed) { unsigned long map_size, desc_size, total_slots = 0, target_slot; + unsigned long buff_size; efi_status_t status; efi_memory_desc_t *memory_map; int map_offset; + struct efi_boot_memmap map; - status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size, - &desc_size, NULL, NULL); + map.map = &memory_map; + map.map_size = &map_size; + map.desc_size = &desc_size; + map.desc_ver = NULL; + map.key_ptr = NULL; + map.buff_size = &buff_size; + + status = efi_get_memory_map(sys_table_arg, &map); if (status != EFI_SUCCESS) return status; diff --git a/include/linux/efi.h b/include/linux/efi.h index 23cd3ce..943fee5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -118,6 +118,15 @@ typedef struct { u32 imagesize; } efi_capsule_header_t; +struct efi_boot_memmap { + efi_memory_desc_t **map; + unsigned long *map_size; + unsigned long *desc_size; + u32 *desc_ver; + unsigned long *key_ptr; + unsigned long *buff_size; +}; + /* * EFI capsule flags */ @@ -1371,11 +1380,7 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, efi_loaded_image_t *image, int *cmd_line_len); efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg, - efi_memory_desc_t **map, - unsigned long *map_size, - unsigned long *desc_size, - u32 *desc_ver, - unsigned long *key_ptr); + struct efi_boot_memmap *map); efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, unsigned long size, unsigned long align, -- cgit v0.10.2 From fc07716ba803483be91bc4b2344f9c84985e6f07 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:52 -0600 Subject: efi/libstub: Introduce ExitBootServices helper The spec allows ExitBootServices to fail with EFI_INVALID_PARAMETER if a race condition has occurred where the EFI has updated the memory map after the stub grabbed a reference to the map. The spec defines a retry proceedure with specific requirements to handle this scenario. This scenario was previously observed on x86 - commit d3768d885c6c ("x86, efi: retry ExitBootServices() on failure") but the current fix is not spec compliant and the scenario is now observed on the Qualcomm Technologies QDF2432 via the FDT stub which does not handle the error and thus causes boot failures. The user will notice the boot failure as the kernel is not executed and the system may drop back to a UEFI shell, but will be unresponsive to input and the system will require a power cycle to recover. Add a helper to the stub library that correctly adheres to the spec in the case of EFI_INVALID_PARAMETER from ExitBootServices and can be universally used across all stub implementations. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 29368ac..aded106 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -740,3 +740,76 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg, *cmd_line_len = options_bytes; return (char *)cmdline_addr; } + +/* + * Handle calling ExitBootServices according to the requirements set out by the + * spec. Obtains the current memory map, and returns that info after calling + * ExitBootServices. The client must specify a function to perform any + * processing of the memory map data prior to ExitBootServices. A client + * specific structure may be passed to the function via priv. The client + * function may be called multiple times. + */ +efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg, + void *handle, + struct efi_boot_memmap *map, + void *priv, + efi_exit_boot_map_processing priv_func) +{ + efi_status_t status; + + status = efi_get_memory_map(sys_table_arg, map); + + if (status != EFI_SUCCESS) + goto fail; + + status = priv_func(sys_table_arg, map, priv); + if (status != EFI_SUCCESS) + goto free_map; + + status = efi_call_early(exit_boot_services, handle, *map->key_ptr); + + if (status == EFI_INVALID_PARAMETER) { + /* + * The memory map changed between efi_get_memory_map() and + * exit_boot_services(). Per the UEFI Spec v2.6, Section 6.4: + * EFI_BOOT_SERVICES.ExitBootServices we need to get the + * updated map, and try again. The spec implies one retry + * should be sufficent, which is confirmed against the EDK2 + * implementation. Per the spec, we can only invoke + * get_memory_map() and exit_boot_services() - we cannot alloc + * so efi_get_memory_map() cannot be used, and we must reuse + * the buffer. For all practical purposes, the headroom in the + * buffer should account for any changes in the map so the call + * to get_memory_map() is expected to succeed here. + */ + *map->map_size = *map->buff_size; + status = efi_call_early(get_memory_map, + map->map_size, + *map->map, + map->key_ptr, + map->desc_size, + map->desc_ver); + + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + status = priv_func(sys_table_arg, map, priv); + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + status = efi_call_early(exit_boot_services, handle, *map->key_ptr); + } + + /* exit_boot_services() was called, thus cannot free */ + if (status != EFI_SUCCESS) + goto fail; + + return EFI_SUCCESS; + +free_map: + efi_call_early(free_pool, *map->map); +fail: + return status; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index 943fee5..0148a30 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1462,4 +1462,14 @@ extern void efi_call_virt_check_flags(unsigned long flags, const char *call); arch_efi_call_virt_teardown(); \ }) +typedef efi_status_t (*efi_exit_boot_map_processing)( + efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv); + +efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table, + void *handle, + struct efi_boot_memmap *map, + void *priv, + efi_exit_boot_map_processing priv_func); #endif /* _LINUX_EFI_H */ -- cgit v0.10.2 From ed9cc156c42ff0c0bf9b1d09df48a12bf0873473 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:53 -0600 Subject: efi/libstub: Use efi_exit_boot_services() in FDT The FDT code directly calls ExitBootServices. This is inadvisable as the UEFI spec details a complex set of errors, race conditions, and API interactions that the caller of ExitBootServices must get correct. The FDT code does not handle EFI_INVALID_PARAMETER as required by the spec, which causes intermittent boot failures on the Qualcomm Technologies QDF2432. Call the efi_exit_boot_services() helper intead, which handles the EFI_INVALID_PARAMETER scenario properly. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index bec0fa8..a6a9311 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -152,6 +152,27 @@ fdt_set_fail: #define EFI_FDT_ALIGN EFI_PAGE_SIZE #endif +struct exit_boot_struct { + efi_memory_desc_t *runtime_map; + int *runtime_entry_count; +}; + +static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv) +{ + struct exit_boot_struct *p = priv; + /* + * Update the memory map with virtual addresses. The function will also + * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME + * entries so that we can pass it straight to SetVirtualAddressMap() + */ + efi_get_virtmap(*map->map, *map->map_size, *map->desc_size, + p->runtime_map, p->runtime_entry_count); + + return EFI_SUCCESS; +} + /* * Allocate memory for a new FDT, then add EFI, commandline, and * initrd related fields to the FDT. This routine increases the @@ -183,6 +204,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, efi_status_t status; int runtime_entry_count = 0; struct efi_boot_memmap map; + struct exit_boot_struct priv; map.map = &runtime_map; map.map_size = &map_size; @@ -257,16 +279,11 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, } } - /* - * Update the memory map with virtual addresses. The function will also - * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME - * entries so that we can pass it straight into SetVirtualAddressMap() - */ - efi_get_virtmap(memory_map, map_size, desc_size, runtime_map, - &runtime_entry_count); - - /* Now we are ready to exit_boot_services.*/ - status = sys_table->boottime->exit_boot_services(handle, mmap_key); + sys_table->boottime->free_pool(memory_map); + priv.runtime_map = runtime_map; + priv.runtime_entry_count = &runtime_entry_count; + status = efi_exit_boot_services(sys_table, handle, &map, &priv, + exit_boot_func); if (status == EFI_SUCCESS) { efi_set_virtual_address_map_t *svam; -- cgit v0.10.2 From d64934019f6cc39202e2f78063709f61ca5cb364 Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Mon, 29 Aug 2016 14:38:54 -0600 Subject: x86/efi: Use efi_exit_boot_services() The eboot code directly calls ExitBootServices. This is inadvisable as the UEFI spec details a complex set of errors, race conditions, and API interactions that the caller of ExitBootServices must get correct. The eboot code attempts allocations after calling ExitBootSerives which is not permitted per the spec. Call the efi_exit_boot_services() helper intead, which handles the allocation scenario properly. Signed-off-by: Jeffrey Hugo Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Leif Lindholm Cc: Ingo Molnar Cc: Signed-off-by: Matt Fleming diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index c5b7c7b..94dd4a3 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -1004,85 +1004,87 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, return status; } +struct exit_boot_struct { + struct boot_params *boot_params; + struct efi_info *efi; + struct setup_data *e820ext; + __u32 e820ext_size; + bool is64; +}; + +static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg, + struct efi_boot_memmap *map, + void *priv) +{ + static bool first = true; + const char *signature; + __u32 nr_desc; + efi_status_t status; + struct exit_boot_struct *p = priv; + + if (first) { + nr_desc = *map->buff_size / *map->desc_size; + if (nr_desc > ARRAY_SIZE(p->boot_params->e820_map)) { + u32 nr_e820ext = nr_desc - + ARRAY_SIZE(p->boot_params->e820_map); + + status = alloc_e820ext(nr_e820ext, &p->e820ext, + &p->e820ext_size); + if (status != EFI_SUCCESS) + return status; + } + first = false; + } + + signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; + memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32)); + + p->efi->efi_systab = (unsigned long)sys_table_arg; + p->efi->efi_memdesc_size = *map->desc_size; + p->efi->efi_memdesc_version = *map->desc_ver; + p->efi->efi_memmap = (unsigned long)*map->map; + p->efi->efi_memmap_size = *map->map_size; + +#ifdef CONFIG_X86_64 + p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32; + p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32; +#endif + + return EFI_SUCCESS; +} + static efi_status_t exit_boot(struct boot_params *boot_params, void *handle, bool is64) { - struct efi_info *efi = &boot_params->efi_info; unsigned long map_sz, key, desc_size, buff_size; efi_memory_desc_t *mem_map; struct setup_data *e820ext; - const char *signature; __u32 e820ext_size; - __u32 nr_desc, prev_nr_desc; efi_status_t status; __u32 desc_version; - bool called_exit = false; - u8 nr_entries; - int i; struct efi_boot_memmap map; + struct exit_boot_struct priv; + + map.map = &mem_map; + map.map_size = &map_sz; + map.desc_size = &desc_size; + map.desc_ver = &desc_version; + map.key_ptr = &key; + map.buff_size = &buff_size; + priv.boot_params = boot_params; + priv.efi = &boot_params->efi_info; + priv.e820ext = NULL; + priv.e820ext_size = 0; + priv.is64 = is64; - nr_desc = 0; - e820ext = NULL; - e820ext_size = 0; - map.map = &mem_map; - map.map_size = &map_sz; - map.desc_size = &desc_size; - map.desc_ver = &desc_version; - map.key_ptr = &key; - map.buff_size = &buff_size; - -get_map: - status = efi_get_memory_map(sys_table, &map); - + /* Might as well exit boot services now */ + status = efi_exit_boot_services(sys_table, handle, &map, &priv, + exit_boot_func); if (status != EFI_SUCCESS) return status; - prev_nr_desc = nr_desc; - nr_desc = map_sz / desc_size; - if (nr_desc > prev_nr_desc && - nr_desc > ARRAY_SIZE(boot_params->e820_map)) { - u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map); - - status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size); - if (status != EFI_SUCCESS) - goto free_mem_map; - - efi_call_early(free_pool, mem_map); - goto get_map; /* Allocated memory, get map again */ - } - - signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; - memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); - - efi->efi_systab = (unsigned long)sys_table; - efi->efi_memdesc_size = desc_size; - efi->efi_memdesc_version = desc_version; - efi->efi_memmap = (unsigned long)mem_map; - efi->efi_memmap_size = map_sz; - -#ifdef CONFIG_X86_64 - efi->efi_systab_hi = (unsigned long)sys_table >> 32; - efi->efi_memmap_hi = (unsigned long)mem_map >> 32; -#endif - - /* Might as well exit boot services now */ - status = efi_call_early(exit_boot_services, handle, key); - if (status != EFI_SUCCESS) { - /* - * ExitBootServices() will fail if any of the event - * handlers change the memory map. In which case, we - * must be prepared to retry, but only once so that - * we're guaranteed to exit on repeated failures instead - * of spinning forever. - */ - if (called_exit) - goto free_mem_map; - - called_exit = true; - efi_call_early(free_pool, mem_map); - goto get_map; - } - + e820ext = priv.e820ext; + e820ext_size = priv.e820ext_size; /* Historic? */ boot_params->alt_mem_k = 32 * 1024; @@ -1091,10 +1093,6 @@ get_map: return status; return EFI_SUCCESS; - -free_mem_map: - efi_call_early(free_pool, mem_map); - return status; } /* -- cgit v0.10.2 From 4d21cef3ea00ba3ac508eb61fb8db70e3e31df67 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 2 Sep 2016 12:33:49 +0200 Subject: KVM: s390: vsie: fix riccbd We store the address of riccbd at the wrong location, overwriting gvrd. This means that our nested guest will not be able to use runtime instrumentation. Also, a memory leak, if our KVM guest actually sets gvrd. Not noticed until now, as KVM guests never make use of gvrd and runtime instrumentation wasn't completely tested yet. Reported-by: Fan Zhang Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index c106488..d8673e2 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -584,7 +584,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* Validity 0x0044 will be checked by SIE */ if (rc) goto unpin; - scb_s->gvrd = hpa; + scb_s->riccbd = hpa; } return 0; unpin: -- cgit v0.10.2 From e12c8f36f3f7a60d55938c5aed5999278fa92bcb Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 30 Aug 2016 16:14:00 +0800 Subject: KVM: lapic: adjust preemption timer correctly when goes TSC backward MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TSC_OFFSET will be adjusted if discovers TSC backward during vCPU load. The preemption timer, which relies on the guest tsc to reprogram its preemption timer value, is also reprogrammed if vCPU is scheded in to a different pCPU. However, the current implementation reprogram preemption timer before TSC_OFFSET is adjusted to the right value, resulting in the preemption timer firing prematurely. This patch fix it by adjusting TSC_OFFSET before reprogramming preemption timer if TSC backward. Cc: Paolo Bonzini Cc: Radim Krċmář Cc: Yunhong Jiang Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 19f9f9e..699f872 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2743,16 +2743,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (tsc_delta < 0) mark_tsc_unstable("KVM discovered backwards TSC"); - if (kvm_lapic_hv_timer_in_use(vcpu) && - kvm_x86_ops->set_hv_timer(vcpu, - kvm_get_lapic_tscdeadline_msr(vcpu))) - kvm_lapic_switch_to_sw_timer(vcpu); if (check_tsc_unstable()) { u64 offset = kvm_compute_tsc_offset(vcpu, vcpu->arch.last_guest_tsc); kvm_x86_ops->write_tsc_offset(vcpu, offset); vcpu->arch.tsc_catchup = 1; } + if (kvm_lapic_hv_timer_in_use(vcpu) && + kvm_x86_ops->set_hv_timer(vcpu, + kvm_get_lapic_tscdeadline_msr(vcpu))) + kvm_lapic_switch_to_sw_timer(vcpu); /* * On a host with synchronized TSC, there is no need to update * kvmclock on vcpu->cpu migration -- cgit v0.10.2 From 5210d393ef84e5d2a4854671a9af2d97fd1b8dd4 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Fri, 2 Sep 2016 20:49:12 +0800 Subject: netfilter: nf_tables_trace: fix endiness when dump chain policy NFTA_TRACE_POLICY attribute is big endian, but we forget to call htonl to convert it. Fortunately, this attribute is parsed as big endian in libnftnl. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 39eb1cc..fa24a5b 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -237,7 +237,7 @@ void nft_trace_notify(struct nft_traceinfo *info) break; case NFT_TRACETYPE_POLICY: if (nla_put_be32(skb, NFTA_TRACE_POLICY, - info->basechain->policy)) + htonl(info->basechain->policy))) goto nla_put_failure; break; } -- cgit v0.10.2 From 79d102cbfd2e9d94257fcc7c82807ef1cdf80322 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 5 Sep 2016 17:30:07 +0200 Subject: perf/x86/intel/cqm: Check cqm/mbm enabled state in event init Yanqiu Zhang reported kernel panic when using mbm event on system where CQM is detected but without mbm event support, like with perf: # perf stat -e 'intel_cqm/event=3/' -a BUG: unable to handle kernel NULL pointer dereference at 0000000000000020 IP: [] update_sample+0xbc/0xe0 ... [] __intel_mbm_event_init+0x18/0x20 [] flush_smp_call_function_queue+0x7b/0x160 [] generic_smp_call_function_single_interrupt+0x13/0x60 [] smp_call_function_interrupt+0x27/0x40 [] call_function_interrupt+0x8c/0xa0 ... The reason is that we currently allow to init mbm event even if mbm support is not detected. Adding checks for both cqm and mbm events and support into cqm's event_init. Fixes: 33c3cc7acfd9 ("perf/x86/mbm: Add Intel Memory B/W Monitoring enumeration and init") Reported-by: Yanqiu Zhang Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Vikas Shivappa Cc: Tony Luck Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1473089407-21857-1-git-send-email-jolsa@kernel.org Signed-off-by: Thomas Gleixner diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 783c49d..8f82b02 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -458,6 +458,11 @@ static void __intel_cqm_event_count(void *info); static void init_mbm_sample(u32 rmid, u32 evt_type); static void __intel_mbm_event_count(void *info); +static bool is_cqm_event(int e) +{ + return (e == QOS_L3_OCCUP_EVENT_ID); +} + static bool is_mbm_event(int e) { return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID); @@ -1366,6 +1371,10 @@ static int intel_cqm_event_init(struct perf_event *event) (event->attr.config > QOS_MBM_LOCAL_EVENT_ID)) return -EINVAL; + if ((is_cqm_event(event->attr.config) && !cqm_enabled) || + (is_mbm_event(event->attr.config) && !mbm_enabled)) + return -EINVAL; + /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || -- cgit v0.10.2 From d2896d4b55b2e32b423072a4124d7da4dc1e6cb1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 22 Aug 2016 09:01:17 +0100 Subject: arm: KVM: Fix idmap overlap detection when the kernel is idmap'ed We're trying hard to detect when the HYP idmap overlaps with the HYP va, as it makes the teardown of a cpu dangerous. But there is one case where an overlap is completely safe, which is when the whole of the kernel is idmap'ed, which is likely to happen on 32bit when RAM is at 0x8000000 and we're using a 2G/2G VA split. In that case, we can proceed safely. Reported-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 29d0b23..a3faafe 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1714,7 +1714,8 @@ int kvm_mmu_init(void) kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && - hyp_idmap_start < kern_hyp_va(~0UL)) { + hyp_idmap_start < kern_hyp_va(~0UL) && + hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { /* * The idmap page is intersecting with the VA space, * it is not safe to continue further. -- cgit v0.10.2 From d31ed3f05763644840c654a384eaefa94c097ba2 Mon Sep 17 00:00:00 2001 From: Jan Leupold Date: Wed, 6 Jul 2016 13:22:35 +0200 Subject: drm: atmel-hlcdc: Fix vertical scaling The code is applying the same scaling for the X and Y components, thus making the scaling feature only functional when both components have the same scaling factor. Do the s/_w/_h/ replacement where appropriate to fix vertical scaling. Signed-off-by: Jan Leupold Fixes: 1a396789f65a2 ("drm: add Atmel HLCDC Display Controller support") Cc: Signed-off-by: Boris Brezillon diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index 016c191..52c527f 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -320,19 +320,19 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane, u32 *coeff_tab = heo_upscaling_ycoef; u32 max_memsize; - if (state->crtc_w < state->src_w) + if (state->crtc_h < state->src_h) coeff_tab = heo_downscaling_ycoef; for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++) atmel_hlcdc_layer_update_cfg(&plane->layer, 33 + i, 0xffffffff, coeff_tab[i]); - factor = ((8 * 256 * state->src_w) - (256 * 4)) / - state->crtc_w; + factor = ((8 * 256 * state->src_h) - (256 * 4)) / + state->crtc_h; factor++; - max_memsize = ((factor * state->crtc_w) + (256 * 4)) / + max_memsize = ((factor * state->crtc_h) + (256 * 4)) / 2048; - if (max_memsize > state->src_w) + if (max_memsize > state->src_h) factor--; factor_reg |= (factor << 16) | 0x80000000; } -- cgit v0.10.2 From 1ba7db07ccc2825669d6e376632316813a072887 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 11 Jul 2016 12:19:40 +0200 Subject: drm/atmel-hlcdc: Make ->reset() implementation static The atmel_hlcdc_crtc_reset() function is never used outside the file and can be static. This avoids a warning from sparse. Signed-off-by: Thierry Reding diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index a978381..9b17a66 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -387,7 +387,7 @@ void atmel_hlcdc_crtc_irq(struct drm_crtc *c) atmel_hlcdc_crtc_finish_page_flip(drm_crtc_to_atmel_hlcdc_crtc(c)); } -void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc) +static void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc) { struct atmel_hlcdc_crtc_state *state; -- cgit v0.10.2 From cd28e716c6869d2f06e64bcd679d0a45dd8a6295 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Tue, 6 Sep 2016 12:04:11 +0800 Subject: drm/i915: enable vGPU detection for all vGPU capability is handled by GVT-g host driver, not needed to put extra HW check for vGPU detection. And we'll actually support vGPU from BDW. Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang Reviewed-by: Joonas Lahtinen Acked-by: Chris Wilson Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160906040412.1274-2-zhenyuw@linux.intel.com (cherry picked from commit 8ef89995c735f978d5dfcb3ca6bce70d41728c91) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index f6acb5a..b81cfb3 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -65,9 +65,6 @@ void i915_check_vgpu(struct drm_i915_private *dev_priv) BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); - if (!IS_HASWELL(dev_priv)) - return; - magic = __raw_i915_read64(dev_priv, vgtif_reg(magic)); if (magic != VGT_MAGIC) return; -- cgit v0.10.2 From 557b1a8cae25e36ac2f125d93f003e60a7d0d014 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Tue, 6 Sep 2016 12:04:12 +0800 Subject: drm/i915: disable 48bit full PPGTT when vGPU is active Disable 48bit full PPGTT on vGPU too for now. Signed-off-by: Zhi Wang Signed-off-by: Zhenyu Wang Reviewed-by: Joonas Lahtinen Acked-by: Chris Wilson Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20160906040412.1274-3-zhenyuw@linux.intel.com (cherry picked from commit e320d40022128845dfff900422ea9fd69f576c98) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7a30af7..f38ceff 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -122,8 +122,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, has_full_48bit_ppgtt = IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9; - if (intel_vgpu_active(dev_priv)) - has_full_ppgtt = false; /* emulation is too hard */ + if (intel_vgpu_active(dev_priv)) { + /* emulation is too hard */ + has_full_ppgtt = false; + has_full_48bit_ppgtt = false; + } if (!has_aliasing_ppgtt) return 0; @@ -158,7 +161,7 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, return 0; } - if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists) + if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt) return has_full_48bit_ppgtt ? 3 : 2; else return has_aliasing_ppgtt ? 1 : 0; -- cgit v0.10.2 From d1a6cba576fc7c43e476538fe5aa72fe04bd80e1 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 6 Sep 2016 22:31:02 +0800 Subject: netfilter: nft_chain_route: re-route before skb is queued to userspace Imagine such situation, user add the following nft rules, and queue the packets to userspace for further check: # ip rule add fwmark 0x0/0x1 lookup eth0 # ip rule add fwmark 0x1/0x1 lookup eth1 # nft add table filter # nft add chain filter output {type route hook output priority 0 \;} # nft add rule filter output mark set 0x1 # nft add rule filter output queue num 0 But after we reinject the skbuff, the packet will be sent via the wrong route, i.e. in this case, the packet will be routed via eth0 table, not eth1 table. Because we skip to do re-route when verdict is NF_QUEUE, even if the mark was changed. Acctually, we should not touch sk_buff if verdict is NF_DROP or NF_STOLEN, and when re-route fails, return NF_DROP with error code. This is consistent with the mangle table in iptables. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index 2375b0a..30493be 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv, __be32 saddr, daddr; u_int8_t tos; const struct iphdr *iph; + int err; /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || @@ -46,15 +47,17 @@ static unsigned int nf_route_table_hook(void *priv, tos = iph->tos; ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_QUEUE) { + if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); if (iph->saddr != saddr || iph->daddr != daddr || skb->mark != mark || - iph->tos != tos) - if (ip_route_me_harder(state->net, skb, RTN_UNSPEC)) - ret = NF_DROP; + iph->tos != tos) { + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } } return ret; } diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 71d995f..2535223 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -31,6 +31,7 @@ static unsigned int nf_route_table_hook(void *priv, struct in6_addr saddr, daddr; u_int8_t hop_limit; u32 mark, flowlabel; + int err; /* malformed packet, drop it */ if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) @@ -46,13 +47,16 @@ static unsigned int nf_route_table_hook(void *priv, flowlabel = *((u32 *)ipv6_hdr(skb)); ret = nft_do_chain(&pkt, priv); - if (ret != NF_DROP && ret != NF_QUEUE && + if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP; + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { + err = ip6_route_me_harder(state->net, skb); + if (err < 0) + ret = NF_DROP_ERR(err); + } return ret; } -- cgit v0.10.2 From 78d506e1b7071b24850fd5ac22b896c459b0a04c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Sep 2016 11:22:49 -0400 Subject: xprtrdma: Revert 3d4cf35bd4fa ("xprtrdma: Reply buffer exhaustion...") Receive buffer exhaustion, if it were to actually occur, would be catastrophic. However, when there are no reply buffers to post, that means all of them have already been posted and are waiting for incoming replies. By design, there can never be more RPCs in flight than there are available receive buffers. A receive buffer can be left posted after an RPC exits without a received reply; say, due to a credential problem or a soft timeout. This does not result in fewer posted receive buffers than there are pending RPCs, and there is already logic in xprtrdma to deal appropriately with this case. It also looks like the "+ 2" that was removed was accidentally accommodating the number of extra receive buffers needed for receiving backchannel requests. That will need to be addressed by another patch. Fixes: 3d4cf35bd4fa ("xprtrdma: Reply buffer exhaustion can be...") Signed-off-by: Chuck Lever Reviewed-by: Anna Schumaker Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 536d0be..fefcba9 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -923,7 +923,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i < buf->rb_max_requests; i++) { + for (i = 0; i < buf->rb_max_requests + 2; i++) { struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); @@ -1076,6 +1076,8 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) /* * Get a set of request/reply buffers. + * + * Reply buffer (if available) is attached to send buffer upon return. */ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) @@ -1094,13 +1096,13 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) out_reqbuf: spin_unlock(&buffers->rb_lock); - pr_warn("rpcrdma: out of request buffers (%p)\n", buffers); + pr_warn("RPC: %s: out of request buffers\n", __func__); return NULL; out_repbuf: - list_add(&req->rl_free, &buffers->rb_send_bufs); spin_unlock(&buffers->rb_lock); - pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers); - return NULL; + pr_warn("RPC: %s: out of reply buffers\n", __func__); + req->rl_reply = NULL; + return req; } /* -- cgit v0.10.2 From 05c974669ecec510a85d8534099bb75404e82c41 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 Sep 2016 11:22:58 -0400 Subject: xprtrdma: Fix receive buffer accounting An RPC can terminate before its reply arrives, if a credential problem or a soft timeout occurs. After this happens, xprtrdma reports it is out of Receive buffers. A Receive buffer is posted before each RPC is sent, and returned to the buffer pool when a reply is received. If no reply is received for an RPC, that Receive buffer remains posted. But xprtrdma tries to post another when the next RPC is sent. If this happens a few dozen times, there are no receive buffers left to be posted at send time. I don't see a way for a transport connection to recover at that point, and it will spit warnings and unnecessarily delay RPCs on occasion for its remaining lifetime. Commit 1e465fd4ff47 ("xprtrdma: Replace send and receive arrays") removed a little bit of logic to detect this case and not provide a Receive buffer so no more buffers are posted, and then transport operation continues correctly. We didn't understand what that logic did, and it wasn't commented, so it was removed as part of the overhaul to support backchannel requests. Restore it, but be wary of the need to keep extra Receives posted to deal with backchannel requests. Fixes: 1e465fd4ff47 ("xprtrdma: Replace send and receive arrays") Signed-off-by: Chuck Lever Reviewed-by: Anna Schumaker Signed-off-by: Trond Myklebust diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fefcba9..799cce6 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include /* try_module_get()/module_put() */ @@ -923,7 +924,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) } INIT_LIST_HEAD(&buf->rb_recv_bufs); - for (i = 0; i < buf->rb_max_requests + 2; i++) { + for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); @@ -1018,6 +1019,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) rep = rpcrdma_buffer_get_rep_locked(buf); rpcrdma_destroy_rep(ia, rep); } + buf->rb_send_count = 0; spin_lock(&buf->rb_reqslock); while (!list_empty(&buf->rb_allreqs)) { @@ -1032,6 +1034,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) spin_lock(&buf->rb_reqslock); } spin_unlock(&buf->rb_reqslock); + buf->rb_recv_count = 0; rpcrdma_destroy_mrs(buf); } @@ -1074,6 +1077,23 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) spin_unlock(&buf->rb_mwlock); } +static struct rpcrdma_rep * +rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers) +{ + /* If an RPC previously completed without a reply (say, a + * credential problem or a soft timeout occurs) then hold off + * on supplying more Receive buffers until the number of new + * pending RPCs catches up to the number of posted Receives. + */ + if (unlikely(buffers->rb_send_count < buffers->rb_recv_count)) + return NULL; + + if (unlikely(list_empty(&buffers->rb_recv_bufs))) + return NULL; + buffers->rb_recv_count++; + return rpcrdma_buffer_get_rep_locked(buffers); +} + /* * Get a set of request/reply buffers. * @@ -1087,10 +1107,9 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) spin_lock(&buffers->rb_lock); if (list_empty(&buffers->rb_send_bufs)) goto out_reqbuf; + buffers->rb_send_count++; req = rpcrdma_buffer_get_req_locked(buffers); - if (list_empty(&buffers->rb_recv_bufs)) - goto out_repbuf; - req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); return req; @@ -1098,11 +1117,6 @@ out_reqbuf: spin_unlock(&buffers->rb_lock); pr_warn("RPC: %s: out of request buffers\n", __func__); return NULL; -out_repbuf: - spin_unlock(&buffers->rb_lock); - pr_warn("RPC: %s: out of reply buffers\n", __func__); - req->rl_reply = NULL; - return req; } /* @@ -1119,9 +1133,12 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) req->rl_reply = NULL; spin_lock(&buffers->rb_lock); + buffers->rb_send_count--; list_add_tail(&req->rl_free, &buffers->rb_send_bufs); - if (rep) + if (rep) { + buffers->rb_recv_count--; list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + } spin_unlock(&buffers->rb_lock); } @@ -1135,8 +1152,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; spin_lock(&buffers->rb_lock); - if (!list_empty(&buffers->rb_recv_bufs)) - req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + req->rl_reply = rpcrdma_buffer_get_rep(buffers); spin_unlock(&buffers->rb_lock); } @@ -1150,6 +1166,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; spin_lock(&buffers->rb_lock); + buffers->rb_recv_count--; list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); spin_unlock(&buffers->rb_lock); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 670fad5..a71b0f5 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -321,6 +321,7 @@ struct rpcrdma_buffer { char *rb_pool; spinlock_t rb_lock; /* protect buf lists */ + int rb_send_count, rb_recv_count; struct list_head rb_send_bufs; struct list_head rb_recv_bufs; u32 rb_max_requests; -- cgit v0.10.2 From 0bd2223594a4dcddc1e34b15774a3a4776f7749e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 1 Sep 2016 14:25:43 +0100 Subject: crypto: cryptd - initialize child shash_desc on import When calling .import() on a cryptd ahash_request, the structure members that describe the child transform in the shash_desc need to be initialized like they are when calling .init() Cc: stable@vger.kernel.org Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 77207b4..0c654e5 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -631,9 +631,14 @@ static int cryptd_hash_export(struct ahash_request *req, void *out) static int cryptd_hash_import(struct ahash_request *req, const void *in) { - struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct shash_desc *desc = cryptd_shash_desc(req); + + desc->tfm = ctx->child; + desc->flags = req->base.flags; - return crypto_shash_import(&rctx->desc, in); + return crypto_shash_import(desc, in); } static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, -- cgit v0.10.2 From fc2780b66b15092ac68272644a522c1624c48547 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Aug 2016 11:59:26 +0100 Subject: drm/i915: Add GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE to SNB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the CI test machines, SNB also uses the GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE value to report a bad GEN6_PCODE_MIN_FREQ_TABLE request. [ 157.744641] WARNING: CPU: 5 PID: 9238 at drivers/gpu/drm/i915/intel_pm.c:7760 sandybridge_pcode_write+0x141/0x200 [i915] [ 157.744642] Missing switch case (16) in gen6_check_mailbox_status [ 157.744642] Modules linked in: snd_hda_intel i915 ax88179_178a usbnet mii x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core mei_me lpc_ich snd_pcm mei broadcom bcm_phy_lib tg3 ptp pps_core [last unloaded: vgem] [ 157.744658] CPU: 5 PID: 9238 Comm: drv_hangman Tainted: G U W 4.8.0-rc3-CI-CI_DRM_1589+ #1 [ 157.744658] Hardware name: Dell Inc. XPS 8300 /0Y2MRG, BIOS A06 10/17/2011 [ 157.744659] 0000000000000000 ffff88011f093a98 ffffffff81426415 ffff88011f093ae8 [ 157.744662] 0000000000000000 ffff88011f093ad8 ffffffff8107d2a6 00001e50810d3c9f [ 157.744663] ffff880128680000 0000000000000008 0000000000000000 ffff88012868a650 [ 157.744665] Call Trace: [ 157.744669] [] dump_stack+0x67/0x92 [ 157.744672] [] __warn+0xc6/0xe0 [ 157.744673] [] warn_slowpath_fmt+0x4a/0x50 [ 157.744685] [] sandybridge_pcode_write+0x141/0x200 [i915] [ 157.744697] [] intel_enable_gt_powersave+0x64a/0x1330 [i915] [ 157.744712] [] ? i9xx_emit_request+0x1b/0x80 [i915] [ 157.744725] [] __i915_add_request+0x1e3/0x370 [i915] [ 157.744738] [] i915_gem_do_execbuffer.isra.16+0xced/0x1b80 [i915] [ 157.744740] [] ? __might_fault+0x3e/0x90 [ 157.744752] [] i915_gem_execbuffer2+0xc2/0x2a0 [i915] [ 157.744753] [] drm_ioctl+0x207/0x4c0 [ 157.744765] [] ? i915_gem_execbuffer+0x360/0x360 [i915] [ 157.744767] [] ? debug_lockdep_rcu_enabled+0x1d/0x20 [ 157.744769] [] do_vfs_ioctl+0x8e/0x680 [ 157.744770] [] ? __might_fault+0x87/0x90 [ 157.744771] [] ? __might_fault+0x3e/0x90 [ 157.744773] [] ? trace_hardirqs_on_caller+0x122/0x1b0 [ 157.744774] [] SyS_ioctl+0x3c/0x70 [ 157.744776] [] entry_SYSCALL_64_fastpath+0x1c/0xac Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97491 Fixes: 87660502f1a4 ("drm/i915/gen6+: Interpret mailbox error flags") Signed-off-by: Chris Wilson Cc: Lyude Cc: Matt Roper Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Ville Syrjälä Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20160826105926.3413-1-chris@chris-wilson.co.uk Acked-by: Mika Kuoppala (cherry picked from commit 7850d1c35344c7bd6a357240f2f9f60fc2c097b5) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 53e13c1..2d24813 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7859,6 +7859,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) case GEN6_PCODE_ILLEGAL_CMD: return -ENXIO; case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: return -EOVERFLOW; case GEN6_PCODE_TIMEOUT: return -ETIMEDOUT; -- cgit v0.10.2 From 82469c59d222f839ded5cd282172258e026f9112 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 6 Sep 2016 17:39:13 -0300 Subject: nvme: Don't suspend admin queue that wasn't created This fixes a regression in my previous commit c21377f8366c ("nvme: Suspend all queues before deletion"), which provoked an Oops in the removal path when removing a device that became IO incapable very early at probe (i.e. after a failed EEH recovery). Turns out, if the error occurred very early at the probe path, before even configuring the admin queue, we might try to suspend the uninitialized admin queue, accessing bad memory. Fixes: c21377f8366c ("nvme: Suspend all queues before deletion") Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Jay Freyensee Signed-off-by: Jens Axboe diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8dcf5a9..be84a84 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1693,7 +1693,12 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_suspend_queue(dev->queues[i]); if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { - nvme_suspend_queue(dev->queues[0]); + /* A device might become IO incapable very soon during + * probe, before the admin queue is configured. Thus, + * queue_count can be 0 here. + */ + if (dev->queue_count) + nvme_suspend_queue(dev->queues[0]); } else { nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); -- cgit v0.10.2 From db91e2370e087967cb6b6425c092188767fb5e00 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Wed, 7 Sep 2016 19:49:34 +0930 Subject: tools/lguest: Don't bork the terminal in case of wrong args Running lguest without arguments or with a wrong argument name borks the terminal, because the cleanup handler is set up too late in the initialization process. Signed-off-by: Daniel Baluta Signed-off-by: Rusty Russell Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index d9836c5..11c8d9b 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -3266,6 +3266,9 @@ int main(int argc, char *argv[]) } } + /* If we exit via err(), this kills all the threads, restores tty. */ + atexit(cleanup_devices); + /* We always have a console device, and it's always device 1. */ setup_console(); @@ -3369,9 +3372,6 @@ int main(int argc, char *argv[]) /* Ensure that we terminate if a device-servicing child dies. */ signal(SIGCHLD, kill_launcher); - /* If we exit via err(), this kills all the threads, restores tty. */ - atexit(cleanup_devices); - /* If requested, chroot to a directory */ if (chroot_path) { if (chroot(chroot_path) != 0) -- cgit v0.10.2 From c291b015158577be533dd5a959dfc09bab119eed Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 7 Sep 2016 10:21:33 +0800 Subject: x86/apic: Fix num_processors value in case of failure If the topology package map check of the APIC ID and the CPU is a failure, we don't generate the processor info for that APIC ID yet we increase disabled_cpus by one - which is buggy. Only increase num_processors once we are sure we don't fail. Signed-off-by: Dou Liyang Acked-by: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1473214893-16481-1-git-send-email-douly.fnst@cn.fujitsu.com [ Rewrote the changelog. ] Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 50c95af..f3e9b2d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2093,7 +2093,6 @@ int generic_processor_info(int apicid, int version) return -EINVAL; } - num_processors++; if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed @@ -2116,10 +2115,13 @@ int generic_processor_info(int apicid, int version) pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n", thiscpu, apicid); + disabled_cpus++; return -ENOSPC; } + num_processors++; + /* * Validate version */ -- cgit v0.10.2 From 7e9d2850a8db4e0d85a20bb692198bf2cc4be3b7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:00 +0200 Subject: ARM: dts: STiH410: Handle interconnect clock required by EHCI/OHCI (USB) The STiH4{07,10} platform contains some interconnect clocks which are used by various IPs. If this clock isn't handled correctly by ST's EHCI/OHCI drivers, their hub won't be found, the following error be shown and the result will be non-working USB: [ 97.221963] hub 2-1:1.0: hub_ext_port_status failed (err = -110) Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Acked-by: Patrice Chotard diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi index 18ed1ad..4031886 100644 --- a/arch/arm/boot/dts/stih410.dtsi +++ b/arch/arm/boot/dts/stih410.dtsi @@ -41,7 +41,8 @@ compatible = "st,st-ohci-300x"; reg = <0x9a03c00 0x100>; interrupts = ; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>, <&softreset STIH407_USB2_PORT0_SOFTRESET>; reset-names = "power", "softreset"; @@ -57,7 +58,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>, <&softreset STIH407_USB2_PORT0_SOFTRESET>; reset-names = "power", "softreset"; @@ -71,7 +73,8 @@ compatible = "st,st-ohci-300x"; reg = <0x9a83c00 0x100>; interrupts = ; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>, <&softreset STIH407_USB2_PORT1_SOFTRESET>; reset-names = "power", "softreset"; @@ -87,7 +90,8 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>; - clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>; + clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>; resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>, <&softreset STIH407_USB2_PORT1_SOFTRESET>; reset-names = "power", "softreset"; -- cgit v0.10.2 From 78567f135d9bbbaf4538f63656d3e4d957c35fe9 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:00 +0200 Subject: ARM: dts: STiH407-family: Provide interconnect clock for consumption in ST SDHCI The STiH4{07,10} platform contains some interconnect clocks which are used by various IPs. If these clocks aren't handled correctly by ST's SDHCI driver MMC will break and the following output can be observed: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. [ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress. Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Acked-by: Patrice Chotard diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index d294e82..8b063ab 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -550,8 +550,9 @@ interrupt-names = "mmcirq"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mmc0>; - clock-names = "mmc"; - clocks = <&clk_s_c0_flexgen CLK_MMC_0>; + clock-names = "mmc", "icn"; + clocks = <&clk_s_c0_flexgen CLK_MMC_0>, + <&clk_s_c0_flexgen CLK_RX_ICN_HVA>; bus-width = <8>; non-removable; }; @@ -565,8 +566,9 @@ interrupt-names = "mmcirq"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_sd1>; - clock-names = "mmc"; - clocks = <&clk_s_c0_flexgen CLK_MMC_1>; + clock-names = "mmc", "icn"; + clocks = <&clk_s_c0_flexgen CLK_MMC_1>, + <&clk_s_c0_flexgen CLK_RX_ICN_HVA>; resets = <&softreset STIH407_MMC1_SOFTRESET>; bus-width = <4>; }; -- cgit v0.10.2 From a4497a86fb9b855c5ac8503fdc959393b00bb643 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 8 Sep 2016 10:15:28 -0400 Subject: x86, clock: Fix kvm guest tsc initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When booting a kvm guest on AMD with the latest kernel the following messages are displayed in the boot log: tsc: Unable to calibrate against PIT tsc: HPET/PMTIMER calibration failed aa297292d708 ("x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID") introduced a change to account for a difference in cpu and tsc frequencies for Intel SKL processors. Before this change the native tsc set x86_platform.calibrate_tsc to native_calibrate_tsc() which is a hardware calibration of the tsc, and in tsc_init() executed tsc_khz = x86_platform.calibrate_tsc(); cpu_khz = tsc_khz; The kvm code changed x86_platform.calibrate_tsc to kvm_get_tsc_khz() and executed the same tsc_init() function. This meant that KVM guests did not execute the native hardware calibration function. After aa297292d708, there are separate native calibrations for cpu_khz and tsc_khz. The code sets x86_platform.calibrate_tsc to native_calibrate_tsc() which is now an Intel specific calibration function, and x86_platform.calibrate_cpu to native_calibrate_cpu() which is the "old" native_calibrate_tsc() function (ie, the native hardware calibration function). tsc_init() now does cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); if (tsc_khz == 0) tsc_khz = cpu_khz; else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz) cpu_khz = tsc_khz; The kvm code should not call the hardware initialization in native_calibrate_cpu(), as it isn't applicable for kvm and it didn't do that prior to aa297292d708. This patch resolves this issue by setting x86_platform.calibrate_cpu to kvm_get_tsc_khz(). v2: I had originally set x86_platform.calibrate_cpu to cpu_khz_from_cpuid(), however, pbonzini pointed out that the CPUID leaf in that function is not available in KVM. I have changed the function pointer to kvm_get_tsc_khz(). Fixes: aa297292d708 ("x86/tsc: Enumerate SKL cpu_khz and tsc_khz via CPUID") Signed-off-by: Prarit Bhargava Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: Len Brown Cc: "Peter Zijlstra (Intel)" Cc: Borislav Petkov Cc: Adrian Hunter Cc: "Christopher S. Hall" Cc: David Woodhouse Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 1d39bfb..3692249 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -289,6 +289,7 @@ void __init kvmclock_init(void) put_cpu(); x86_platform.calibrate_tsc = kvm_get_tsc_khz; + x86_platform.calibrate_cpu = kvm_get_tsc_khz; x86_platform.get_wallclock = kvm_get_wallclock; x86_platform.set_wallclock = kvm_set_wallclock; #ifdef CONFIG_X86_LOCAL_APIC -- cgit v0.10.2 From 015282c9eb6da05bfad6ff009078f91e06c0c98f Mon Sep 17 00:00:00 2001 From: Wenbo Wang Date: Thu, 8 Sep 2016 12:12:11 -0400 Subject: nvme/quirk: Add a delay before checking device ready for memblaze device Signed-off-by: Wenbo Wang Signed-off-by: Jens Axboe diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index be84a84..60f7eab 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2117,6 +2117,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) }, { 0, } -- cgit v0.10.2 From 47a7b0d8888c04c9746812820b6e60553cc77bbc Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sun, 4 Sep 2016 22:17:28 -0400 Subject: md-cluster: make md-cluster also can work when compiled into kernel The md-cluster is compiled as module by default, if it is compiled by built-in way, then we can't make md-cluster works. [64782.630008] md/raid1:md127: active with 2 out of 2 mirrors [64782.630528] md-cluster module not found. [64782.630530] md127: Could not setup cluster service (-2) Fixes: edb39c9 ("Introduce md_cluster_operations to handle cluster functions") Cc: stable@vger.kernel.org (v4.1+) Reported-by: Marc Smith Reviewed-by: NeilBrown Signed-off-by: Guoqing Jiang Signed-off-by: Shaohua Li diff --git a/drivers/md/md.c b/drivers/md/md.c index 67642ba..915e84d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations); int md_setup_cluster(struct mddev *mddev, int nodes) { - int err; - - err = request_module("md-cluster"); - if (err) { - pr_err("md-cluster module not found.\n"); - return -ENOENT; - } - + if (!md_cluster_ops) + request_module("md-cluster"); spin_lock(&pers_lock); + /* ensure module won't be unloaded */ if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { + pr_err("can't find md-cluster module or get it's reference.\n"); spin_unlock(&pers_lock); return -ENOENT; } -- cgit v0.10.2 From 2f30ea5090cbc57ea573cdc66421264b3de3fb0a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 8 Sep 2016 18:09:57 +0200 Subject: xfrm_user: propagate sec ctx allocation errors When we fail to attach the security context in xfrm_state_construct() we'll return 0 as error value which, in turn, will wrongly claim success to userland when, in fact, we won't be adding / updating the XFRM state. This is a regression introduced by commit fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl(), attach_sec_ctx(), and attach_one_addr()"). Fix it by propagating the error returned by security_xfrm_state_alloc() in this case. Fixes: fd21150a0fe1 ("[XFRM] netlink: Inline attach_encap_tmpl()...") Signed-off-by: Mathias Krause Cc: Thomas Graf Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index cb65d91..0889209 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -581,9 +581,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (err) goto error; - if (attrs[XFRMA_SEC_CTX] && - security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX]))) - goto error; + if (attrs[XFRMA_SEC_CTX]) { + err = security_xfrm_state_alloc(x, + nla_data(attrs[XFRMA_SEC_CTX])); + if (err) + goto error; + } if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) -- cgit v0.10.2 From 1fb81e09d487656aa23f2acb1232c7f56b4c2367 Mon Sep 17 00:00:00 2001 From: "thomas.zeitlhofer+lkml@ze-it.at" Date: Wed, 7 Sep 2016 20:40:38 +0200 Subject: vti: use right inner_mode for inbound inter address family policy checks In case of inter address family tunneling (IPv6 over vti4 or IPv4 over vti6), the inbound policy checks in vti_rcv_cb() and vti6_rcv_cb() are using the wrong address family. As a result, all inbound inter address family traffic is dropped. Use the xfrm_ip2inner_mode() helper, as done in xfrm_input() (i.e., also increment LINUX_MIB_XFRMINSTATEMODEERROR in case of error), to select the inner_mode that contains the right address family for the inbound policy checks. Signed-off-by: Thomas Zeitlhofer Signed-off-by: Steffen Klassert diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index cc701fa..5d7944f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -88,6 +88,7 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; + struct xfrm_mode *inner_mode; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; u32 orig_mark = skb->mark; int ret; @@ -105,7 +106,19 @@ static int vti_rcv_cb(struct sk_buff *skb, int err) } x = xfrm_input_state(skb); - family = x->inner_mode->afinfo->family; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + family = inner_mode->afinfo->family; skb->mark = be32_to_cpu(tunnel->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d90a11f..52a2f73 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -340,6 +340,7 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; + struct xfrm_mode *inner_mode; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; u32 orig_mark = skb->mark; int ret; @@ -357,7 +358,19 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) } x = xfrm_input_state(skb); - family = x->inner_mode->afinfo->family; + + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) { + XFRM_INC_STATS(dev_net(skb->dev), + LINUX_MIB_XFRMINSTATEMODEERROR); + return -EINVAL; + } + } + + family = inner_mode->afinfo->family; skb->mark = be32_to_cpu(t->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); -- cgit v0.10.2 From 293f293637b55db4f9f522a5a72514e98a541076 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 8 Sep 2016 16:25:49 +0100 Subject: kvm-arm: Unmap shadow pagetables properly On arm/arm64, we depend on the kvm_unmap_hva* callbacks (via mmu_notifiers::invalidate_*) to unmap the stage2 pagetables when the userspace buffer gets unmapped. However, when the Hypervisor process exits without explicit unmap of the guest buffers, the only notifier we get is kvm_arch_flush_shadow_all() (via mmu_notifier::release ) which does nothing on arm. Later this causes us to access pages that were already released [via exit_mmap() -> unmap_vmas()] when we actually get to unmap the stage2 pagetable [via kvm_arch_destroy_vm() -> kvm_free_stage2_pgd()]. This triggers crashes with CONFIG_DEBUG_PAGEALLOC, which unmaps any free'd pages from the linear map. [ 757.644120] Unable to handle kernel paging request at virtual address ffff800661e00000 [ 757.652046] pgd = ffff20000b1a2000 [ 757.655471] [ffff800661e00000] *pgd=00000047fffe3003, *pud=00000047fcd8c003, *pmd=00000047fcc7c003, *pte=00e8004661e00712 [ 757.666492] Internal error: Oops: 96000147 [#3] PREEMPT SMP [ 757.672041] Modules linked in: [ 757.675100] CPU: 7 PID: 3630 Comm: qemu-system-aar Tainted: G D 4.8.0-rc1 #3 [ 757.683240] Hardware name: AppliedMicro X-Gene Mustang Board/X-Gene Mustang Board, BIOS 3.06.15 Aug 19 2016 [ 757.692938] task: ffff80069cdd3580 task.stack: ffff8006adb7c000 [ 757.698840] PC is at __flush_dcache_area+0x1c/0x40 [ 757.703613] LR is at kvm_flush_dcache_pmd+0x60/0x70 [ 757.708469] pc : [] lr : [] pstate: 20000145 ... [ 758.357249] [] __flush_dcache_area+0x1c/0x40 [ 758.363059] [] unmap_stage2_range+0x458/0x5f0 [ 758.368954] [] kvm_free_stage2_pgd+0x34/0x60 [ 758.374761] [] kvm_arch_destroy_vm+0x20/0x68 [ 758.380570] [] kvm_put_kvm+0x210/0x358 [ 758.385860] [] kvm_vm_release+0x2c/0x40 [ 758.391239] [] __fput+0x114/0x2e8 [ 758.396096] [] ____fput+0xc/0x18 [ 758.400869] [] task_work_run+0x108/0x138 [ 758.406332] [] do_exit+0x48c/0x10e8 [ 758.411363] [] do_group_exit+0x6c/0x130 [ 758.416739] [] get_signal+0x284/0xa18 [ 758.421943] [] do_signal+0x158/0x860 [ 758.427060] [] do_notify_resume+0x6c/0x88 [ 758.432608] [] work_pending+0x10/0x14 [ 758.437812] Code: 9ac32042 8b010001 d1000443 8a230000 (d50b7e20) This patch fixes the issue by moving the kvm_free_stage2_pgd() to kvm_arch_flush_shadow_all(). Cc: # 3.9+ Tested-by: Itaru Kitayama Reported-by: Itaru Kitayama Reported-by: James Morse Cc: Marc Zyngier Cc: Catalin Marinas Cc: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 75f130e..c94b90d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -158,8 +158,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { int i; - kvm_free_stage2_pgd(kvm); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_arch_vcpu_free(kvm->vcpus[i]); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index a3faafe..e9a5c0e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1894,6 +1894,7 @@ void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) void kvm_arch_flush_shadow_all(struct kvm *kvm) { + kvm_free_stage2_pgd(kvm); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, -- cgit v0.10.2 From a179b69359feb26ddb148bb6a2c0c53a8d1dc5be Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Aug 2016 05:36:53 -0300 Subject: [media] cec: don't Feature Abort broadcast msgs when unregistered If the adapter is configured as 'Unregistered', then cec_receive_notify incorrectly thinks that broadcast messages are directed messages. The destination for broadcast messages is 0xf, and the logical address assigned to Unregistered devices is also 0xf and the logic didn't handle that correctly. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c index e980ac9..946986f 100644 --- a/drivers/staging/media/cec/cec-adap.c +++ b/drivers/staging/media/cec/cec-adap.c @@ -1409,7 +1409,6 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, u8 init_laddr = cec_msg_initiator(msg); u8 devtype = cec_log_addr2dev(adap, dest_laddr); int la_idx = cec_log_addr2idx(adap, dest_laddr); - bool is_directed = la_idx >= 0; bool from_unregistered = init_laddr == 0xf; struct cec_msg tx_cec_msg = { }; @@ -1571,7 +1570,7 @@ static int cec_receive_notify(struct cec_adapter *adap, struct cec_msg *msg, * Unprocessed messages are aborted if userspace isn't doing * any processing either. */ - if (is_directed && !is_reply && !adap->follower_cnt && + if (!is_broadcast && !is_reply && !adap->follower_cnt && !adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT) return cec_feature_abort(adap, msg); break; -- cgit v0.10.2 From 60815d4a78204915f5cdf79a536bc96d5d23ae5f Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 24 Aug 2016 07:17:22 -0300 Subject: [media] cec: fix ioctl return code when not registered Don't return the confusing -EIO error code when the device is not registered, instead return -ENODEV which is the proper thing to do in this situation. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c index 6f58ee8..e274e2f 100644 --- a/drivers/staging/media/cec/cec-api.c +++ b/drivers/staging/media/cec/cec-api.c @@ -435,7 +435,7 @@ static long cec_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) void __user *parg = (void __user *)arg; if (!devnode->registered) - return -EIO; + return -ENODEV; switch (cmd) { case CEC_ADAP_G_CAPS: -- cgit v0.10.2 From 6b7b554d34fdbc5dc9fae9d4ca9dd37f6346be3d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Aug 2016 15:12:44 +0100 Subject: ARM: dts: Remove use of skeleton.dtsi from bcm283x.dtsi This file is included from DTS files under arch/arm64 too (via broadcom/bcm2837-rpi-3-b.dts and broadcom/bcm2837.dtsi). There is a desire not to have skeleton.dtsi for ARM64. See commit 3ebee5a2e141 ("arm64: dts: kill skeleton.dtsi") for rationale for its removal. As well as the addition of #*-cells also requires adding the device_type to the rpi memory node explicitly. Note that this change results in the removal of an empty /aliases node from bcm2835-rpi-a.dtb and bcm2835-rpi-a-plus.dtb. I have no hardware to check if this is a problem or not. It also results in some reordering of the nodes in the DTBs (the /aliases and /memory nodes come later). This isn't supposed to matter but, again, I've no hardware to check if it is true in this particular case. Signed-off-by: Ian Campbell Acked-by: Mark Rutland Tested-by: Stefan Wahren Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Rob Herring Cc: Frank Rowand Cc: Eric Anholt Cc: Stephen Warren Cc: Lee Jones Cc: Gerd Hoffmann Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rpi-kernel@lists.infradead.org Cc: arm@kernel.org Signed-off-by: Arnd Bergmann diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi index caf2707..e9b47b2 100644 --- a/arch/arm/boot/dts/bcm2835-rpi.dtsi +++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi @@ -2,6 +2,7 @@ / { memory { + device_type = "memory"; reg = <0 0x10000000>; }; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index b982522..445624a 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -2,7 +2,6 @@ #include #include #include -#include "skeleton.dtsi" /* This include file covers the common peripherals and configuration between * bcm2835 and bcm2836 implementations, leaving the CPU configuration to @@ -13,6 +12,8 @@ compatible = "brcm,bcm2835"; model = "BCM2835"; interrupt-parent = <&intc>; + #address-cells = <1>; + #size-cells = <1>; chosen { bootargs = "earlyprintk console=ttyAMA0"; -- cgit v0.10.2 From 76aa7591688001839ec9ca838041f2d55d49ab92 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 3 Aug 2016 15:12:45 +0100 Subject: ARM64: dts: bcm: Use a symlink to R-Pi dtsi files from arch=arm The ../../../arm... style cross-references added by commit 9d56c22a7861 ("ARM: bcm2835: Add devicetree for the Raspberry Pi 3.") do not work in the context of the split device-tree repository[0] (where the directory structure differs). As with commit 8ee57b8182c4 ("ARM64: dts: vexpress: Use a symlink to vexpress-v2m-rs1.dtsi from arch=arm") use symlinks instead. [0] https://git.kernel.org/cgit/linux/kernel/git/devicetree/devicetree-rebasing.git/ Signed-off-by: Ian Campbell Acked-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Cc: Mark Rutland Cc: Rob Herring Cc: Frank Rowand Cc: Eric Anholt Cc: Stephen Warren Cc: Lee Jones Cc: Gerd Hoffmann Cc: devicetree@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-rpi-kernel@lists.infradead.org Cc: arm@kernel.org Signed-off-by: Arnd Bergmann diff --git a/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi new file mode 120000 index 0000000..3937b77 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm2835-rpi.dtsi \ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts index 6f47dd2..7841b72 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts +++ b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts @@ -1,7 +1,7 @@ /dts-v1/; #include "bcm2837.dtsi" -#include "../../../../arm/boot/dts/bcm2835-rpi.dtsi" -#include "../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi" +#include "bcm2835-rpi.dtsi" +#include "bcm283x-rpi-smsc9514.dtsi" / { compatible = "raspberrypi,3-model-b", "brcm,bcm2837"; diff --git a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi index f2a31d0..8216bbb 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi @@ -1,4 +1,4 @@ -#include "../../../../arm/boot/dts/bcm283x.dtsi" +#include "bcm283x.dtsi" / { compatible = "brcm,bcm2836"; diff --git a/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi new file mode 120000 index 0000000..dca7c05 --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi \ No newline at end of file diff --git a/arch/arm64/boot/dts/broadcom/bcm283x.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi new file mode 120000 index 0000000..5f54e4c --- /dev/null +++ b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi @@ -0,0 +1 @@ +../../../../arm/boot/dts/bcm283x.dtsi \ No newline at end of file -- cgit v0.10.2 From c94455558337eece474eebb6a16b905f98930418 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 8 Sep 2016 10:43:58 -0700 Subject: raid5: fix a small race condition commit 5f9d1fde7d54a5(raid5: fix memory leak of bio integrity data) moves bio_reset to bio_endio. But it introduces a small race condition. It does bio_reset after raid5_release_stripe, which could make the stripe reusable and hence reuse the bio just before bio_reset. Moving bio_reset before raid5_release_stripe is called should fix the race. Reported-and-tested-by: Stefan Priebe - Profihost AG Signed-off-by: Shaohua Li diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b95c54c..ee7fc37 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi) } } rdev_dec_pending(rdev, conf->mddev); + bio_reset(bi); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); - bio_reset(bi); } static void raid5_end_write_request(struct bio *bi) @@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && bi->bi_error && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); + bio_reset(bi); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); @@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && sh != sh->batch_head) raid5_release_stripe(sh->batch_head); - bio_reset(bi); } static void raid5_build_block(struct stripe_head *sh, int i, int previous) -- cgit v0.10.2 From ea90383837334bcebe842e719ad4d8c966f4ef51 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Thu, 8 Sep 2016 19:03:20 +0800 Subject: drm/vc4: mark vc4_bo_cache_purge() static We get 1 warning when building kernel with W=1: drivers/gpu/drm/vc4/vc4_bo.c:147:6: warning: no previous prototype for 'vc4_bo_cache_purge' [-Wmissing-prototypes] In fact, this function is only used in the file in which it is declared and don't need a declaration, but can be made static. So this patch marks it 'static'. Signed-off-by: Baoyou Xie Signed-off-by: Eric Anholt diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 59adcf8..3f6704c 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -144,7 +144,7 @@ static struct list_head *vc4_get_cache_list_for_size(struct drm_device *dev, return &vc4->bo_cache.size_list[page_index]; } -void vc4_bo_cache_purge(struct drm_device *dev) +static void vc4_bo_cache_purge(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); -- cgit v0.10.2 From 7d762e49c2117d3829eb3355f2617aea080ed3a7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 9 Sep 2016 18:08:23 +0200 Subject: perf/x86/amd/uncore: Prevent use after free The resent conversion of the cpu hotplug support in the uncore driver introduced a regression due to the way the callbacks are invoked at initialization time. The old code called the prepare/starting/online function on each online cpu as a block. The new code registers the hotplug callbacks in the core for each state. The core invokes the callbacks at each registration on all online cpus. The code implicitely relied on the prepare/starting/online callbacks being called as combo on a particular cpu, which was not obvious and completely undocumented. The resulting subtle wreckage happens due to the way how the uncore code manages shared data structures for cpus which share an uncore resource in hardware. The sharing is determined in the cpu starting callback, but the prepare callback allocates per cpu data for the upcoming cpu because potential sharing is unknown at this point. If the starting callback finds a online cpu which shares the hardware resource it takes a refcount on the percpu data of that cpu and puts the own data structure into a 'free_at_online' pointer of that shared data structure. The online callback frees that. With the old model this worked because in a starting callback only one non unused structure (the one of the starting cpu) was available. The new code allocates the data structures for all cpus when the prepare callback is registered. Now the starting function iterates through all online cpus and looks for a data structure (skipping its own) which has a matching hardware id. The id member of the data structure is initialized to 0, but the hardware id can be 0 as well. The resulting wreckage is: CPU0 finds a matching id on CPU1, takes a refcount on CPU1 data and puts its own data structure into CPU1s data structure to be freed. CPU1 skips CPU0 because the data structure is its allegedly unsued own. It finds a matching id on CPU2, takes a refcount on CPU1 data and puts its own data structure into CPU2s data structure to be freed. .... Now the online callbacks are invoked. CPU0 has a pointer to CPU1s data and frees the original CPU0 data. So far so good. CPU1 has a pointer to CPU2s data and frees the original CPU1 data, which is still referenced by CPU0 ---> Booom So there are two issues to be solved here: 1) The id field must be initialized at allocation time to a value which cannot be a valid hardware id, i.e. -1 This prevents the above scenario, but now CPU1 and CPU2 both stick their own data structure into the free_at_online pointer of CPU0. So we leak CPU1s data structure. 2) Fix the memory leak described in #1 Instead of having a single pointer, use a hlist to enqueue the superflous data structures which are then freed by the first cpu invoking the online callback. Ideally we should know the sharing _before_ invoking the prepare callback, but that's way beyond the scope of this bug fix. [ tglx: Rewrote changelog ] Fixes: 96b2bd3866a0 ("perf/x86/amd/uncore: Convert to hotplug state machine") Reported-and-tested-by: Eric Sandeen Signed-off-by: Sebastian Andrzej Siewior Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20160909160822.lowgmkdwms2dheyv@linutronix.de Signed-off-by: Thomas Gleixner diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index e6131d4..65577f0 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -29,6 +29,8 @@ #define COUNTER_SHIFT 16 +static HLIST_HEAD(uncore_unused_list); + struct amd_uncore { int id; int refcnt; @@ -39,7 +41,7 @@ struct amd_uncore { cpumask_t *active_mask; struct pmu *pmu; struct perf_event *events[MAX_COUNTERS]; - struct amd_uncore *free_when_cpu_online; + struct hlist_node node; }; static struct amd_uncore * __percpu *amd_uncore_nb; @@ -306,6 +308,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; uncore_nb->active_mask = &amd_nb_active_mask; uncore_nb->pmu = &amd_nb_pmu; + uncore_nb->id = -1; *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } @@ -319,6 +322,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; uncore_l2->active_mask = &amd_l2_active_mask; uncore_l2->pmu = &amd_l2_pmu; + uncore_l2->id = -1; *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; } @@ -348,7 +352,7 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, continue; if (this->id == that->id) { - that->free_when_cpu_online = this; + hlist_add_head(&this->node, &uncore_unused_list); this = that; break; } @@ -388,13 +392,23 @@ static int amd_uncore_cpu_starting(unsigned int cpu) return 0; } +static void uncore_clean_online(void) +{ + struct amd_uncore *uncore; + struct hlist_node *n; + + hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { + hlist_del(&uncore->node); + kfree(uncore); + } +} + static void uncore_online(unsigned int cpu, struct amd_uncore * __percpu *uncores) { struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); - kfree(uncore->free_when_cpu_online); - uncore->free_when_cpu_online = NULL; + uncore_clean_online(); if (cpu == uncore->cpu) cpumask_set_cpu(cpu, uncore->active_mask); -- cgit v0.10.2 From 2561d309dfd1555e781484af757ed0115035ddb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 16:02:32 -0400 Subject: alpha: fix copy_from_user() it should clear the destination even when access_ok() fails. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h index c419b43..466e42e 100644 --- a/arch/alpha/include/asm/uaccess.h +++ b/arch/alpha/include/asm/uaccess.h @@ -371,14 +371,6 @@ __copy_tofrom_user_nocheck(void *to, const void *from, long len) return __cu_len; } -extern inline long -__copy_tofrom_user(void *to, const void *from, long len, const void __user *validate) -{ - if (__access_ok((unsigned long)validate, len, get_fs())) - len = __copy_tofrom_user_nocheck(to, from, len); - return len; -} - #define __copy_to_user(to, from, n) \ ({ \ __chk_user_ptr(to); \ @@ -393,17 +385,22 @@ __copy_tofrom_user(void *to, const void *from, long len, const void __user *vali #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user - extern inline long copy_to_user(void __user *to, const void *from, long n) { - return __copy_tofrom_user((__force void *)to, from, n, to); + if (likely(__access_ok((unsigned long)to, n, get_fs()))) + n = __copy_tofrom_user_nocheck((__force void *)to, from, n); + return n; } extern inline long copy_from_user(void *to, const void __user *from, long n) { - return __copy_tofrom_user(to, (__force void *)from, n, from); + if (likely(__access_ok((unsigned long)from, n, get_fs()))) + n = __copy_tofrom_user_nocheck(to, (__force void *)from, n); + else + memset(to, 0, n); + return n; } extern void __do_clear_user(void); -- cgit v0.10.2 From 2545e5da080b4839dd859e3b09343a884f6ab0e3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 16:36:37 -0400 Subject: asm-generic: make copy_from_user() zero the destination properly ... in all cases, including the failing access_ok() Note that some architectures using asm-generic/uaccess.h have __copy_from_user() not zeroing the tail on failure halfway through. This variant works either way. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 1bfa602..04e21a4 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -257,11 +257,13 @@ extern int __get_user_bad(void) __attribute__((noreturn)); static inline long copy_from_user(void *to, const void __user * from, unsigned long n) { + unsigned long res = n; might_fault(); - if (access_ok(VERIFY_READ, from, n)) - return __copy_from_user(to, from, n); - else - return n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline long copy_to_user(void __user *to, -- cgit v0.10.2 From 4214ebf4654798309364d0c678b799e402f38288 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:19 +0100 Subject: Fix memory leaks in cifs_do_mount() Fix memory leaks introduced by the patch fs/cifs: make share unaccessible at root level mountable Also move allocation of cifs_sb->prepath to cifs_setup_cifs_sb(). Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6bbec5e..cc9cdab 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -686,26 +686,22 @@ cifs_do_mount(struct file_system_type *fs_type, cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL); if (cifs_sb->mountdata == NULL) { root = ERR_PTR(-ENOMEM); - goto out_cifs_sb; + goto out_free; } - if (volume_info->prepath) { - cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL); - if (cifs_sb->prepath == NULL) { - root = ERR_PTR(-ENOMEM); - goto out_cifs_sb; - } + rc = cifs_setup_cifs_sb(volume_info, cifs_sb); + if (rc) { + root = ERR_PTR(rc); + goto out_free; } - cifs_setup_cifs_sb(volume_info, cifs_sb); - rc = cifs_mount(cifs_sb, volume_info); if (rc) { if (!(flags & MS_SILENT)) cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", rc); root = ERR_PTR(rc); - goto out_mountdata; + goto out_free; } mnt_data.vol = volume_info; @@ -752,9 +748,9 @@ out: cifs_cleanup_volume_info(volume_info); return root; -out_mountdata: +out_free: + kfree(cifs_sb->prepath); kfree(cifs_sb->mountdata); -out_cifs_sb: kfree(cifs_sb); out_nls: unload_nls(volume_info->local_nls); diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1243bd3..95dab43 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -184,7 +184,7 @@ extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf, unsigned int to_read); extern int cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page, unsigned int to_read); -extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, +extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7ae0328..4546926 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3222,7 +3222,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon, } } -void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, +int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb) { INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); @@ -3316,6 +3316,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n"); + + if (pvolume_info->prepath) { + cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL); + if (cifs_sb->prepath == NULL) + return -ENOMEM; + } + + return 0; } static void -- cgit v0.10.2 From c1d8b24d18192764fe82067ec6aa8d4c3bf094e0 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:20 +0100 Subject: Compare prepaths when comparing superblocks The patch fs/cifs: make share unaccessible at root level mountable makes use of prepaths when any component of the underlying path is inaccessible. When mounting 2 separate shares having different prepaths but are other wise similar in other respects, we end up sharing superblocks when we shouldn't be doing so. Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4546926..2e4f4ba 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2781,6 +2781,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) return 1; } +static int +match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) +{ + struct cifs_sb_info *old = CIFS_SB(sb); + struct cifs_sb_info *new = mnt_data->cifs_sb; + + if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) { + if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)) + return 0; + /* The prepath should be null terminated strings */ + if (strcmp(new->prepath, old->prepath)) + return 0; + + return 1; + } + return 0; +} + int cifs_match_super(struct super_block *sb, void *data) { @@ -2808,7 +2826,8 @@ cifs_match_super(struct super_block *sb, void *data) if (!match_server(tcp_srv, volume_info) || !match_session(ses, volume_info) || - !match_tcon(tcon, volume_info->UNC)) { + !match_tcon(tcon, volume_info->UNC) || + !match_prepath(sb, mnt_data)) { rc = 0; goto out; } -- cgit v0.10.2 From 348c1bfa84dfc47da1f1234b7f2bf09fa798edea Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 29 Jul 2016 22:38:21 +0100 Subject: Move check for prefix path to within cifs_get_root() Signed-off-by: Sachin Prabhu Tested-by: Aurelien Aptel Signed-off-by: Steve French diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index cc9cdab..14ae4b8 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -609,6 +609,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) char *s, *p; char sep; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + return dget(sb->s_root); + full_path = cifs_build_path_to_root(vol, cifs_sb, cifs_sb_master_tcon(cifs_sb)); if (full_path == NULL) @@ -731,11 +734,7 @@ cifs_do_mount(struct file_system_type *fs_type, sb->s_flags |= MS_ACTIVE; } - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) - root = dget(sb->s_root); - else - root = cifs_get_root(volume_info, sb); - + root = cifs_get_root(volume_info, sb); if (IS_ERR(root)) goto out_super; -- cgit v0.10.2 From 767ae08678c2c796bcd7f582ee457aee20a28a1e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:49 +0300 Subject: perf/core: Fix a race between mmap_close() and set_output() of AUX events In the mmap_close() path we need to stop all the AUX events that are writing data to the AUX area that we are unmapping, before we can safely free the pages. To determine if an event needs to be stopped, we're comparing its ->rb against the one that's getting unmapped. However, a SET_OUTPUT ioctl may turn up inside an AUX transaction and swizzle event::rb to some other ring buffer, but the transaction will keep writing data to the old ring buffer until the event gets scheduled out. At this point, mmap_close() will skip over such an event and will proceed to free the AUX area, while it's still being used by this event, which will set off a warning in the mmap_close() path and cause a memory corruption. To avoid this, always stop an AUX event before its ->rb is updated; this will release the (potentially) last reference on the AUX area of the buffer. If the event gets restarted, its new ring buffer will be used. If another SET_OUTPUT comes and switches it back to the old ring buffer that's getting unmapped, it's also fine: this ring buffer's aux_mmap_count will be zero and AUX transactions won't start any more. Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index 07ac859..a54f2c2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2496,11 +2496,11 @@ static int __perf_event_stop(void *info) return 0; } -static int perf_event_restart(struct perf_event *event) +static int perf_event_stop(struct perf_event *event, int restart) { struct stop_event_data sd = { .event = event, - .restart = 1, + .restart = restart, }; int ret = 0; @@ -4845,6 +4845,19 @@ static void ring_buffer_attach(struct perf_event *event, spin_unlock_irqrestore(&rb->event_lock, flags); } + /* + * Avoid racing with perf_mmap_close(AUX): stop the event + * before swizzling the event::rb pointer; if it's getting + * unmapped, its aux_mmap_count will be 0 and it won't + * restart. See the comment in __perf_pmu_output_stop(). + * + * Data will inevitably be lost when set_output is done in + * mid-air, but then again, whoever does it like this is + * not in for the data anyway. + */ + if (has_aux(event)) + perf_event_stop(event, 0); + rcu_assign_pointer(event->rb, rb); if (old_rb) { @@ -6120,7 +6133,7 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) - perf_event_restart(event); + perf_event_stop(event, 1); } void perf_event_exec(void) @@ -6164,7 +6177,13 @@ static void __perf_event_output_stop(struct perf_event *event, void *data) /* * In case of inheritance, it will be the parent that links to the - * ring-buffer, but it will be the child that's actually using it: + * ring-buffer, but it will be the child that's actually using it. + * + * We are using event::rb to determine if the event should be stopped, + * however this may race with ring_buffer_attach() (through set_output), + * which will make us skip the event that actually needs to be stopped. + * So ring_buffer_attach() has to stop an aux event before re-assigning + * its rb pointer. */ if (rcu_dereference(parent->rb) == rb) ro->err = __perf_event_stop(&sd); @@ -6678,7 +6697,7 @@ static void __perf_addr_filters_adjust(struct perf_event *event, void *data) raw_spin_unlock_irqrestore(&ifh->lock, flags); if (restart) - perf_event_restart(event); + perf_event_stop(event, 1); } /* @@ -7867,7 +7886,7 @@ static void perf_event_addr_filters_apply(struct perf_event *event) mmput(mm); restart: - perf_event_restart(event); + perf_event_stop(event, 1); } /* -- cgit v0.10.2 From b79ccadd6bb10e72cf784a298ca6dc1398eb9a24 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:50 +0300 Subject: perf/core: Fix aux_mmap_count vs aux_refcount order The order of accesses to ring buffer's aux_mmap_count and aux_refcount has to be preserved across the users, namely perf_mmap_close() and perf_aux_output_begin(), otherwise the inversion can result in the latter holding the last reference to the aux buffer and subsequently free'ing it in atomic context, triggering a warning. > ------------[ cut here ]------------ > WARNING: CPU: 0 PID: 257 at kernel/events/ring_buffer.c:541 __rb_free_aux+0x11a/0x130 > CPU: 0 PID: 257 Comm: stopbug Not tainted 4.8.0-rc1+ #2596 > Call Trace: > [] __warn+0xcb/0xf0 > [] warn_slowpath_null+0x1d/0x20 > [] __rb_free_aux+0x11a/0x130 > [] rb_free_aux+0x18/0x20 > [] perf_aux_output_begin+0x163/0x1e0 > [] bts_event_start+0x3a/0xd0 > [] bts_event_add+0x5d/0x80 > [] event_sched_in.isra.104+0xf6/0x2f0 > [] group_sched_in+0x6e/0x190 > [] ctx_sched_in+0x2fe/0x5f0 > [] perf_event_sched_in+0x60/0x80 > [] ctx_resched+0x5b/0x90 > [] __perf_event_enable+0x1e1/0x240 > [] event_function+0xa9/0x180 > [] ? perf_cgroup_attach+0x70/0x70 > [] remote_function+0x3f/0x50 > [] flush_smp_call_function_queue+0x83/0x150 > [] generic_smp_call_function_single_interrupt+0x13/0x60 > [] smp_call_function_single_interrupt+0x27/0x40 > [] call_function_single_interrupt+0x89/0x90 > [] finish_task_switch+0xa6/0x210 > [] ? finish_task_switch+0x67/0x210 > [] __schedule+0x3dd/0xb50 > [] schedule+0x35/0x80 > [] sys_sched_yield+0x61/0x70 > [] entry_SYSCALL_64_fastpath+0x18/0xa8 > ---[ end trace 6235f556f5ea83a9 ]--- This patch puts the checks in perf_aux_output_begin() in the same order as that of perf_mmap_close(). Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ae9b90d..257fa46 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -330,15 +330,22 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (!rb) return NULL; - if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount)) + if (!rb_has_aux(rb)) goto err; /* - * If rb::aux_mmap_count is zero (and rb_has_aux() above went through), - * the aux buffer is in perf_mmap_close(), about to get freed. + * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(), + * about to get freed, so we leave immediately. + * + * Checking rb::aux_mmap_count and rb::refcount has to be done in + * the same order, see perf_mmap_close. Otherwise we end up freeing + * aux pages in this path, which is a bug, because in_atomic(). */ if (!atomic_read(&rb->aux_mmap_count)) - goto err_put; + goto err; + + if (!atomic_inc_not_zero(&rb->aux_refcount)) + goto err; /* * Nesting is not supported for AUX area, make sure nested -- cgit v0.10.2 From a9a94401c2b5805c71e39427b1af1bf1b9f67cd0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:51 +0300 Subject: perf/x86/intel/bts: Fix confused ordering of PMU callbacks The intel_bts driver is using a CPU-local 'started' variable to order callbacks and PMIs and make sure that AUX transactions don't get messed up. However, the ordering rules in regard to this variable is a complete mess, which recently resulted in perf_fuzzer-triggered warnings and panics. The general ordering rule that is patch is enforcing is that this cpu-local variable be set only when the cpu-local AUX transaction is active; consequently, this variable is to be checked before the AUX related bits can be touched. Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 0a6e393..61e1d71 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -31,7 +31,17 @@ struct bts_ctx { struct perf_output_handle handle; struct debug_store ds_back; - int started; + int state; +}; + +/* BTS context states: */ +enum { + /* no ongoing AUX transactions */ + BTS_STATE_STOPPED = 0, + /* AUX transaction is on, BTS tracing is disabled */ + BTS_STATE_INACTIVE, + /* AUX transaction is on, BTS tracing is running */ + BTS_STATE_ACTIVE, }; static DEFINE_PER_CPU(struct bts_ctx, bts_ctx); @@ -204,6 +214,15 @@ static void bts_update(struct bts_ctx *bts) static int bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); +/* + * Ordering PMU callbacks wrt themselves and the PMI is done by means + * of bts::state, which: + * - is set when bts::handle::event is valid, that is, between + * perf_aux_output_begin() and perf_aux_output_end(); + * - is zero otherwise; + * - is ordered against bts::handle::event with a compiler barrier. + */ + static void __bts_event_start(struct perf_event *event) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); @@ -221,10 +240,13 @@ static void __bts_event_start(struct perf_event *event) /* * local barrier to make sure that ds configuration made it - * before we enable BTS + * before we enable BTS and bts::state goes ACTIVE */ wmb(); + /* INACTIVE/STOPPED -> ACTIVE */ + WRITE_ONCE(bts->state, BTS_STATE_ACTIVE); + intel_pmu_enable_bts(config); } @@ -251,9 +273,6 @@ static void bts_event_start(struct perf_event *event, int flags) __bts_event_start(event); - /* PMI handler: this counter is running and likely generating PMIs */ - ACCESS_ONCE(bts->started) = 1; - return; fail_end_stop: @@ -263,30 +282,34 @@ fail_stop: event->hw.state = PERF_HES_STOPPED; } -static void __bts_event_stop(struct perf_event *event) +static void __bts_event_stop(struct perf_event *event, int state) { + struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + + /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */ + WRITE_ONCE(bts->state, state); + /* * No extra synchronization is mandated by the documentation to have * BTS data stores globally visible. */ intel_pmu_disable_bts(); - - if (event->hw.state & PERF_HES_STOPPED) - return; - - ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED; } static void bts_event_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); + struct bts_buffer *buf = NULL; + int state = READ_ONCE(bts->state); + + if (state == BTS_STATE_ACTIVE) + __bts_event_stop(event, BTS_STATE_STOPPED); - /* PMI handler: don't restart this counter */ - ACCESS_ONCE(bts->started) = 0; + if (state != BTS_STATE_STOPPED) + buf = perf_get_aux(&bts->handle); - __bts_event_stop(event); + event->hw.state |= PERF_HES_STOPPED; if (flags & PERF_EF_UPDATE) { bts_update(bts); @@ -296,6 +319,7 @@ static void bts_event_stop(struct perf_event *event, int flags) bts->handle.head = local_xchg(&buf->data_size, buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); } @@ -310,8 +334,20 @@ static void bts_event_stop(struct perf_event *event, int flags) void intel_bts_enable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + int state = READ_ONCE(bts->state); + + /* + * Here we transition from INACTIVE to ACTIVE; + * if we instead are STOPPED from the interrupt handler, + * stay that way. Can't be ACTIVE here though. + */ + if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE)) + return; + + if (state == BTS_STATE_STOPPED) + return; - if (bts->handle.event && bts->started) + if (bts->handle.event) __bts_event_start(bts->handle.event); } @@ -319,8 +355,15 @@ void intel_bts_disable_local(void) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + /* + * Here we transition from ACTIVE to INACTIVE; + * do nothing for STOPPED or INACTIVE. + */ + if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE) + return; + if (bts->handle.event) - __bts_event_stop(bts->handle.event); + __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE); } static int @@ -407,9 +450,13 @@ int intel_bts_interrupt(void) struct perf_event *event = bts->handle.event; struct bts_buffer *buf; s64 old_head; - int err; + int err = -ENOSPC; - if (!event || !bts->started) + /* + * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, + * so we can only be INACTIVE or STOPPED + */ + if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) return 0; buf = perf_get_aux(&bts->handle); @@ -432,12 +479,21 @@ int intel_bts_interrupt(void) !!local_xchg(&buf->lost, 0)); buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) - return 1; + if (buf) + err = bts_buffer_reset(buf, &bts->handle); - err = bts_buffer_reset(buf, &bts->handle); - if (err) - perf_aux_output_end(&bts->handle, 0, false); + if (err) { + WRITE_ONCE(bts->state, BTS_STATE_STOPPED); + + if (buf) { + /* + * BTS_STATE_STOPPED should be visible before + * cleared handle::event + */ + barrier(); + perf_aux_output_end(&bts->handle, 0, false); + } + } return 1; } -- cgit v0.10.2 From 4d4c474124649198d9b0a065c06f9362cf18e14e Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:52 +0300 Subject: perf/x86/intel/bts: Fix BTS PMI detection Since BTS doesn't have a dedicated PMI status bit, the driver needs to take extra care to check for the condition that triggers it to avoid spurious NMI warnings. Regardless of the local BTS context state, the only way of knowing that the NMI is ours is to compare the write pointer against the interrupt threshold. Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-5-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 61e1d71..9233edf 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -446,26 +446,37 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) int intel_bts_interrupt(void) { + struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds; struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct perf_event *event = bts->handle.event; struct bts_buffer *buf; s64 old_head; - int err = -ENOSPC; + int err = -ENOSPC, handled = 0; + + /* + * The only surefire way of knowing if this NMI is ours is by checking + * the write ptr against the PMI threshold. + */ + if (ds->bts_index >= ds->bts_interrupt_threshold) + handled = 1; /* * this is wrapped in intel_bts_enable_local/intel_bts_disable_local, * so we can only be INACTIVE or STOPPED */ if (READ_ONCE(bts->state) == BTS_STATE_STOPPED) - return 0; + return handled; buf = perf_get_aux(&bts->handle); + if (!buf) + return handled; + /* * Skip snapshot counters: they don't use the interrupt, but * there's no other way of telling, because the pointer will * keep moving */ - if (!buf || buf->snapshot) + if (buf->snapshot) return 0; old_head = local_read(&buf->head); @@ -473,7 +484,7 @@ int intel_bts_interrupt(void) /* no new data */ if (old_head == local_read(&buf->head)) - return 0; + return handled; perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), !!local_xchg(&buf->lost, 0)); -- cgit v0.10.2 From ef9ef3befa0d76008e988a9ed9fe439e803351b9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 6 Sep 2016 16:23:53 +0300 Subject: perf/x86/intel/bts: Kill a silly warning At the moment, intel_bts will WARN() out if there is more than one event writing to the same ring buffer, via SET_OUTPUT, and will only send data from one event to a buffer. There is no reason to have this warning in, so kill it. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160906132353.19887-6-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 9233edf..bdcd651 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -378,8 +378,6 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle) return 0; head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); - if (WARN_ON_ONCE(head != local_read(&buf->head))) - return -EINVAL; phys = &buf->buf[buf->cur_buf]; space = phys->offset + phys->displacement + phys->size - head; -- cgit v0.10.2 From 8ef9b8455a2a3049efa9e46e8a6402b972a3eb41 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 7 Sep 2016 14:42:55 +0200 Subject: perf/x86/intel: Fix PEBSv3 record drain Alexander hit the WARN_ON_ONCE(!event) on his Skylake while running the perf fuzzer. This means the PEBSv3 record included a status bit for an inactive event, something that _should_ not happen. Move the code that filters the status bits against our known PEBS events up a spot to guarantee we only deal with events we know about. Further add "continue" statements to the WARN_ON_ONCE()s such that we'll not die nor generate silly events in case we ever do hit them again. Reported-by: Alexander Shishkin Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org Fixes: a3d86542de88 ("perf/x86/intel/pebs: Add PEBSv3 decoding") Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 7ce9f3f..9b983a4 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1274,18 +1274,18 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct pebs_record_nhm *p = at; u64 pebs_status; - /* PEBS v3 has accurate status bits */ + pebs_status = p->status & cpuc->pebs_enabled; + pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; + + /* PEBS v3 has more accurate status bits */ if (x86_pmu.intel_cap.pebs_format >= 3) { - for_each_set_bit(bit, (unsigned long *)&p->status, - MAX_PEBS_EVENTS) + for_each_set_bit(bit, (unsigned long *)&pebs_status, + x86_pmu.max_pebs_events) counts[bit]++; continue; } - pebs_status = p->status & cpuc->pebs_enabled; - pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1; - /* * On some CPUs the PEBS status can be zero when PEBS is * racing with clearing of GLOBAL_STATUS. @@ -1333,8 +1333,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) continue; event = cpuc->events[bit]; - WARN_ON_ONCE(!event); - WARN_ON_ONCE(!event->attr.precise_ip); + if (WARN_ON_ONCE(!event)) + continue; + + if (WARN_ON_ONCE(!event->attr.precise_ip)) + continue; /* log dropped samples number */ if (error[bit]) -- cgit v0.10.2 From b519d408ea32040b1c7e10b155a3ee9a36660947 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 11 Sep 2016 14:50:01 -0400 Subject: NFSv4.1: Fix the CREATE_SESSION slot number accounting Ensure that we conform to the algorithm described in RFC5661, section 18.36.4 for when to bump the sequence id. In essence we do it for all cases except when the RPC call timed out, or in case of the server returning NFS4ERR_DELAY or NFS4ERR_STALE_CLIENTID. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c380d2e..a9dec32 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7570,12 +7570,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); trace_nfs4_create_session(clp, status); + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EACCES: + case -EAGAIN: + goto out; + }; + + clp->cl_seqid++; if (!status) { /* Verify the session's negotiated channel_attrs values */ status = nfs4_verify_channel_attrs(&args, &res); /* Increment the clientid slot sequence id */ - if (clp->cl_seqid == res.seqid) - clp->cl_seqid++; if (status) goto out; nfs4_update_session(session, &res); -- cgit v0.10.2 From bd00a240dc52e28706fbbe3aceda63e6c291b433 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 7 Sep 2016 10:46:30 +0530 Subject: powerpc/powernv: Fix restore of SPRs upon wake up from hypervisor state loss pnv_wakeup_tb_loss() currently expects cr4 to be "eq" if the CPU is waking up from a complete hypervisor state loss. Hence, it currently restores the SPR contents only if cr4 is "eq". However, after commit bcef83a00dc4 ("powerpc/powernv: Add platform support for stop instruction"), on ISA v3.0 CPUs, the function pnv_restore_hyp_resource() sets cr4 to contain the result of the comparison between the state the CPU has woken up from and the first deep stop state before calling pnv_wakeup_tb_loss(). Thus if the CPU woke up from a state that is deeper than the first deep stop state, cr4 will have "gt" set and hence, pnv_wakeup_tb_loss() will fail to restore the SPRs on waking up from such a state. Fix the code in pnv_wakeup_tb_loss() to restore the SPR states when cr4 is "eq" or "gt". Fixes: bcef83a00dc4 ("powerpc/powernv: Add platform support for stop instruction") Signed-off-by: Gautham R. Shenoy Reviewed-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 2265c63..bd739fe 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -411,7 +411,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) * * r13 - PACA * cr3 - gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ _GLOBAL(pnv_wakeup_tb_loss) ld r1,PACAR1(r13) @@ -453,7 +453,7 @@ lwarx_loop2: * At this stage * cr2 - eq if first thread to wakeup in core * cr3- gt if waking up with partial/complete hypervisor state loss - * cr4 - eq if waking up from complete hypervisor state loss. + * cr4 - gt or eq if waking up from complete hypervisor state loss. */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -481,7 +481,7 @@ first_thread_in_subcore: * If waking up from sleep, subcore state is not lost. Hence * skip subcore state restore */ - bne cr4,subcore_state_restored + blt cr4,subcore_state_restored /* Restore per-subcore state */ ld r4,_SDR1(r1) @@ -526,7 +526,7 @@ timebase_resync: * If waking up from sleep, per core state is not lost, skip to * clear_lock. */ - bne cr4,clear_lock + blt cr4,clear_lock /* * First thread in the core to wake up and its waking up with @@ -557,7 +557,7 @@ common_exit: * If waking up from sleep, hypervisor state is not lost. Hence * skip hypervisor state restore. */ - bne cr4,hypervisor_state_restored + blt cr4,hypervisor_state_restored /* Waking up from winkle */ -- cgit v0.10.2 From ffed15d3ce3f710b94e6f402e1ca2318f7d7c0e2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Sep 2016 12:48:28 +1000 Subject: powerpc/kernel: Fix size of NUM_CPU_FTR_KEYS on 32-bit The number of CPU feature keys is meant to map 1:1 to the number of CPU feature flags defined in cputable.h, and the latter must fit in an unsigned long. In commit 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()"), I incorrectly defined NUM_CPU_FTR_KEYS to 64. There should be no real adverse consequences of this bug, other than us allocating too many keys. Fix it by using BITS_PER_LONG. Fixes: 4db7327194db ("powerpc: Add option to use jump label for cpu_has_feature()") Tested-by: Meelis Roos Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 2ef55f8..b312b15 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -15,7 +15,7 @@ static inline bool early_cpu_has_feature(unsigned long feature) #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS #include -#define NUM_CPU_FTR_KEYS 64 +#define NUM_CPU_FTR_KEYS BITS_PER_LONG extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS]; -- cgit v0.10.2 From f190fd92458da3e869b4e2c6289e2c617490ae53 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 2 Sep 2016 10:37:56 +0200 Subject: USB: serial: simple: add support for another Infineon flashloader This patch adds support for Infineon flashloader 0x8087/0x0801. The flashloader is used in Telit LE940B modem family with Telit flashing application. Signed-off-by: Daniele Palmas Cc: stable Signed-off-by: Johan Hovold diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index a204782..e98b6e5 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -54,7 +54,8 @@ DEVICE(funsoft, FUNSOFT_IDS); /* Infineon Flashloader driver */ #define FLASHLOADER_IDS() \ { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \ - { USB_DEVICE(0x8087, 0x0716) } + { USB_DEVICE(0x8087, 0x0716) }, \ + { USB_DEVICE(0x8087, 0x0801) } DEVICE(flashloader, FLASHLOADER_IDS); /* Google Serial USB SubClass */ -- cgit v0.10.2 From 981b178964d03f6f8e6cca01568c17d1dbafdf0e Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:35 +0200 Subject: dt-bindings: mmc: sdhci-st: Mention the discretionary "icn" clock The interconnect (ICN) clock is required for functional working of MMC on some ST platforms. When not supplied it can result in broken MMC and the following output: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. [ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress. Signed-off-by: Lee Jones Signed-off-by: Ulf Hansson diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt index 88faa91..3cd4c43 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-st.txt +++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt @@ -10,7 +10,7 @@ Required properties: subsystem (mmcss) inside the FlashSS (available in STiH407 SoC family). -- clock-names: Should be "mmc". +- clock-names: Should be "mmc" and "icn". (NB: The latter is not compulsory) See: Documentation/devicetree/bindings/resource-names.txt - clocks: Phandle to the clock. See: Documentation/devicetree/bindings/clock/clock-bindings.txt -- cgit v0.10.2 From 3ae50f4512ce831e8b63eb54ad969417ff30ada7 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 8 Sep 2016 11:11:36 +0200 Subject: mmc: sdhci-st: Handle interconnect clock Some ST platforms contain interconnect (ICN) clocks which must be handed correctly in order to obtain full functionality of a given IP. In this case, if the ICN clocks are not handled properly by the ST SDHCI driver MMC will break and the following output can be observed: [ 13.916949] mmc0: Timeout waiting for hardware interrupt. [ 13.922349] sdhci: =========== REGISTER DUMP (mmc0)=========== [ 13.928175] sdhci: Sys addr: 0x00000000 | Version: 0x00001002 [ 13.933999] sdhci: Blk size: 0x00007040 | Blk cnt: 0x00000001 [ 13.939825] sdhci: Argument: 0x00fffff0 | Trn mode: 0x00000013 [ 13.945650] sdhci: Present: 0x1fff0206 | Host ctl: 0x00000011 [ 13.951475] sdhci: Power: 0x0000000f | Blk gap: 0x00000080 [ 13.957300] sdhci: Wake-up: 0x00000000 | Clock: 0x00003f07 [ 13.963126] sdhci: Timeout: 0x00000004 | Int stat: 0x00000000 [ 13.968952] sdhci: Int enab: 0x02ff008b | Sig enab: 0x02ff008b [ 13.974777] sdhci: AC12 err: 0x00000000 | Slot int: 0x00000000 [ 13.980602] sdhci: Caps: 0x21ed3281 | Caps_1: 0x00000000 [ 13.986428] sdhci: Cmd: 0x0000063a | Max curr: 0x00000000 [ 13.992252] sdhci: Host ctl2: 0x00000000 [ 13.996166] sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x7c048200 [ 14.001990] sdhci: =========================================== [ 14.009802] mmc0: Got data interrupt 0x02000000 even though no data operation was in progress. A decent point was raised about minimising the use of a local variable that we 'could' do without. I've chosen consistency over the possibility of reducing the local variable count by 1. Thinking that it's more important for the code to be grouped and authoured in a similar manner/style for greater maintainability/readability. Cc: stable@vger.kernel.org Tested-by: Peter Griffin Signed-off-by: Lee Jones Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c index c95ba83..ed92ce72 100644 --- a/drivers/mmc/host/sdhci-st.c +++ b/drivers/mmc/host/sdhci-st.c @@ -28,6 +28,7 @@ struct st_mmc_platform_data { struct reset_control *rstc; + struct clk *icnclk; void __iomem *top_ioaddr; }; @@ -353,7 +354,7 @@ static int sdhci_st_probe(struct platform_device *pdev) struct sdhci_host *host; struct st_mmc_platform_data *pdata; struct sdhci_pltfm_host *pltfm_host; - struct clk *clk; + struct clk *clk, *icnclk; int ret = 0; u16 host_version; struct resource *res; @@ -365,6 +366,11 @@ static int sdhci_st_probe(struct platform_device *pdev) return PTR_ERR(clk); } + /* ICN clock isn't compulsory, but use it if it's provided. */ + icnclk = devm_clk_get(&pdev->dev, "icn"); + if (IS_ERR(icnclk)) + icnclk = NULL; + rstc = devm_reset_control_get(&pdev->dev, NULL); if (IS_ERR(rstc)) rstc = NULL; @@ -389,6 +395,7 @@ static int sdhci_st_probe(struct platform_device *pdev) } clk_prepare_enable(clk); + clk_prepare_enable(icnclk); /* Configure the FlashSS Top registers for setting eMMC TX/RX delay */ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, @@ -400,6 +407,7 @@ static int sdhci_st_probe(struct platform_device *pdev) } pltfm_host->clk = clk; + pdata->icnclk = icnclk; /* Configure the Arasan HC inside the flashSS */ st_mmcss_cconfig(np, host); @@ -422,6 +430,7 @@ static int sdhci_st_probe(struct platform_device *pdev) return 0; err_out: + clk_disable_unprepare(icnclk); clk_disable_unprepare(clk); err_of: sdhci_pltfm_free(pdev); @@ -442,6 +451,8 @@ static int sdhci_st_remove(struct platform_device *pdev) ret = sdhci_pltfm_unregister(pdev); + clk_disable_unprepare(pdata->icnclk); + if (rstc) reset_control_assert(rstc); @@ -462,6 +473,7 @@ static int sdhci_st_suspend(struct device *dev) if (pdata->rstc) reset_control_assert(pdata->rstc); + clk_disable_unprepare(pdata->icnclk); clk_disable_unprepare(pltfm_host->clk); out: return ret; @@ -475,6 +487,7 @@ static int sdhci_st_resume(struct device *dev) struct device_node *np = dev->of_node; clk_prepare_enable(pltfm_host->clk); + clk_prepare_enable(pdata->icnclk); if (pdata->rstc) reset_control_deassert(pdata->rstc); -- cgit v0.10.2 From 83843c80dcf11a78995d167255b03072a1e49c2c Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 28 Aug 2016 13:10:37 +0200 Subject: mac80211: fix tim recalculation after PS response Handle the case where the mac80211 intermediate queues are empty and the driver has buffered frames Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 76b737d..aa58df8 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1616,7 +1616,6 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, sta_info_recalc_tim(sta); } else { - unsigned long tids = sta->txq_buffered_tids & driver_release_tids; int tid; /* @@ -1648,7 +1647,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - if (!(tids & BIT(tid)) || txqi->tin.backlog_packets) + if (!(driver_release_tids & BIT(tid)) || + txqi->tin.backlog_packets) continue; sta_info_recalc_tim(sta); -- cgit v0.10.2 From df6ef5d8a87ace995d5c10a7bd684be05911a321 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 4 Sep 2016 18:00:59 +0200 Subject: mac80211: fix sequence number assignment for PS response frames When using intermediate queues, sequence number allocation is deferred until dequeue. This doesn't work for PS response frames, which bypass those queues. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5023966..cc8e955 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -796,6 +796,36 @@ static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid) return ret; } +static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *pubsta, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_txq *txq = NULL; + + if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || + (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) + return NULL; + + if (!ieee80211_is_data(hdr->frame_control)) + return NULL; + + if (pubsta) { + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + + txq = pubsta->txq[tid]; + } else if (vif) { + txq = vif->txq; + } + + if (!txq) + return NULL; + + return to_txq_info(txq); +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { @@ -853,7 +883,8 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) tid = *qc & IEEE80211_QOS_CTL_TID_MASK; tx->sta->tx_stats.msdu[tid]++; - if (!tx->sta->sta.txq[0]) + if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta, + tx->skb)) hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; @@ -1243,36 +1274,6 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } -static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct ieee80211_sta *pubsta, - struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_txq *txq = NULL; - - if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || - (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) - return NULL; - - if (!ieee80211_is_data(hdr->frame_control)) - return NULL; - - if (pubsta) { - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - - txq = pubsta->txq[tid]; - } else if (vif) { - txq = vif->txq; - } - - if (!txq) - return NULL; - - return to_txq_info(txq); -} - static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb) { IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time(); @@ -3264,7 +3265,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { *ieee80211_get_qos_ctl(hdr) = tid; - if (!sta->sta.txq[0]) + if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb)) hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); } else { info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; -- cgit v0.10.2 From ad8d52b897a14711e026889053befbbee7fd51ba Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: pcmcia: ds: fix suspend/resume PCMCIA suspend/resume no longer works since the commit mentioned below, as the callbacks are no longer made. Convert the driver to the new dev_pm_ops, which restores the suspend/resume functionality. Tested on the arm arch Assabet platform. Fixes: aa8e54b559479 ("PM / sleep: Go direct_complete if driver has no callbacks") Signed-off-by: Russell King diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 489ea10..69b5e81 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -977,7 +977,7 @@ static int pcmcia_bus_uevent(struct device *dev, struct kobj_uevent_env *env) /************************ runtime PM support ***************************/ -static int pcmcia_dev_suspend(struct device *dev, pm_message_t state); +static int pcmcia_dev_suspend(struct device *dev); static int pcmcia_dev_resume(struct device *dev); static int runtime_suspend(struct device *dev) @@ -985,7 +985,7 @@ static int runtime_suspend(struct device *dev) int rc; device_lock(dev); - rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND); + rc = pcmcia_dev_suspend(dev); device_unlock(dev); return rc; } @@ -1135,7 +1135,7 @@ ATTRIBUTE_GROUPS(pcmcia_dev); /* PM support, also needed for reset */ -static int pcmcia_dev_suspend(struct device *dev, pm_message_t state) +static int pcmcia_dev_suspend(struct device *dev) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); struct pcmcia_driver *p_drv = NULL; @@ -1410,6 +1410,9 @@ static struct class_interface pcmcia_bus_interface __refdata = { .remove_dev = &pcmcia_bus_remove_socket, }; +static const struct dev_pm_ops pcmcia_bus_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pcmcia_dev_suspend, pcmcia_dev_resume) +}; struct bus_type pcmcia_bus_type = { .name = "pcmcia", @@ -1418,8 +1421,7 @@ struct bus_type pcmcia_bus_type = { .dev_groups = pcmcia_dev_groups, .probe = pcmcia_device_probe, .remove = pcmcia_device_remove, - .suspend = pcmcia_dev_suspend, - .resume = pcmcia_dev_resume, + .pm = &pcmcia_bus_pm_ops, }; -- cgit v0.10.2 From 6dec04e8f30a8cf1e782500244d8601c1f8505ad Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: pcmcia: sa11xx_base: fix reporting of timing information Fix the reporting of the currently programmed timing information. These entries have been showing zero due to the clock rate being a factor of 1000 too big. With this change, we go from: I/O : 165 (0) attribute: 300 (0) common : 300 (0) to: I/O : 165 (172) attribute: 300 (316) common : 300 (316) Signed-off-by: Russell King diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index 9f6ec87..af37b7e 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -144,7 +144,7 @@ static int sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf) { struct soc_pcmcia_timing timing; - unsigned int clock = clk_get_rate(skt->clk); + unsigned int clock = clk_get_rate(skt->clk) / 1000; unsigned long mecr = MECR; char *p = buf; -- cgit v0.10.2 From cbd5a16820e576d26bf985ad62b8c4cdf792fb45 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: pcmcia: sa11xx_base: add units to the timing information Add units to the timing information, so we know that the numbers are nanoseconds. The output changes from: I/O : 165 (172) attribute: 300 (316) common : 300 (316) to: I/O : 165ns (172ns) attribute: 300ns (316ns) common : 300ns (316ns) Signed-off-by: Russell King diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index af37b7e..48140ac 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -150,13 +150,13 @@ sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf) soc_common_pcmcia_get_timing(skt, &timing); - p+=sprintf(p, "I/O : %u (%u)\n", timing.io, + p+=sprintf(p, "I/O : %uns (%uns)\n", timing.io, sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr))); - p+=sprintf(p, "attribute: %u (%u)\n", timing.attr, + p+=sprintf(p, "attribute: %uns (%uns)\n", timing.attr, sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr))); - p+=sprintf(p, "common : %u (%u)\n", timing.mem, + p+=sprintf(p, "common : %uns (%uns)\n", timing.mem, sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr))); return p - buf; -- cgit v0.10.2 From a466ebd2fc6a793e55f028a008b9f094d7d30fe3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:43 +0100 Subject: pcmcia: soc_common: fix SS_STSCHG polarity SS_STSCHG should be set for an IO card when the BVD1 signal is asserted low, not high. Signed-off-by: Russell King diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index eed5e9c..d5ca760 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -235,7 +235,7 @@ static unsigned int soc_common_pcmcia_skt_state(struct soc_pcmcia_socket *skt) stat |= skt->cs_state.Vcc ? SS_POWERON : 0; if (skt->cs_state.flags & SS_IOCARD) - stat |= state.bvd1 ? SS_STSCHG : 0; + stat |= state.bvd1 ? 0 : SS_STSCHG; else { if (state.bvd1 == 0) stat |= SS_BATDEAD; -- cgit v0.10.2 From 3f8df892b2312011f2ba73aedc0a192d70b8844e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 2 Sep 2016 10:14:20 +0100 Subject: pcmcia: sa1111: fix propagation of lowlevel board init return code When testing Lubbock, it was noticed that the sa1111 pcmcia driver bound but was not functional due to no sockets being registered. This is because the return code from the lowlevel board initialisation was not being propagated out of the probe function. Fix this. Tested-by: Robert Jarzmik Signed-off-by: Russell King diff --git a/drivers/pcmcia/sa1111_badge4.c b/drivers/pcmcia/sa1111_badge4.c index 12f0dd0..2f49093 100644 --- a/drivers/pcmcia/sa1111_badge4.c +++ b/drivers/pcmcia/sa1111_badge4.c @@ -134,20 +134,14 @@ static struct pcmcia_low_level badge4_pcmcia_ops = { int pcmcia_badge4_init(struct sa1111_dev *dev) { - int ret = -ENODEV; - - if (machine_is_badge4()) { - printk(KERN_INFO - "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n", - __func__, - badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc); - - sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops); - ret = sa1111_pcmcia_add(dev, &badge4_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + printk(KERN_INFO + "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n", + __func__, + badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc); + + sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops); + return sa1111_pcmcia_add(dev, &badge4_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } static int __init pcmv_setup(char *s) diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index a1531fe..3d95dff 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "sa1111_generic.h" @@ -203,19 +204,30 @@ static int pcmcia_probe(struct sa1111_dev *dev) sa1111_writel(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR); sa1111_writel(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR); + ret = -ENODEV; #ifdef CONFIG_SA1100_BADGE4 - pcmcia_badge4_init(dev); + if (machine_is_badge4()) + ret = pcmcia_badge4_init(dev); #endif #ifdef CONFIG_SA1100_JORNADA720 - pcmcia_jornada720_init(dev); + if (machine_is_jornada720()) + ret = pcmcia_jornada720_init(dev); #endif #ifdef CONFIG_ARCH_LUBBOCK - pcmcia_lubbock_init(dev); + if (machine_is_lubbock()) + ret = pcmcia_lubbock_init(dev); #endif #ifdef CONFIG_ASSABET_NEPONSET - pcmcia_neponset_init(dev); + if (machine_is_assabet()) + ret = pcmcia_neponset_init(dev); #endif - return 0; + + if (ret) { + release_mem_region(dev->res.start, 512); + sa1111_disable_device(dev); + } + + return ret; } static int pcmcia_remove(struct sa1111_dev *dev) diff --git a/drivers/pcmcia/sa1111_jornada720.c b/drivers/pcmcia/sa1111_jornada720.c index c2c3058..480a3ed 100644 --- a/drivers/pcmcia/sa1111_jornada720.c +++ b/drivers/pcmcia/sa1111_jornada720.c @@ -94,22 +94,17 @@ static struct pcmcia_low_level jornada720_pcmcia_ops = { int pcmcia_jornada720_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; + unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; - if (machine_is_jornada720()) { - unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; + /* Fixme: why messing around with SA11x0's GPIO1? */ + GRER |= 0x00000002; - GRER |= 0x00000002; + /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ + sa1111_set_io_dir(sadev, pin, 0, 0); + sa1111_set_io(sadev, pin, 0); + sa1111_set_sleep_io(sadev, pin, 0); - /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ - sa1111_set_io_dir(sadev, pin, 0, 0); - sa1111_set_io(sadev, pin, 0); - sa1111_set_sleep_io(sadev, pin, 0); - - sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); - ret = sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); + return sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } diff --git a/drivers/pcmcia/sa1111_lubbock.c b/drivers/pcmcia/sa1111_lubbock.c index c5caf57..df2b6b2 100644 --- a/drivers/pcmcia/sa1111_lubbock.c +++ b/drivers/pcmcia/sa1111_lubbock.c @@ -210,27 +210,21 @@ static struct pcmcia_low_level lubbock_pcmcia_ops = { int pcmcia_lubbock_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; - - if (machine_is_lubbock()) { - /* - * Set GPIO_A<3:0> to be outputs for the MAX1600, - * and switch to standby mode. - */ - sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); - sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - - /* Set CF Socket 1 power to standby mode. */ - lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); + /* + * Set GPIO_A<3:0> to be outputs for the MAX1600, + * and switch to standby mode. + */ + sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); + sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); - pxa2xx_configure_sockets(&sadev->dev); - ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, - pxa2xx_drv_pcmcia_add_one); - } + /* Set CF Socket 1 power to standby mode. */ + lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); - return ret; + pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); + pxa2xx_configure_sockets(&sadev->dev); + return sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, + pxa2xx_drv_pcmcia_add_one); } MODULE_LICENSE("GPL"); diff --git a/drivers/pcmcia/sa1111_neponset.c b/drivers/pcmcia/sa1111_neponset.c index 1d78739..019c395 100644 --- a/drivers/pcmcia/sa1111_neponset.c +++ b/drivers/pcmcia/sa1111_neponset.c @@ -110,20 +110,14 @@ static struct pcmcia_low_level neponset_pcmcia_ops = { int pcmcia_neponset_init(struct sa1111_dev *sadev) { - int ret = -ENODEV; - - if (machine_is_assabet()) { - /* - * Set GPIO_A<3:0> to be outputs for the MAX1600, - * and switch to standby mode. - */ - sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); - sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops); - ret = sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops, - sa11xx_drv_pcmcia_add_one); - } - - return ret; + /* + * Set GPIO_A<3:0> to be outputs for the MAX1600, + * and switch to standby mode. + */ + sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); + sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); + sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops); + return sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } -- cgit v0.10.2 From 817ed5748e40bbc5b5f2aa0c3094c4a7adfb8881 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Thu, 1 Sep 2016 08:31:08 +0200 Subject: pcmcia: lubbock: fix sockets configuration On lubbock board, the probe of the driver crashes by dereferencing very early a platform_data structure which is not set, in pxa2xx_configure_sockets(). The stack fixed is : [ 0.244353] SA1111 Microprocessor Companion Chip: silicon revision 1, metal revision 1 [ 0.256321] sa1111 sa1111: Providing IRQ336-390 [ 0.340899] clocksource: Switched to clocksource oscr0 [ 0.472263] Unable to handle kernel NULL pointer dereference at virtual address 00000004 [ 0.480469] pgd = c0004000 [ 0.483432] [00000004] *pgd=00000000 [ 0.487105] Internal error: Oops: f5 [#1] ARM [ 0.491497] Modules linked in: [ 0.494650] CPU: 0 PID: 1 Comm: swapper Not tainted 4.8.0-rc3-00080-g1aaa68426f0c-dirty #2068 [ 0.503229] Hardware name: Intel DBPXA250 Development Platform (aka Lubbock) [ 0.510344] task: c3e42000 task.stack: c3e44000 [ 0.514984] PC is at pxa2xx_configure_sockets+0x4/0x24 (drivers/pcmcia/pxa2xx_base.c:227) [ 0.520193] LR is at pcmcia_lubbock_init+0x1c/0x38 [ 0.525079] pc : [] lr : [] psr: a0000053 [ 0.525079] sp : c3e45e70 ip : 100019ff fp : 00000000 [ 0.536651] r10: c0828900 r9 : c0434838 r8 : 00000000 [ 0.541953] r7 : c0820700 r6 : c0857b30 r5 : c3ec1400 r4 : c0820758 [ 0.548549] r3 : 00000000 r2 : 0000000c r1 : c3c09c40 r0 : c3ec1400 [ 0.555154] Flags: NzCv IRQs on FIQs off Mode SVC_32 ISA ARM Segment none [ 0.562450] Control: 0000397f Table: a0004000 DAC: 00000053 [ 0.568257] Process swapper (pid: 1, stack limit = 0xc3e44190) [ 0.574154] Stack: (0xc3e45e70 to 0xc3e46000) [ 0.578610] 5e60: c4849800 00000000 c3ec1400 c024769c [ 0.586928] 5e80: 00000000 c3ec140c c3c0ee0c c3ec1400 c3ec1434 c020c410 c3ec1400 c3ec1434 [ 0.595244] 5ea0: c0820700 c080b408 c0828900 c020c5f8 00000000 c0820700 c020c578 c020ac5c [ 0.603560] 5ec0: c3e687cc c3e71e10 c0820700 00000000 c3c02de0 c020bae4 c03c62f7 c03c62f7 [ 0.611872] 5ee0: c3e68780 c0820700 c042e034 00000000 c043c440 c020cdec c080b408 00000005 [ 0.620188] 5f00: c042e034 c00096c0 c0034440 c01c730c 20000053 ffffffff 00000000 00000000 [ 0.628502] 5f20: 00000000 c3ffcb87 c3ffcb90 c00346ac c3e66ba0 c03f7914 00000092 00000005 [ 0.636811] 5f40: 00000005 c03f847c 00000091 c03f847c 00000000 00000005 c0434828 00000005 [ 0.645125] 5f60: c043482c 00000092 c043c440 c0828900 c0434838 c0418d2c 00000005 00000005 [ 0.653430] 5f80: 00000000 c041858c 00000000 c032e9f0 00000000 00000000 00000000 00000000 [ 0.661729] 5fa0: 00000000 c032e9f8 00000000 c000f0f0 00000000 00000000 00000000 00000000 [ 0.670020] 5fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 [ 0.678311] 5fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [ 0.686673] (pxa2xx_configure_sockets) from pcmcia_lubbock_init (/drivers/pcmcia/sa1111_lubbock.c:161) [ 0.696026] (pcmcia_lubbock_init) from pcmcia_probe (/drivers/pcmcia/sa1111_generic.c:213) [ 0.704358] (pcmcia_probe) from driver_probe_device (/drivers/base/dd.c:378 /drivers/base/dd.c:499) [ 0.712848] (driver_probe_device) from __driver_attach (/./include/linux/device.h:983 /drivers/base/dd.c:733) [ 0.721414] (__driver_attach) from bus_for_each_dev (/drivers/base/bus.c:313) [ 0.729723] (bus_for_each_dev) from bus_add_driver (/drivers/base/bus.c:708) [ 0.738036] (bus_add_driver) from driver_register (/drivers/base/driver.c:169) [ 0.746185] (driver_register) from do_one_initcall (/init/main.c:778) [ 0.754561] (do_one_initcall) from kernel_init_freeable (/init/main.c:843 /init/main.c:851 /init/main.c:869 /init/main.c:1016) [ 0.763409] (kernel_init_freeable) from kernel_init (/init/main.c:944) [ 0.771660] (kernel_init) from ret_from_fork (/arch/arm/kernel/entry-common.S:119) [ 0.779347] Code: c03c6305 c03c631e c03c632e e5903048 (e993000c) All code ======== 0: c03c6305 eorsgt r6, ip, r5, lsl #6 4: c03c631e eorsgt r6, ip, lr, lsl r3 8: c03c632e eorsgt r6, ip, lr, lsr #6 c: e5903048 ldr r3, [r0, #72] ; 0x48 10:* e993000c ldmib r3, {r2, r3} <-- trapping instruction Signed-off-by: Robert Jarzmik Signed-off-by: Russell King diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index 483f919..91b5f57 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -214,9 +214,8 @@ pxa2xx_pcmcia_frequency_change(struct soc_pcmcia_socket *skt, } #endif -void pxa2xx_configure_sockets(struct device *dev) +void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops) { - struct pcmcia_low_level *ops = dev->platform_data; /* * We have at least one socket, so set MECR:CIT * (Card Is There) @@ -322,7 +321,7 @@ static int pxa2xx_drv_pcmcia_probe(struct platform_device *dev) goto err1; } - pxa2xx_configure_sockets(&dev->dev); + pxa2xx_configure_sockets(&dev->dev, ops); dev_set_drvdata(&dev->dev, sinfo); return 0; @@ -348,7 +347,9 @@ static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev) static int pxa2xx_drv_pcmcia_resume(struct device *dev) { - pxa2xx_configure_sockets(dev); + struct pcmcia_low_level *ops = (struct pcmcia_low_level *)dev->platform_data; + + pxa2xx_configure_sockets(dev, ops); return 0; } diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h index b609b45..e58c7a4 100644 --- a/drivers/pcmcia/pxa2xx_base.h +++ b/drivers/pcmcia/pxa2xx_base.h @@ -1,4 +1,4 @@ int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); -void pxa2xx_configure_sockets(struct device *dev); +void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops); diff --git a/drivers/pcmcia/sa1111_lubbock.c b/drivers/pcmcia/sa1111_lubbock.c index df2b6b2..e741f49 100644 --- a/drivers/pcmcia/sa1111_lubbock.c +++ b/drivers/pcmcia/sa1111_lubbock.c @@ -222,7 +222,7 @@ int pcmcia_lubbock_init(struct sa1111_dev *sadev) lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); - pxa2xx_configure_sockets(&sadev->dev); + pxa2xx_configure_sockets(&sadev->dev, &lubbock_pcmcia_ops); return sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, pxa2xx_drv_pcmcia_add_one); } -- cgit v0.10.2 From cb034407ec3f816540f359300cda1122faabdbbd Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 3 Sep 2016 10:21:51 +0100 Subject: ARM: sa1111: fix error code propagation in sa1111_probe() Ensure that we propagate the platform_get_irq() error code out of the probe function. This allows probe deferrals to work correctly should platform_get_irq() not be able to resolve the interrupt in a DT environment at probe time. Signed-off-by: Russell King diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index fb0a0a4..332b923 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -1017,7 +1017,7 @@ static int sa1111_probe(struct platform_device *pdev) return -EINVAL; irq = platform_get_irq(pdev, 0); if (irq < 0) - return -ENXIO; + return irq; return __sa1111_probe(&pdev->dev, mem, irq); } -- cgit v0.10.2 From 7c0091eceab231b59e51b80bbcf5a2205a0fa905 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 14:21:46 +0100 Subject: ARM: sa1111: fix pcmcia interrupt mask polarity The polarity of the high IRQs was being calculated using SA1111_IRQMASK_HI(), but this assumes a Linux interrupt number, not a hardware interrupt number. Hence, the resulting mask was incorrect. Fix this. Signed-off-by: Russell King diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 332b923..cfa61b8 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -472,8 +472,8 @@ static int sa1111_setup_irq(struct sa1111 *sachip, unsigned irq_base) * specifies that S0ReadyInt and S1ReadyInt should be '1'. */ sa1111_writel(0, irqbase + SA1111_INTPOL0); - sa1111_writel(SA1111_IRQMASK_HI(IRQ_S0_READY_NINT) | - SA1111_IRQMASK_HI(IRQ_S1_READY_NINT), + sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) | + BIT(IRQ_S1_READY_NINT & 31), irqbase + SA1111_INTPOL1); /* clear all IRQs */ -- cgit v0.10.2 From 06dfe5cc0cc684e735cb0232fdb756d30780b05d Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 14:34:05 +0100 Subject: ARM: sa1111: fix pcmcia suspend/resume SA1111 PCMCIA was broken when PCMCIA switched to using dev_pm_ops for the PCMCIA socket class. PCMCIA used to handle suspend/resume via the socket hosting device, which happened at normal device suspend/resume time. However, the referenced commit changed this: much of the resume now happens much earlier, in the noirq resume handler of dev_pm_ops. However, on SA1111, the PCMCIA device is not accessible as the SA1111 has not been resumed at _noirq time. It's slightly worse than that, because the SA1111 has already been put to sleep at _noirq time, so suspend doesn't work properly. Fix this by converting the core SA1111 code to use dev_pm_ops as well, and performing its own suspend/resume at noirq time. This fixes these errors in the kernel log: pcmcia_socket pcmcia_socket0: time out after reset pcmcia_socket pcmcia_socket1: time out after reset and the resulting lack of PCMCIA cards after a S2RAM cycle. Fixes: d7646f7632549 ("pcmcia: use dev_pm_ops for class pcmcia_socket_class") Signed-off-by: Russell King diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index cfa61b8..7838659 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -869,9 +869,9 @@ struct sa1111_save_data { #ifdef CONFIG_PM -static int sa1111_suspend(struct platform_device *dev, pm_message_t state) +static int sa1111_suspend_noirq(struct device *dev) { - struct sa1111 *sachip = platform_get_drvdata(dev); + struct sa1111 *sachip = dev_get_drvdata(dev); struct sa1111_save_data *save; unsigned long flags; unsigned int val; @@ -934,9 +934,9 @@ static int sa1111_suspend(struct platform_device *dev, pm_message_t state) * restored by their respective drivers, and must be called * via LDM after this function. */ -static int sa1111_resume(struct platform_device *dev) +static int sa1111_resume_noirq(struct device *dev) { - struct sa1111 *sachip = platform_get_drvdata(dev); + struct sa1111 *sachip = dev_get_drvdata(dev); struct sa1111_save_data *save; unsigned long flags, id; void __iomem *base; @@ -952,7 +952,7 @@ static int sa1111_resume(struct platform_device *dev) id = sa1111_readl(sachip->base + SA1111_SKID); if ((id & SKID_ID_MASK) != SKID_SA1111_ID) { __sa1111_remove(sachip); - platform_set_drvdata(dev, NULL); + dev_set_drvdata(dev, NULL); kfree(save); return 0; } @@ -1003,8 +1003,8 @@ static int sa1111_resume(struct platform_device *dev) } #else -#define sa1111_suspend NULL -#define sa1111_resume NULL +#define sa1111_suspend_noirq NULL +#define sa1111_resume_noirq NULL #endif static int sa1111_probe(struct platform_device *pdev) @@ -1038,6 +1038,11 @@ static int sa1111_remove(struct platform_device *pdev) return 0; } +static struct dev_pm_ops sa1111_pm_ops = { + .suspend_noirq = sa1111_suspend_noirq, + .resume_noirq = sa1111_resume_noirq, +}; + /* * Not sure if this should be on the system bus or not yet. * We really want some way to register a system device at @@ -1050,10 +1055,9 @@ static int sa1111_remove(struct platform_device *pdev) static struct platform_driver sa1111_device_driver = { .probe = sa1111_probe, .remove = sa1111_remove, - .suspend = sa1111_suspend, - .resume = sa1111_resume, .driver = { .name = "sa1111", + .pm = &sa1111_pm_ops, }, }; -- cgit v0.10.2 From 87d5dd62c07f90ed2b0d6718f5c666f69e7d39b0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Sep 2016 16:09:17 +0100 Subject: ARM: sa1111: fix missing clk_disable() SA1111 forgets to call clk_disable() in the probe error cleanup path. Add the necessary call. Signed-off-by: Russell King diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c index 7838659..2e076c4 100644 --- a/arch/arm/common/sa1111.c +++ b/arch/arm/common/sa1111.c @@ -754,7 +754,7 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) if (sachip->irq != NO_IRQ) { ret = sa1111_setup_irq(sachip, pd->irq_base); if (ret) - goto err_unmap; + goto err_clk; } #ifdef CONFIG_ARCH_SA1100 @@ -799,6 +799,8 @@ static int __sa1111_probe(struct device *me, struct resource *mem, int irq) return 0; + err_clk: + clk_disable(sachip->clk); err_unmap: iounmap(sachip->base); err_clk_unprep: -- cgit v0.10.2 From 5df20f2141eadb5430caaad20eceac61cfe0f139 Mon Sep 17 00:00:00 2001 From: "Pedersen, Thomas" Date: Tue, 6 Sep 2016 11:59:00 -0700 Subject: mac80211: make mpath path fixing more robust A fixed mpath was not quite being treated as such: 1) if a PERR frame was received, a fixed mpath was deactivated. 2) queued path discovery for fixed mpath was potentially being considered, changing mpath state. 3) other mpath flags were potentially being inherited when fixing the mpath. Just assign PATH_FIXED and SN_VALID. This solves several issues when fixing a mesh path in one direction. The reverse direction mpath should probably also be fixed, or root announcements at least be enabled. Signed-off-by: Thomas Pedersen Signed-off-by: Johannes Berg diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 8f9c3bd..faccef9 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -746,6 +746,7 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, sta = next_hop_deref_protected(mpath); if (mpath->flags & MESH_PATH_ACTIVE && ether_addr_equal(ta, sta->sta.addr) && + !(mpath->flags & MESH_PATH_FIXED) && (!(mpath->flags & MESH_PATH_SN_VALID) || SN_GT(target_sn, mpath->sn) || target_sn == 0)) { mpath->flags &= ~MESH_PATH_ACTIVE; @@ -1012,7 +1013,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) goto enddiscovery; spin_lock_bh(&mpath->state_lock); - if (mpath->flags & MESH_PATH_DELETED) { + if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) { spin_unlock_bh(&mpath->state_lock); goto enddiscovery; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 6db2ddf..f0e6175 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -826,7 +826,7 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) mpath->metric = 0; mpath->hop_count = 0; mpath->exp_time = 0; - mpath->flags |= MESH_PATH_FIXED; + mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID; mesh_path_activate(mpath); spin_unlock_bh(&mpath->state_lock); mesh_path_tx_pending(mpath); -- cgit v0.10.2 From 6b3142b2b852cd5e3216d1aa800a0a49377e6e1c Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 7 Sep 2016 21:56:09 +0100 Subject: ARM: 8612/1: LPAE: initialize cache policy correctly The cachepolicy variable gets initialized using a masked pmd value. So far, the pmd has been masked with flags valid for the 2-page table format, but the 3-page table format requires a different mask. On LPAE, this lead to a wrong assumption of what initial cache policy has been used. Later a check forces the cache policy to writealloc and prints the following warning: Forcing write-allocate cache policy for SMP This patch introduces a new definition PMD_SECT_CACHE_MASK for both page table formats which masks in all cache flags in both cases. Signed-off-by: Stefan Agner Signed-off-by: Russell King diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index d0131ee..3f82e9d 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -47,6 +47,7 @@ #define PMD_SECT_WB (PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) #define PMD_SECT_MINICACHE (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE) #define PMD_SECT_WBWA (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) +#define PMD_SECT_CACHE_MASK (PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE) #define PMD_SECT_NONSHARED_DEV (PMD_SECT_TEX(2)) /* diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index f8f1cff..4cd664a 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -62,6 +62,7 @@ #define PMD_SECT_WT (_AT(pmdval_t, 2) << 2) /* normal inner write-through */ #define PMD_SECT_WB (_AT(pmdval_t, 3) << 2) /* normal inner write-back */ #define PMD_SECT_WBWA (_AT(pmdval_t, 7) << 2) /* normal inner write-alloc */ +#define PMD_SECT_CACHE_MASK (_AT(pmdval_t, 7) << 2) /* * + Level 3 descriptor (PTE) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6344913..30fe03f 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -137,7 +137,7 @@ void __init init_default_cache_policy(unsigned long pmd) initial_pmd_value = pmd; - pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE; + pmd &= PMD_SECT_CACHE_MASK; for (i = 0; i < ARRAY_SIZE(cache_policies); i++) if (cache_policies[i].pmd == pmd) { -- cgit v0.10.2 From 07f56e6646228da27122e81d5a5a232fdf3d3a50 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 2 Sep 2016 22:08:45 +0100 Subject: ARM: locomo: fix locomo irq handling Accidentally booting Collie on Assabet reveals that the locomo driver incorrectly overwrites gpio-sa1100's chip data for its parent interrupt, leading to oops in sa1100_gpio_unmask() and sa1100_update_edge_regs() when "gpio: sa1100: convert to use IO accessors" is applied. Fix locomo to use the handler data rather than chip data for its parent interrupt. Signed-off-by: Russell King diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c index 0e97b4b..6c7b06854 100644 --- a/arch/arm/common/locomo.c +++ b/arch/arm/common/locomo.c @@ -140,7 +140,7 @@ static struct locomo_dev_info locomo_devices[] = { static void locomo_handler(struct irq_desc *desc) { - struct locomo *lchip = irq_desc_get_chip_data(desc); + struct locomo *lchip = irq_desc_get_handler_data(desc); int req, i; /* Acknowledge the parent IRQ */ @@ -200,8 +200,7 @@ static void locomo_setup_irq(struct locomo *lchip) * Install handler for IRQ_LOCOMO_HW. */ irq_set_irq_type(lchip->irq, IRQ_TYPE_EDGE_FALLING); - irq_set_chip_data(lchip->irq, lchip); - irq_set_chained_handler(lchip->irq, locomo_handler); + irq_set_chained_handler_and_data(lchip->irq, locomo_handler, lchip); /* Install handlers for IRQ_LOCOMO_* */ for ( ; irq <= lchip->irq_base + 3; irq++) { -- cgit v0.10.2 From 1a57c286d8ced1e4144c6201a19bbb70827edee6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 4 Sep 2016 21:45:56 +0100 Subject: ARM: pxa/lubbock: add pcmcia clock Add the required PCMCIA clock for the SA1111 "1800" device. This clock is used to compute timing information for the PCMCIA interface in the SoC device, rather than the SA1111. Hence, the provision of this clock is a convenience for the driver and does not reflect the hardware, so this must not be copied into DT. Acked-by: Robert Jarzmik Tested-by: Robert Jarzmik Signed-off-by: Russell King diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index 7245f33..d6159f8 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -137,6 +137,18 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = { // no D+ pullup; lubbock can't connect/disconnect in software }; +static void lubbock_init_pcmcia(void) +{ + struct clk *clk; + + /* Add an alias for the SA1111 PCMCIA clock */ + clk = clk_get_sys("pxa2xx-pcmcia", NULL); + if (!IS_ERR(clk)) { + clkdev_create(clk, NULL, "1800"); + clk_put(clk); + } +} + static struct resource sa1111_resources[] = { [0] = { .start = 0x10000000, @@ -467,6 +479,8 @@ static void __init lubbock_init(void) pxa_set_btuart_info(NULL); pxa_set_stuart_info(NULL); + lubbock_init_pcmcia(); + clk_add_alias("SA1111_CLK", NULL, "GPIO11_CLK", NULL); pxa_set_udc_info(&udc_info); pxa_set_fb_info(NULL, &sharp_lm8v31); -- cgit v0.10.2 From ecfcdfec7e0cc64215a194044305f02a5a836e6d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 9 Sep 2016 15:38:12 +0200 Subject: netfilter: nf_nat: handle NF_DROP from nfnetlink_parse_nat_setup() nf_nat_setup_info() returns NF_* verdicts, so convert them to error codes that is what ctnelink expects. This has passed overlook without having any impact since this nf_nat_setup_info() has always returned NF_ACCEPT so far. Since 870190a9ec90 ("netfilter: nat: convert nat bysrc hash to rhashtable"), this is problem. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index de31818..19c081e 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -807,7 +807,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, if (err < 0) return err; - return nf_nat_setup_info(ct, &range, manip); + return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0; } #else static int -- cgit v0.10.2 From e89ca58f9c901c8c4cfb09f96d879b186bb01492 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Sep 2016 09:01:54 -0700 Subject: nvme-rdma: add DELETING queue flag When we get a surprise disconnect from the target we queue a periodic reconnect (which is the sane thing to do...). We only move the queues out of CONNECTED when we retry to reconnect (after 10 seconds in the default case) but we stop the blk queues immediately so we are not bothered with traffic from now on. If delete() is kicking off in this period the queues are still in CONNECTED state. Part of the delete sequence is trying to issue ctrl shutdown if the admin queue is CONNECTED (which it is!). This request is issued but stuck in blk-mq waiting for the queues to start again. This might be the one preventing us from forward progress... The patch separates the queue flags to CONNECTED and DELETING. Now we will move out of CONNECTED as soon as error recovery kicks in (before stopping the queues) and DELETING is on when we start the queue deletion. Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a9d43f0..eeb08b6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -83,6 +83,7 @@ struct nvme_rdma_request { enum nvme_rdma_queue_flags { NVME_RDMA_Q_CONNECTED = (1 << 0), NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1), + NVME_RDMA_Q_DELETING = (1 << 2), }; struct nvme_rdma_queue { @@ -559,6 +560,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; + queue->flags = 0; init_completion(&queue->cm_done); if (idx > 0) @@ -616,7 +618,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) { - if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) + if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; nvme_rdma_stop_queue(queue); nvme_rdma_free_queue(queue); @@ -769,8 +771,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); + int i; nvme_stop_keep_alive(&ctrl->ctrl); + + for (i = 0; i < ctrl->queue_count; i++) + clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags); + if (ctrl->queue_count > 1) nvme_stop_queues(&ctrl->ctrl); blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); @@ -1350,7 +1357,7 @@ static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) cancel_delayed_work_sync(&ctrl->reconnect_work); /* Disable the queue so ctrl delete won't free it */ - if (test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags)) { + if (!test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) { /* Free this queue ourselves */ nvme_rdma_stop_queue(queue); nvme_rdma_destroy_queue_ib(queue); -- cgit v0.10.2 From e87a911fed07e368c6f97e75152e6297a7dfba48 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 2 Sep 2016 09:01:54 -0700 Subject: nvme-rdma: use ib_client API to detect device removal Change nvme-rdma to use the IB Client API to detect device removal. This has the wonderful benefit of being able to blow away all the ib/rdma_cm resources for the device being removed. No craziness about not destroying the cm_id handling the event. No deadlocks due to broken iw_cm/rdma_cm/iwarp dependencies. And no need to have a bound cm_id around during controller recovery/reconnect to catch device removal events. We don't use the device_add aspect of the ib_client service since we only want to create resources for an IB device if we have a target utilizing that device. Reviewed-by: Christoph Hellwig Signed-off-by: Steve Wise Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index eeb08b6..d6bdf55 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1320,64 +1320,6 @@ out_destroy_queue_ib: return ret; } -/** - * nvme_rdma_device_unplug() - Handle RDMA device unplug - * @queue: Queue that owns the cm_id that caught the event - * - * DEVICE_REMOVAL event notifies us that the RDMA device is about - * to unplug so we should take care of destroying our RDMA resources. - * This event will be generated for each allocated cm_id. - * - * In our case, the RDMA resources are managed per controller and not - * only per queue. So the way we handle this is we trigger an implicit - * controller deletion upon the first DEVICE_REMOVAL event we see, and - * hold the event inflight until the controller deletion is completed. - * - * One exception that we need to handle is the destruction of the cm_id - * that caught the event. Since we hold the callout until the controller - * deletion is completed, we'll deadlock if the controller deletion will - * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership - * of destroying this queue before-hand, destroy the queue resources, - * then queue the controller deletion which won't destroy this queue and - * we destroy the cm_id implicitely by returning a non-zero rc to the callout. - */ -static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue) -{ - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - int ret = 0; - - /* Own the controller deletion */ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return 0; - - dev_warn(ctrl->ctrl.device, - "Got rdma device removal event, deleting ctrl\n"); - - /* Get rid of reconnect work if its running */ - cancel_delayed_work_sync(&ctrl->reconnect_work); - - /* Disable the queue so ctrl delete won't free it */ - if (!test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) { - /* Free this queue ourselves */ - nvme_rdma_stop_queue(queue); - nvme_rdma_destroy_queue_ib(queue); - - /* Return non-zero so the cm_id will destroy implicitly */ - ret = 1; - } - - /* - * Queue controller deletion. Keep a reference until all - * work is flushed since delete_work will free the ctrl mem - */ - kref_get(&ctrl->ctrl.kref); - queue_work(nvme_rdma_wq, &ctrl->delete_work); - flush_work(&ctrl->delete_work); - nvme_put_ctrl(&ctrl->ctrl); - - return ret; -} - static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *ev) { @@ -1419,8 +1361,8 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, nvme_rdma_error_recovery(queue->ctrl); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: - /* return 1 means impliciy CM ID destroy */ - return nvme_rdma_device_unplug(queue); + /* device removal is handled via the ib_client API */ + break; default: dev_err(queue->ctrl->ctrl.device, "Unexpected RDMA CM event (%d)\n", ev->event); @@ -2030,27 +1972,57 @@ static struct nvmf_transport_ops nvme_rdma_transport = { .create_ctrl = nvme_rdma_create_ctrl, }; +static void nvme_rdma_add_one(struct ib_device *ib_device) +{ +} + +static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) +{ + struct nvme_rdma_ctrl *ctrl; + + /* Delete all controllers using this device */ + mutex_lock(&nvme_rdma_ctrl_mutex); + list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) { + if (ctrl->device->dev != ib_device) + continue; + dev_info(ctrl->ctrl.device, + "Removing ctrl: NQN \"%s\", addr %pISp\n", + ctrl->ctrl.opts->subsysnqn, &ctrl->addr); + __nvme_rdma_del_ctrl(ctrl); + } + mutex_unlock(&nvme_rdma_ctrl_mutex); + + flush_workqueue(nvme_rdma_wq); +} + +static struct ib_client nvme_rdma_ib_client = { + .name = "nvme_rdma", + .add = nvme_rdma_add_one, + .remove = nvme_rdma_remove_one +}; + static int __init nvme_rdma_init_module(void) { + int ret; + nvme_rdma_wq = create_workqueue("nvme_rdma_wq"); if (!nvme_rdma_wq) return -ENOMEM; + ret = ib_register_client(&nvme_rdma_ib_client); + if (ret) { + destroy_workqueue(nvme_rdma_wq); + return ret; + } + nvmf_register_transport(&nvme_rdma_transport); return 0; } static void __exit nvme_rdma_cleanup_module(void) { - struct nvme_rdma_ctrl *ctrl; - nvmf_unregister_transport(&nvme_rdma_transport); - - mutex_lock(&nvme_rdma_ctrl_mutex); - list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) - __nvme_rdma_del_ctrl(ctrl); - mutex_unlock(&nvme_rdma_ctrl_mutex); - + ib_unregister_client(&nvme_rdma_ib_client); destroy_workqueue(nvme_rdma_wq); } -- cgit v0.10.2 From 1bda18de8f15a611a61d1a935b679db2e153338a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 5 Sep 2016 16:24:38 +0100 Subject: nvme-rdma: fix null pointer dereference on req->mr If there is an error on req->mr, req->mr is set to null, however the following statement sets req->mr->need_inval causing a null pointer dereference. Fix this by bailing out to label 'out' to immediately return and hence skip over the offending null pointer dereference. Fixes: f5b7b559e1488 ("nvme-rdma: Get rid of duplicate variable") Signed-off-by: Colin Ian King Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d6bdf55..c2c2c28 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -293,6 +293,7 @@ static int nvme_rdma_reinit_request(void *data, struct request *rq) if (IS_ERR(req->mr)) { ret = PTR_ERR(req->mr); req->mr = NULL; + goto out; } req->mr->need_inval = false; -- cgit v0.10.2 From 2cfe199ca5a8816ee80fe15bcf202dd1020aaea0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Sep 2016 14:58:06 +0200 Subject: nvme-rdma: add back dependency on CONFIG_BLOCK A recent change removed the dependency on BLK_DEV_NVME, which implies the dependency on PCI and BLOCK. We don't need CONFIG_PCI, but without CONFIG_BLOCK we get tons of build errors, e.g. In file included from drivers/nvme/host/core.c:16:0: linux/blk-mq.h:182:33: error: 'struct gendisk' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] drivers/nvme/host/core.c: In function 'nvme_setup_rw': drivers/nvme/host/core.c:295:21: error: implicit declaration of function 'rq_data_dir' [-Werror=implicit-function-declaration] drivers/nvme/host/nvme.h: In function 'nvme_map_len': drivers/nvme/host/nvme.h:217:6: error: implicit declaration of function 'req_op' [-Werror=implicit-function-declaration] drivers/nvme/host/scsi.c: In function 'nvme_trans_bdev_limits_page': drivers/nvme/host/scsi.c:768:85: error: implicit declaration of function 'queue_max_hw_sectors' [-Werror=implicit-function-declaration] This adds back the specific CONFIG_BLOCK dependency to avoid broken configurations. Signed-off-by: Arnd Bergmann Fixes: aa71987472a9 ("nvme: fabrics drivers don't need the nvme-pci driver") Signed-off-by: Sagi Grimberg diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 0c644f7..4b6cfff 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -31,6 +31,7 @@ config NVME_FABRICS config NVME_RDMA tristate "NVM Express over Fabrics RDMA host driver" depends on INFINIBAND + depends on BLOCK select NVME_CORE select NVME_FABRICS select SG_POOL -- cgit v0.10.2 From bf2c4b6f9b74c2ee1dd3c050b181e9b9c86fbcdb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 1 Sep 2016 10:50:38 -0400 Subject: svcauth_gss: Revert 64c59a3726f2 ("Remove unnecessary allocation") rsc_lookup steals the passed-in memory to avoid doing an allocation of its own, so we can't just pass in a pointer to memory that someone else is using. If we really want to avoid allocation there then maybe we should preallocate somwhere, or reference count these handles. For now we should revert. On occasion I see this on my server: kernel: kernel BUG at /home/cel/src/linux/linux-2.6/mm/slub.c:3851! kernel: invalid opcode: 0000 [#1] SMP kernel: Modules linked in: cts rpcsec_gss_krb5 sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd btrfs xor iTCO_wdt iTCO_vendor_support raid6_pq pcspkr i2c_i801 i2c_smbus lpc_ich mfd_core mei_me sg mei shpchp wmi ioatdma ipmi_si ipmi_msghandler acpi_pad acpi_power_meter rpcrdma ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm nfsd nfs_acl lockd grace auth_rpcgss sunrpc ip_tables xfs libcrc32c mlx4_ib mlx4_en ib_core sr_mod cdrom sd_mod ast drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel igb mlx4_core ahci libahci libata ptp pps_core dca i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod kernel: CPU: 7 PID: 145 Comm: kworker/7:2 Not tainted 4.8.0-rc4-00006-g9d06b0b #15 kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 1.0c 09/09/2015 kernel: Workqueue: events do_cache_clean [sunrpc] kernel: task: ffff8808541d8000 task.stack: ffff880854344000 kernel: RIP: 0010:[] [] kfree+0x155/0x180 kernel: RSP: 0018:ffff880854347d70 EFLAGS: 00010246 kernel: RAX: ffffea0020fe7660 RBX: ffff88083f9db064 RCX: 146ff0f9d5ec5600 kernel: RDX: 000077ff80000000 RSI: ffff880853f01500 RDI: ffff88083f9db064 kernel: RBP: ffff880854347d88 R08: ffff8808594ee000 R09: ffff88087fdd8780 kernel: R10: 0000000000000000 R11: ffffea0020fe76c0 R12: ffff880853f01500 kernel: R13: ffffffffa013cf76 R14: ffffffffa013cff0 R15: ffffffffa04253a0 kernel: FS: 0000000000000000(0000) GS:ffff88087fdc0000(0000) knlGS:0000000000000000 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 kernel: CR2: 00007fed60b020c3 CR3: 0000000001c06000 CR4: 00000000001406e0 kernel: Stack: kernel: ffff8808589f2f00 ffff880853f01500 0000000000000001 ffff880854347da0 kernel: ffffffffa013cf76 ffff8808589f2f00 ffff880854347db8 ffffffffa013d006 kernel: ffff8808589f2f20 ffff880854347e00 ffffffffa0406f60 0000000057c7044f kernel: Call Trace: kernel: [] rsc_free+0x16/0x90 [auth_rpcgss] kernel: [] rsc_put+0x16/0x30 [auth_rpcgss] kernel: [] cache_clean+0x2e0/0x300 [sunrpc] kernel: [] do_cache_clean+0xe/0x70 [sunrpc] kernel: [] process_one_work+0x1ff/0x3b0 kernel: [] worker_thread+0x2bc/0x4a0 kernel: [] ? rescuer_thread+0x3a0/0x3a0 kernel: [] kthread+0xe4/0xf0 kernel: [] ret_from_fork+0x1f/0x40 kernel: [] ? kthread_stop+0x110/0x110 kernel: Code: f7 ff ff eb 3b 65 8b 05 da 30 e2 7e 89 c0 48 0f a3 05 a0 38 b8 00 0f 92 c0 84 c0 0f 85 d1 fe ff ff 0f 1f 44 00 00 e9 f5 fe ff ff <0f> 0b 49 8b 03 31 f6 f6 c4 40 0f 85 62 ff ff ff e9 61 ff ff ff kernel: RIP [] kfree+0x155/0x180 kernel: RSP kernel: ---[ end trace 3fdec044969def26 ]--- It seems to be most common after a server reboot where a client has been using a Kerberos mount, and reconnects to continue its workload. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1d28181..d858202 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -569,9 +569,10 @@ gss_svc_searchbyctx(struct cache_detail *cd, struct xdr_netobj *handle) struct rsc *found; memset(&rsci, 0, sizeof(rsci)); - rsci.handle.data = handle->data; - rsci.handle.len = handle->len; + if (dup_to_netobj(&rsci.handle, handle->data, handle->len)) + return NULL; found = rsc_lookup(cd, &rsci); + rsc_free(&rsci); if (!found) return NULL; if (cache_check(cd, &found->h, NULL)) -- cgit v0.10.2 From 4440a2ab3b9f40dddbe006331ef0659c76859296 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 13 Sep 2016 08:49:18 +0800 Subject: netfilter: synproxy: Check oom when adding synproxy and seqadj ct extensions When memory is exhausted, nfct_seqadj_ext_add may fail to add the synproxy and seqadj extensions. The function nf_ct_seqadj_init doesn't check if get valid seqadj pointer by the nfct_seqadj. Now drop the packet directly when fail to add seqadj extension to avoid dereference NULL pointer in nf_ct_seqadj_init from init_conntrack(). Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h index 6793614..e693731 100644 --- a/include/net/netfilter/nf_conntrack_synproxy.h +++ b/include/net/netfilter/nf_conntrack_synproxy.h @@ -27,6 +27,20 @@ static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct) #endif } +static inline bool nf_ct_add_synproxy(struct nf_conn *ct, + const struct nf_conn *tmpl) +{ + if (tmpl && nfct_synproxy(tmpl)) { + if (!nfct_seqadj_ext_add(ct)) + return false; + + if (!nfct_synproxy_ext_add(ct)) + return false; + } + + return true; +} + struct synproxy_stats { unsigned int syn_received; unsigned int cookie_invalid; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index dd2c43a..9934b0c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1035,9 +1035,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, if (IS_ERR(ct)) return (struct nf_conntrack_tuple_hash *)ct; - if (tmpl && nfct_synproxy(tmpl)) { - nfct_seqadj_ext_add(ct); - nfct_synproxy_ext_add(ct); + if (!nf_ct_add_synproxy(ct, tmpl)) { + nf_conntrack_free(ct); + return ERR_PTR(-ENOMEM); } timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 19c081e..ecee105 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -441,7 +441,8 @@ nf_nat_setup_info(struct nf_conn *ct, ct->status |= IPS_DST_NAT; if (nfct_help(ct)) - nfct_seqadj_ext_add(ct); + if (!nfct_seqadj_ext_add(ct)) + return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { -- cgit v0.10.2 From 53a5d5ddccf849dbc27a8c1bba0b43c3a45fb792 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Sep 2016 18:42:08 +0800 Subject: crypto: echainiv - Replace chaining with multiplication The current implementation uses a global per-cpu array to store data which are used to derive the next IV. This is insecure as the attacker may change the stored data. This patch removes all traces of chaining and replaces it with multiplication of the salt and the sequence number. Fixes: a10f554fa7e0 ("crypto: echainiv - Add encrypted chain IV...") Cc: stable@vger.kernel.org Reported-by: Mathias Krause Signed-off-by: Herbert Xu diff --git a/crypto/echainiv.c b/crypto/echainiv.c index 1b01fe9..e3d889b 100644 --- a/crypto/echainiv.c +++ b/crypto/echainiv.c @@ -1,8 +1,8 @@ /* * echainiv: Encrypted Chain IV Generator * - * This generator generates an IV based on a sequence number by xoring it - * with a salt and then encrypting it with the same key as used to encrypt + * This generator generates an IV based on a sequence number by multiplying + * it with a salt and then encrypting it with the same key as used to encrypt * the plain text. This algorithm requires that the block size be equal * to the IV size. It is mainly useful for CBC. * @@ -24,81 +24,17 @@ #include #include #include -#include #include -#include -#include +#include #include -#define MAX_IV_SIZE 16 - -static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv); - -/* We don't care if we get preempted and read/write IVs from the next CPU. */ -static void echainiv_read_iv(u8 *dst, unsigned size) -{ - u32 *a = (u32 *)dst; - u32 __percpu *b = echainiv_iv; - - for (; size >= 4; size -= 4) { - *a++ = this_cpu_read(*b); - b++; - } -} - -static void echainiv_write_iv(const u8 *src, unsigned size) -{ - const u32 *a = (const u32 *)src; - u32 __percpu *b = echainiv_iv; - - for (; size >= 4; size -= 4) { - this_cpu_write(*b, *a); - a++; - b++; - } -} - -static void echainiv_encrypt_complete2(struct aead_request *req, int err) -{ - struct aead_request *subreq = aead_request_ctx(req); - struct crypto_aead *geniv; - unsigned int ivsize; - - if (err == -EINPROGRESS) - return; - - if (err) - goto out; - - geniv = crypto_aead_reqtfm(req); - ivsize = crypto_aead_ivsize(geniv); - - echainiv_write_iv(subreq->iv, ivsize); - - if (req->iv != subreq->iv) - memcpy(req->iv, subreq->iv, ivsize); - -out: - if (req->iv != subreq->iv) - kzfree(subreq->iv); -} - -static void echainiv_encrypt_complete(struct crypto_async_request *base, - int err) -{ - struct aead_request *req = base->data; - - echainiv_encrypt_complete2(req, err); - aead_request_complete(req, err); -} - static int echainiv_encrypt(struct aead_request *req) { struct crypto_aead *geniv = crypto_aead_reqtfm(req); struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv); struct aead_request *subreq = aead_request_ctx(req); - crypto_completion_t compl; - void *data; + __be64 nseqno; + u64 seqno; u8 *info; unsigned int ivsize = crypto_aead_ivsize(geniv); int err; @@ -108,8 +44,6 @@ static int echainiv_encrypt(struct aead_request *req) aead_request_set_tfm(subreq, ctx->child); - compl = echainiv_encrypt_complete; - data = req; info = req->iv; if (req->src != req->dst) { @@ -127,29 +61,30 @@ static int echainiv_encrypt(struct aead_request *req) return err; } - if (unlikely(!IS_ALIGNED((unsigned long)info, - crypto_aead_alignmask(geniv) + 1))) { - info = kmalloc(ivsize, req->base.flags & - CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL: - GFP_ATOMIC); - if (!info) - return -ENOMEM; - - memcpy(info, req->iv, ivsize); - } - - aead_request_set_callback(subreq, req->base.flags, compl, data); + aead_request_set_callback(subreq, req->base.flags, + req->base.complete, req->base.data); aead_request_set_crypt(subreq, req->dst, req->dst, req->cryptlen, info); aead_request_set_ad(subreq, req->assoclen); - crypto_xor(info, ctx->salt, ivsize); + memcpy(&nseqno, info + ivsize - 8, 8); + seqno = be64_to_cpu(nseqno); + memset(info, 0, ivsize); + scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1); - echainiv_read_iv(info, ivsize); - err = crypto_aead_encrypt(subreq); - echainiv_encrypt_complete2(req, err); - return err; + do { + u64 a; + + memcpy(&a, ctx->salt + ivsize - 8, 8); + + a |= 1; + a *= seqno; + + memcpy(info + ivsize - 8, &a, 8); + } while ((ivsize -= 8)); + + return crypto_aead_encrypt(subreq); } static int echainiv_decrypt(struct aead_request *req) @@ -196,8 +131,7 @@ static int echainiv_aead_create(struct crypto_template *tmpl, alg = crypto_spawn_aead_alg(spawn); err = -EINVAL; - if (inst->alg.ivsize & (sizeof(u32) - 1) || - inst->alg.ivsize > MAX_IV_SIZE) + if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize) goto free_inst; inst->alg.encrypt = echainiv_encrypt; @@ -206,7 +140,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl, inst->alg.init = aead_init_geniv; inst->alg.exit = aead_exit_geniv; - inst->alg.base.cra_alignmask |= __alignof__(u32) - 1; inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx); inst->alg.base.cra_ctxsize += inst->alg.ivsize; -- cgit v0.10.2 From acdb04d0b36769b3e05990c488dc74d8b7ac8060 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Sep 2016 14:43:29 +0800 Subject: crypto: skcipher - Fix blkcipher walk OOM crash When we need to allocate a temporary blkcipher_walk_next and it fails, the code is supposed to take the slow path of processing the data block by block. However, due to an unrelated change we instead end up dereferencing the NULL pointer. This patch fixes it by moving the unrelated bsize setting out of the way so that we enter the slow path as inteded. Fixes: 7607bd8ff03b ("[CRYPTO] blkcipher: Added blkcipher_walk_virt_block") Cc: stable@vger.kernel.org Reported-by: xiakaixu Reported-by: Ard Biesheuvel Signed-off-by: Herbert Xu Tested-by: Ard Biesheuvel diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 3699995..a832426 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -233,6 +233,8 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, return blkcipher_walk_done(desc, walk, -EINVAL); } + bsize = min(walk->walk_blocksize, n); + walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY | BLKCIPHER_WALK_DIFF); if (!scatterwalk_aligned(&walk->in, walk->alignmask) || @@ -245,7 +247,6 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, } } - bsize = min(walk->walk_blocksize, n); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); -- cgit v0.10.2 From f82e90b28654804ab72881d577d87c3d5c65e2bc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Sep 2016 09:48:52 +0100 Subject: crypto: arm/aes-ctr - fix NULL dereference in tail processing The AES-CTR glue code avoids calling into the blkcipher API for the tail portion of the walk, by comparing the remainder of walk.nbytes modulo AES_BLOCK_SIZE with the residual nbytes, and jumping straight into the tail processing block if they are equal. This tail processing block checks whether nbytes != 0, and does nothing otherwise. However, in case of an allocation failure in the blkcipher layer, we may enter this code with walk.nbytes == 0, while nbytes > 0. In this case, we should not dereference the source and destination pointers, since they may be NULL. So instead of checking for nbytes != 0, check for (walk.nbytes % AES_BLOCK_SIZE) != 0, which implies the former in non-error conditions. Fixes: 86464859cc77 ("crypto: arm - AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions") Cc: stable@vger.kernel.org Reported-by: xiakaixu Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c index da3c042..aef022a 100644 --- a/arch/arm/crypto/aes-ce-glue.c +++ b/arch/arm/crypto/aes-ce-glue.c @@ -284,7 +284,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; -- cgit v0.10.2 From 2db34e78f126c6001d79d3b66ab1abb482dc7caa Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 13 Sep 2016 09:48:53 +0100 Subject: crypto: arm64/aes-ctr - fix NULL dereference in tail processing The AES-CTR glue code avoids calling into the blkcipher API for the tail portion of the walk, by comparing the remainder of walk.nbytes modulo AES_BLOCK_SIZE with the residual nbytes, and jumping straight into the tail processing block if they are equal. This tail processing block checks whether nbytes != 0, and does nothing otherwise. However, in case of an allocation failure in the blkcipher layer, we may enter this code with walk.nbytes == 0, while nbytes > 0. In this case, we should not dereference the source and destination pointers, since they may be NULL. So instead of checking for nbytes != 0, check for (walk.nbytes % AES_BLOCK_SIZE) != 0, which implies the former in non-error conditions. Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions") Cc: stable@vger.kernel.org Reported-by: xiakaixu Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 5c88804..6b2aa0f 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -216,7 +216,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; -- cgit v0.10.2 From ebf9ff753c041b296241990aef76163bbb2cc9c8 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 13 Sep 2016 15:58:28 +0200 Subject: genirq: Provide irq_gc_{lock_irqsave,unlock_irqrestore}() helpers Some irqchip drivers need to take the generic chip lock outside of the irq context. Provide the irq_gc_{lock_irqsave,unlock_irqrestore}() helpers to allow one to disable irqs while entering a critical section protected by gc->lock. Note that we do not provide optimized version of these helpers for !SMP, because they are not called from the hot-path. [ tglx: Added a comment when these helpers should be [not] used ] Signed-off-by: Boris Brezillon Cc: Jason Cooper Cc: Marc Zyngier Cc: Nicolas Ferre Cc: stable@vger.kernel.org Cc: Alexandre Belloni Link: http://lkml.kernel.org/r/1473775109-4192-1-git-send-email-boris.brezillon@free-electrons.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/irq.h b/include/linux/irq.h index b52424e..0ac26c8 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -945,6 +945,16 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { } static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } #endif +/* + * The irqsave variants are for usage in non interrupt code. Do not use + * them in irq_chip callbacks. Use irq_gc_lock() instead. + */ +#define irq_gc_lock_irqsave(gc, flags) \ + raw_spin_lock_irqsave(&(gc)->lock, flags) + +#define irq_gc_unlock_irqrestore(gc, flags) \ + raw_spin_unlock_irqrestore(&(gc)->lock, flags) + static inline void irq_reg_writel(struct irq_chip_generic *gc, u32 val, int reg_offset) { -- cgit v0.10.2 From 5eb0d6eb3fac3daa60d9190eed9fa41cf809c756 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 13 Sep 2016 15:58:29 +0200 Subject: irqchip/atmel-aic: Fix potential deadlock in ->xlate() aic5_irq_domain_xlate() and aic_irq_domain_xlate() take the generic chip lock without disabling interrupts, which can lead to a deadlock if an interrupt occurs while the lock is held in one of these functions. Replace irq_gc_{lock,unlock}() calls by irq_gc_{lock_irqsave,unlock_irqrestore}() ones to prevent this bug from happening. Fixes: b1479ebb7720 ("irqchip: atmel-aic: Add atmel AIC/AIC5 drivers") Signed-off-by: Boris Brezillon Acked-by: Marc Zyngier Cc: Jason Cooper Cc: Nicolas Ferre Cc: stable@vger.kernel.org Cc: Alexandre Belloni Link: http://lkml.kernel.org/r/1473775109-4192-2-git-send-email-boris.brezillon@free-electrons.com Signed-off-by: Thomas Gleixner diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c index 112e17c..37f952d 100644 --- a/drivers/irqchip/irq-atmel-aic.c +++ b/drivers/irqchip/irq-atmel-aic.c @@ -176,6 +176,7 @@ static int aic_irq_domain_xlate(struct irq_domain *d, { struct irq_domain_chip_generic *dgc = d->gc; struct irq_chip_generic *gc; + unsigned long flags; unsigned smr; int idx; int ret; @@ -194,11 +195,11 @@ static int aic_irq_domain_xlate(struct irq_domain *d, gc = dgc->gc[idx]; - irq_gc_lock(gc); + irq_gc_lock_irqsave(gc, flags); smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq)); aic_common_set_priority(intspec[2], &smr); irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq)); - irq_gc_unlock(gc); + irq_gc_unlock_irqrestore(gc, flags); return ret; } diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c index 4f0d068..2a624d8 100644 --- a/drivers/irqchip/irq-atmel-aic5.c +++ b/drivers/irqchip/irq-atmel-aic5.c @@ -258,6 +258,7 @@ static int aic5_irq_domain_xlate(struct irq_domain *d, unsigned int *out_type) { struct irq_chip_generic *bgc = irq_get_domain_generic_chip(d, 0); + unsigned long flags; unsigned smr; int ret; @@ -269,12 +270,12 @@ static int aic5_irq_domain_xlate(struct irq_domain *d, if (ret) return ret; - irq_gc_lock(bgc); + irq_gc_lock_irqsave(bgc, flags); irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR); smr = irq_reg_readl(bgc, AT91_AIC5_SMR); aic_common_set_priority(intspec[2], &smr); irq_reg_writel(bgc, smr, AT91_AIC5_SMR); - irq_gc_unlock(bgc); + irq_gc_unlock_irqrestore(bgc, flags); return ret; } -- cgit v0.10.2 From 3e1be7ad2d38c6bd6aeef96df9bd0a7822f4e51c Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 9 Sep 2016 22:43:12 +0800 Subject: bnx2: Reset device during driver initialization When system enters into kdump kernel because of kernel panic, it won't shutdown devices. On-flight DMA will continue transferring data until device driver initializes. All devices are supposed to reset during driver initialization. And this property is used to fix the kdump failure in system with intel iommu. Other systems with hardware iommu should be similar. Please check commit 091d42e ("iommu/vt-d: Copy translation tables from old kernel") and those commits around. But bnx2 driver doesn't reset device during driver initialization. The device resetting is deferred to net device up stage. This will cause hardware iommu handling failure on bnx2 device. And its resetting relies on firmware. So in this patch move the firmware requesting code to earlier bnx2_init_one(), then next call bnx2_reset_chip to reset device. Signed-off-by: Baoquan He Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 8fc3f3c..505ceaf 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6356,10 +6356,6 @@ bnx2_open(struct net_device *dev) struct bnx2 *bp = netdev_priv(dev); int rc; - rc = bnx2_request_firmware(bp); - if (rc < 0) - goto out; - netif_carrier_off(dev); bnx2_disable_int(bp); @@ -6428,7 +6424,6 @@ open_err: bnx2_free_irq(bp); bnx2_free_mem(bp); bnx2_del_napi(bp); - bnx2_release_firmware(bp); goto out; } @@ -8575,6 +8570,12 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + rc = bnx2_request_firmware(bp); + if (rc < 0) + goto error; + + + bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET); memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN); dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG | @@ -8607,6 +8608,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; error: + bnx2_release_firmware(bp); pci_iounmap(pdev, bp->regview); pci_release_regions(pdev); pci_disable_device(pdev); -- cgit v0.10.2 From 715f5552b1e90ba3eecf6d1a6d044d0d5226663f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 10 Sep 2016 23:11:23 +0800 Subject: sctp: hold the transport before using it in sctp_hash_cmp Since commit 4f0087812648 ("sctp: apply rhashtable api to send/recv path"), sctp uses transport rhashtable with .obj_cmpfn sctp_hash_cmp, in which it compares the members of the transport with the rhashtable args to check if it's the right transport. But sctp uses the transport without holding it in sctp_hash_cmp, it can cause a use-after-free panic. As after it gets transport from hashtable, another CPU may close the sk and free the asoc. In sctp_association_free, it frees all the transports, meanwhile, the assoc's refcnt may be reduced to 0, assoc can be destroyed by sctp_association_destroy. So after that, transport->assoc is actually an unavailable memory address in sctp_hash_cmp. Although sctp_hash_cmp is under rcu_read_lock, it still can not avoid this, as assoc is not freed by RCU. This patch is to hold the transport before checking it's members with sctp_transport_hold, in which it checks the refcnt first, holds it if it's not 0. Fixes: 4f0087812648 ("sctp: apply rhashtable api to send/recv path") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/input.c b/net/sctp/input.c index 69444d3..1555fb8 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -796,27 +796,34 @@ struct sctp_hash_cmp_arg { static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { + struct sctp_transport *t = (struct sctp_transport *)ptr; const struct sctp_hash_cmp_arg *x = arg->key; - const struct sctp_transport *t = ptr; - struct sctp_association *asoc = t->asoc; - const struct net *net = x->net; + struct sctp_association *asoc; + int err = 1; if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr)) - return 1; - if (!net_eq(sock_net(asoc->base.sk), net)) - return 1; + return err; + if (!sctp_transport_hold(t)) + return err; + + asoc = t->asoc; + if (!net_eq(sock_net(asoc->base.sk), x->net)) + goto out; if (x->ep) { if (x->ep != asoc->ep) - return 1; + goto out; } else { if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port)) - return 1; + goto out; if (!sctp_bind_addr_match(&asoc->base.bind_addr, x->laddr, sctp_sk(asoc->base.sk))) - return 1; + goto out; } - return 0; + err = 0; +out: + sctp_transport_put(t); + return err; } static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) -- cgit v0.10.2 From 440f895aa97f81a2bdc02993da5360a1f6da2fb5 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Sun, 11 Sep 2016 21:43:34 +0200 Subject: drivers: net: phy: xgene: Fix 'remove' function If 'IS_ERR(pdata->clk)' is true, then 'clk_disable_unprepare(pdata->clk)' will do nothing. It is likely that 'if (!IS_ERR(pdata->clk))' was expected here. In fact, the test can even be removed because 'clk_disable_unprepare' already handles such cases. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c index 7756748..92af182 100644 --- a/drivers/net/phy/mdio-xgene.c +++ b/drivers/net/phy/mdio-xgene.c @@ -424,10 +424,8 @@ static int xgene_mdio_remove(struct platform_device *pdev) mdiobus_unregister(mdio_bus); mdiobus_free(mdio_bus); - if (dev->of_node) { - if (IS_ERR(pdata->clk)) - clk_disable_unprepare(pdata->clk); - } + if (dev->of_node) + clk_disable_unprepare(pdata->clk); return 0; } -- cgit v0.10.2 From ad5987b47e96a0fb6d13fea250e936aed000093c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Sep 2016 15:53:55 +0200 Subject: nl80211: validate number of probe response CSA counters Due to an apparent copy/paste bug, the number of counters for the beacon configuration were checked twice, instead of checking the number of probe response counters. Fix this to check the number of probe response counters before parsing those. Cc: stable@vger.kernel.org Fixes: 9a774c78e211 ("cfg80211: Support multiple CSA counters") Signed-off-by: Johannes Berg diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f02653a..4809f4d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6978,7 +6978,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) params.n_counter_offsets_presp = len / sizeof(u16); if (rdev->wiphy.max_num_csa_counters && - (params.n_counter_offsets_beacon > + (params.n_counter_offsets_presp > rdev->wiphy.max_num_csa_counters)) return -EINVAL; -- cgit v0.10.2 From 035ee288ae7ade4152f1c3cf23a587b04fdc526c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 6 Sep 2016 06:20:46 +0200 Subject: PCI: Fix bridge_d3 update on device removal Starting with v4.8, we allow a PCIe port to runtime suspend to D3hot if the port itself and its children satisfy a number of conditions. Once a child is removed, we recheck those conditions in case the removed device was blocking the port from suspending. The rechecking needs to happen *after* the device has been removed from the bus it resides on. Otherwise when walking the port's subordinate bus in pci_bridge_d3_update(), the device being removed would erroneously still be taken into account. However the device is removed from the bus_list in pci_destroy_dev() and we currently recheck *before* that. Fix it. Fixes: 9d26d3a8f1b0 ("PCI: Put PCIe ports into D3 during suspend") Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg Acked-by: Rafael J. Wysocki diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index d1ef7ac..f9357e0 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -40,6 +40,7 @@ static void pci_destroy_dev(struct pci_dev *dev) list_del(&dev->bus_list); up_write(&pci_bus_sem); + pci_bridge_d3_device_removed(dev); pci_free_resources(dev); put_device(&dev->dev); } @@ -96,8 +97,6 @@ static void pci_remove_bus_device(struct pci_dev *dev) dev->subordinate = NULL; } - pci_bridge_d3_device_removed(dev); - pci_destroy_dev(dev); } -- cgit v0.10.2 From 9ad18b75c2f6e4a78ce204e79f37781f8815c0fa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 17 Aug 2016 23:19:01 -0400 Subject: asm-generic: make get_user() clear the destination on errors both for access_ok() failures and for faults halfway through Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h index 04e21a4..32901d1 100644 --- a/include/asm-generic/uaccess.h +++ b/include/asm-generic/uaccess.h @@ -230,14 +230,18 @@ extern int __put_user_bad(void) __attribute__((noreturn)); might_fault(); \ access_ok(VERIFY_READ, __p, sizeof(*ptr)) ? \ __get_user((x), (__typeof__(*(ptr)) *)__p) : \ - -EFAULT; \ + ((x) = (__typeof__(*(ptr)))0,-EFAULT); \ }) #ifndef __get_user_fn static inline int __get_user_fn(size_t size, const void __user *ptr, void *x) { - size = __copy_from_user(x, ptr, size); - return size ? -EFAULT : size; + size_t n = __copy_from_user(x, ptr, size); + if (unlikely(n)) { + memset(x + (size - n), 0, n); + return -EFAULT; + } + return 0; } #define __get_user_fn(sz, u, k) __get_user_fn(sz, u, k) -- cgit v0.10.2 From eb47e0293baaa3044022059f1fa9ff474bfe35cb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 19:34:00 -0400 Subject: cris: buggered copy_from_user/copy_to_user/clear_user * copy_from_user() on access_ok() failure ought to zero the destination * none of those primitives should skip the access_ok() check in case of small constant size. Cc: stable@vger.kernel.org Acked-by: Jesper Nilsson Signed-off-by: Al Viro diff --git a/arch/cris/include/asm/uaccess.h b/arch/cris/include/asm/uaccess.h index e3530d0..56c7d57 100644 --- a/arch/cris/include/asm/uaccess.h +++ b/arch/cris/include/asm/uaccess.h @@ -194,30 +194,6 @@ extern unsigned long __copy_user(void __user *to, const void *from, unsigned lon extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n); extern unsigned long __do_clear_user(void __user *to, unsigned long n); -static inline unsigned long -__generic_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_user(to, from, n); - return n; -} - -static inline unsigned long -__generic_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - if (access_ok(VERIFY_READ, from, n)) - return __copy_user_zeroing(to, from, n); - return n; -} - -static inline unsigned long -__generic_clear_user(void __user *to, unsigned long n) -{ - if (access_ok(VERIFY_WRITE, to, n)) - return __do_clear_user(to, n); - return n; -} - static inline long __strncpy_from_user(char *dst, const char __user *src, long count) { @@ -282,7 +258,7 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n) else if (n == 24) __asm_copy_from_user_24(to, from, ret); else - ret = __generic_copy_from_user(to, from, n); + ret = __copy_user_zeroing(to, from, n); return ret; } @@ -333,7 +309,7 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) else if (n == 24) __asm_copy_to_user_24(to, from, ret); else - ret = __generic_copy_to_user(to, from, n); + ret = __copy_user(to, from, n); return ret; } @@ -366,26 +342,43 @@ __constant_clear_user(void __user *to, unsigned long n) else if (n == 24) __asm_clear_24(to, ret); else - ret = __generic_clear_user(to, n); + ret = __do_clear_user(to, n); return ret; } -#define clear_user(to, n) \ - (__builtin_constant_p(n) ? \ - __constant_clear_user(to, n) : \ - __generic_clear_user(to, n)) +static inline size_t clear_user(void __user *to, size_t n) +{ + if (unlikely(!access_ok(VERIFY_WRITE, to, n))) + return n; + if (__builtin_constant_p(n)) + return __constant_clear_user(to, n); + else + return __do_clear_user(to, n); +} -#define copy_from_user(to, from, n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_from_user(to, from, n) : \ - __generic_copy_from_user(to, from, n)) +static inline size_t copy_from_user(void *to, const void __user *from, size_t n) +{ + if (unlikely(!access_ok(VERIFY_READ, from, n))) { + memset(to, 0, n); + return n; + } + if (__builtin_constant_p(n)) + return __constant_copy_from_user(to, from, n); + else + return __copy_user_zeroing(to, from, n); +} -#define copy_to_user(to, from, n) \ - (__builtin_constant_p(n) ? \ - __constant_copy_to_user(to, from, n) : \ - __generic_copy_to_user(to, from, n)) +static inline size_t copy_to_user(void __user *to, const void *from, size_t n) +{ + if (unlikely(!access_ok(VERIFY_WRITE, to, n))) + return n; + if (__builtin_constant_p(n)) + return __constant_copy_to_user(to, from, n); + else + return __copy_user(to, from, n); +} /* We let the __ versions of copy_from/to_user inline, because they're often * used in fast paths and have only a small space overhead. -- cgit v0.10.2 From 3b8767a8f00cc6538ba6b1cf0f88502e2fd2eb90 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 20:54:02 -0400 Subject: frv: fix clear_user() It should check access_ok(). Otherwise a bunch of places turn into trivially exploitable rootholes. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h index 3ac9a59..87d9e34 100644 --- a/arch/frv/include/asm/uaccess.h +++ b/arch/frv/include/asm/uaccess.h @@ -263,19 +263,25 @@ do { \ extern long __memset_user(void *dst, unsigned long count); extern long __memcpy_user(void *dst, const void *src, unsigned long count); -#define clear_user(dst,count) __memset_user(____force(dst), (count)) +#define __clear_user(dst,count) __memset_user(____force(dst), (count)) #define __copy_from_user_inatomic(to, from, n) __memcpy_user((to), ____force(from), (n)) #define __copy_to_user_inatomic(to, from, n) __memcpy_user(____force(to), (from), (n)) #else -#define clear_user(dst,count) (memset(____force(dst), 0, (count)), 0) +#define __clear_user(dst,count) (memset(____force(dst), 0, (count)), 0) #define __copy_from_user_inatomic(to, from, n) (memcpy((to), ____force(from), (n)), 0) #define __copy_to_user_inatomic(to, from, n) (memcpy(____force(to), (from), (n)), 0) #endif -#define __clear_user clear_user +static inline unsigned long __must_check +clear_user(void __user *to, unsigned long n) +{ + if (likely(__access_ok(to, n))) + n = __clear_user(to, n); + return n; +} static inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) -- cgit v0.10.2 From f35c1e0671728d1c9abc405d05ef548b5fcb2fc4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 21:16:49 -0400 Subject: hexagon: fix strncpy_from_user() error return It's -EFAULT, not -1 (and contrary to the comment in there, __strnlen_user() can return 0 - on faults). Cc: stable@vger.kernel.org Acked-by: Richard Kuo Signed-off-by: Al Viro diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h index f000a38..f61cfb2 100644 --- a/arch/hexagon/include/asm/uaccess.h +++ b/arch/hexagon/include/asm/uaccess.h @@ -103,7 +103,8 @@ static inline long hexagon_strncpy_from_user(char *dst, const char __user *src, { long res = __strnlen_user(src, n); - /* return from strnlen can't be zero -- that would be rubbish. */ + if (unlikely(!res)) + return -EFAULT; if (res > n) { copy_from_user(dst, src, n); -- cgit v0.10.2 From a5e541f796f17228793694d64b507f5f57db4cd7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 21:31:41 -0400 Subject: ia64: copy_from_user() should zero the destination on access_ok() failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h index 465c709..6c2d2c8 100644 --- a/arch/ia64/include/asm/uaccess.h +++ b/arch/ia64/include/asm/uaccess.h @@ -272,20 +272,17 @@ __copy_from_user (void *to, const void __user *from, unsigned long count) __cu_len; \ }) -#define copy_from_user(to, from, n) \ -({ \ - void *__cu_to = (to); \ - const void __user *__cu_from = (from); \ - long __cu_len = (n); \ - \ - __chk_user_ptr(__cu_from); \ - if (__access_ok(__cu_from, __cu_len, get_fs())) { \ - if (!__builtin_constant_p(n)) \ - check_object_size(__cu_to, __cu_len, false); \ - __cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len); \ - } \ - __cu_len; \ -}) +static inline unsigned long +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (!__builtin_constant_p(n)) + check_object_size(to, n, false); + if (likely(__access_ok(from, n, get_fs()))) + n = __copy_user((__force void __user *) to, from, n); + else + memset(to, 0, n); + return n; +} #define __copy_in_user(to, from, size) __copy_user((to), (from), (size)) -- cgit v0.10.2 From 8ae95ed4ae5fc7c3391ed668b2014c9e2079533b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 18 Aug 2016 22:08:20 -0400 Subject: metag: copy_from_user() should zero the destination on access_ok() failure Cc: stable@vger.kernel.org Acked-by: James Hogan Signed-off-by: Al Viro diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h index 8282cbc..273e612 100644 --- a/arch/metag/include/asm/uaccess.h +++ b/arch/metag/include/asm/uaccess.h @@ -204,8 +204,9 @@ extern unsigned long __must_check __copy_user_zeroing(void *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) return __copy_user_zeroing(to, from, n); + memset(to, 0, n); return n; } -- cgit v0.10.2 From 05d9d0b96e53c52a113fd783c0c97c830c8dc7af Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 19 Aug 2016 12:10:02 -0700 Subject: ARC: uaccess: get_user to zero out dest in cause of fault Al reported potential issue with ARC get_user() as it wasn't clearing out destination pointer in case of fault due to bad address etc. Verified using following | { | u32 bogus1 = 0xdeadbeef; | u64 bogus2 = 0xdead; | int rc1, rc2; | | pr_info("Orig values %x %llx\n", bogus1, bogus2); | rc1 = get_user(bogus1, (u32 __user *)0x40000000); | rc2 = get_user(bogus2, (u64 __user *)0x50000000); | pr_info("access %d %d, new values %x %llx\n", | rc1, rc2, bogus1, bogus2); | } | [ARCLinux]# insmod /mnt/kernel-module/qtn.ko | Orig values deadbeef dead | access -14 -14, new values 0 0 Reported-by: Al Viro Cc: Linus Torvalds Cc: linux-snps-arc@lists.infradead.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta Signed-off-by: Al Viro diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index a78d567..41faf17 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -83,7 +83,10 @@ "2: ;nop\n" \ " .section .fixup, \"ax\"\n" \ " .align 4\n" \ - "3: mov %0, %3\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ " j 2b\n" \ " .previous\n" \ " .section __ex_table, \"a\"\n" \ @@ -101,7 +104,11 @@ "2: ;nop\n" \ " .section .fixup, \"ax\"\n" \ " .align 4\n" \ - "3: mov %0, %3\n" \ + "3: # return -EFAULT\n" \ + " mov %0, %3\n" \ + " # zero out dst ptr\n" \ + " mov %1, 0\n" \ + " mov %R1, 0\n" \ " j 2b\n" \ " .previous\n" \ " .section __ex_table, \"a\"\n" \ -- cgit v0.10.2 From e69d700535ac43a18032b3c399c69bf4639e89a2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:18:53 -0400 Subject: mips: copy_from_user() must zero the destination on access_ok() failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index 11b965f..21a2aab 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -14,6 +14,7 @@ #include #include #include +#include #include /* @@ -1170,6 +1171,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); __cu_len = __invoke_copy_from_user(__cu_to, \ __cu_from, \ __cu_len); \ + } else { \ + memset(__cu_to, 0, __cu_len); \ } \ } \ __cu_len; \ -- cgit v0.10.2 From 43403eabf558d2800b429cd886e996fd555aa542 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:32:02 -0400 Subject: mn10300: failing __get_user() and get_user() should zero Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h index 20f7bf6..d012e87 100644 --- a/arch/mn10300/include/asm/uaccess.h +++ b/arch/mn10300/include/asm/uaccess.h @@ -166,6 +166,7 @@ struct __large_struct { unsigned long buf[100]; }; "2:\n" \ " .section .fixup,\"ax\"\n" \ "3:\n\t" \ + " mov 0,%1\n" \ " mov %3,%0\n" \ " jmp 2b\n" \ " .previous\n" \ -- cgit v0.10.2 From ae7cc577ec2a4a6151c9e928fd1f595d953ecef1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:33:10 -0400 Subject: mn10300: copy_from_user() should zero on access_ok() failure... Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/mn10300/lib/usercopy.c b/arch/mn10300/lib/usercopy.c index 7826e6c..ce8899e 100644 --- a/arch/mn10300/lib/usercopy.c +++ b/arch/mn10300/lib/usercopy.c @@ -9,7 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ -#include +#include unsigned long __generic_copy_to_user(void *to, const void *from, unsigned long n) @@ -24,6 +24,8 @@ __generic_copy_from_user(void *to, const void *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) __copy_user_zeroing(to, from, n); + else + memset(to, 0, n); return n; } -- cgit v0.10.2 From e33d1f6f72cc82fcfc3d1fb20c9e3ad83b1928fa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:36:36 -0400 Subject: nios2: copy_from_user() should zero the tail of destination Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index caa51ff..2b4b9e9 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -102,9 +102,12 @@ extern long __copy_to_user(void __user *to, const void *from, unsigned long n); static inline long copy_from_user(void *to, const void __user *from, unsigned long n) { - if (!access_ok(VERIFY_READ, from, n)) - return n; - return __copy_from_user(to, from, n); + unsigned long res = n; + if (access_ok(VERIFY_READ, from, n)) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline long copy_to_user(void __user *to, const void *from, -- cgit v0.10.2 From 2e29f50ad5e23db37dde9be71410d95d50241ecd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 16:39:01 -0400 Subject: nios2: fix __get_user() a) should not leave crap on fault b) should _not_ require access_ok() in any cases. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h index 2b4b9e9..0ab8232 100644 --- a/arch/nios2/include/asm/uaccess.h +++ b/arch/nios2/include/asm/uaccess.h @@ -142,7 +142,7 @@ extern long strnlen_user(const char __user *s, long n); #define __get_user_unknown(val, size, ptr, err) do { \ err = 0; \ - if (copy_from_user(&(val), ptr, size)) { \ + if (__copy_from_user(&(val), ptr, size)) { \ err = -EFAULT; \ } \ } while (0) @@ -169,7 +169,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ __get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\ (x) = (__force __typeof__(x))__gu_val; \ __gu_err; \ -- cgit v0.10.2 From acb2505d0119033a80c85ac8d02dccae41271667 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 17:05:21 -0400 Subject: openrisc: fix copy_from_user() ... that should zero on faults. Also remove the helpful logics wrt range truncation copied from ppc32. Where it had ever been needed only in case of copy_from_user() *and* had not been merged into the mainline until a month after the need had disappeared. A decade before openrisc went into mainline, I might add... Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index a6bd07c..cbad29b 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -273,28 +273,20 @@ __copy_tofrom_user(void *to, const void *from, unsigned long size); static inline unsigned long copy_from_user(void *to, const void *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) - return __copy_tofrom_user(to, from, n); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - return __copy_tofrom_user(to, from, n - over) + over; - } - return n; + unsigned long res = n; + + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_tofrom_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } static inline unsigned long copy_to_user(void *to, const void *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, n)) - return __copy_tofrom_user(to, from, n); - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - return __copy_tofrom_user(to, from, n - over) + over; - } + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_tofrom_user(to, from, n); return n; } @@ -303,13 +295,8 @@ extern unsigned long __clear_user(void *addr, unsigned long size); static inline __must_check unsigned long clear_user(void *addr, unsigned long size) { - - if (access_ok(VERIFY_WRITE, addr, size)) - return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } + if (likely(access_ok(VERIFY_WRITE, addr, size))) + size = __clear_user(addr, size); return size; } -- cgit v0.10.2 From aace880feea38875fbc919761b77e5732a3659ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 20 Aug 2016 19:03:37 -0400 Subject: parisc: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 0f59fd9..37a1bee 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -10,6 +10,7 @@ #include #include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -221,13 +222,14 @@ static inline unsigned long __must_check copy_from_user(void *to, unsigned long n) { int sz = __compiletime_object_size(to); - int ret = -EFAULT; + unsigned long ret = n; if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n)) ret = __copy_from_user(to, from, n); else copy_from_user_overflow(); - + if (unlikely(ret)) + memset(to + (n - ret), 0, ret); return ret; } -- cgit v0.10.2 From 224264657b8b228f949b42346e09ed8c90136a8e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 19:16:26 -0400 Subject: ppc32: fix copy_from_user() should clear on access_ok() failures. Also remove the useless range truncation logics. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index c1dc6c1..c2ce5dd 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -308,40 +308,23 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { - unsigned long over; - - if (access_ok(VERIFY_READ, from, n)) { + if (likely(access_ok(VERIFY_READ, from, n))) { if (!__builtin_constant_p(n)) check_object_size(to, n, false); return __copy_tofrom_user((__force void __user *)to, from, n); } - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + n - TASK_SIZE; - if (!__builtin_constant_p(n - over)) - check_object_size(to, n - over, false); - return __copy_tofrom_user((__force void __user *)to, from, - n - over) + over; - } + memset(to, 0, n); return n; } static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) { - unsigned long over; - if (access_ok(VERIFY_WRITE, to, n)) { if (!__builtin_constant_p(n)) check_object_size(from, n, true); return __copy_tofrom_user(to, (__force void __user *)from, n); } - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + n - TASK_SIZE; - if (!__builtin_constant_p(n)) - check_object_size(from, n - over, true); - return __copy_tofrom_user(to, (__force void __user *)from, - n - over) + over; - } return n; } @@ -439,10 +422,6 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size) might_fault(); if (likely(access_ok(VERIFY_WRITE, addr, size))) return __clear_user(addr, size); - if ((unsigned long)addr < TASK_SIZE) { - unsigned long over = (unsigned long)addr + size - TASK_SIZE; - return __clear_user(addr, size - over) + over; - } return size; } -- cgit v0.10.2 From fd2d2b191fe75825c4c7a6f12f3fef35aaed7dd7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:00:54 -0400 Subject: s390: get_user() should zero on failure Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9b49cf1..2c5d292 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -266,28 +266,28 @@ int __put_user_bad(void) __attribute__((noreturn)); __chk_user_ptr(ptr); \ switch (sizeof(*(ptr))) { \ case 1: { \ - unsigned char __x; \ + unsigned char __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 2: { \ - unsigned short __x; \ + unsigned short __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 4: { \ - unsigned int __x; \ + unsigned int __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ break; \ }; \ case 8: { \ - unsigned long long __x; \ + unsigned long long __x = 0; \ __gu_err = __get_user_fn(&__x, ptr, \ sizeof(*(ptr))); \ (x) = *(__force __typeof__(*(ptr)) *) &__x; \ -- cgit v0.10.2 From c2f18fa4cbb3ad92e033a24efa27583978ce9600 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:13:39 -0400 Subject: score: fix __get_user/get_user * should zero on any failure * __get_user() should use __copy_from_user(), not copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index 20a3591..c5d4311 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -163,7 +163,7 @@ do { \ __get_user_asm(val, "lw", ptr); \ break; \ case 8: \ - if ((copy_from_user((void *)&val, ptr, 8)) == 0) \ + if (__copy_from_user((void *)&val, ptr, 8) == 0) \ __gu_err = 0; \ else \ __gu_err = -EFAULT; \ @@ -188,6 +188,8 @@ do { \ \ if (likely(access_ok(VERIFY_READ, __gu_ptr, size))) \ __get_user_common((x), size, __gu_ptr); \ + else \ + (x) = 0; \ \ __gu_err; \ }) @@ -201,6 +203,7 @@ do { \ "2:\n" \ ".section .fixup,\"ax\"\n" \ "3:li %0, %4\n" \ + "li %1, 0\n" \ "j 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ -- cgit v0.10.2 From b615e3c74621e06cd97f86373ca90d43d6d998aa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 22:30:44 -0400 Subject: score: fix copy_from_user() and friends Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index c5d4311..01aec8c 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -301,35 +301,34 @@ extern int __copy_tofrom_user(void *to, const void *from, unsigned long len); static inline unsigned long copy_from_user(void *to, const void *from, unsigned long len) { - unsigned long over; + unsigned long res = len; - if (access_ok(VERIFY_READ, from, len)) - return __copy_tofrom_user(to, from, len); + if (likely(access_ok(VERIFY_READ, from, len))) + res = __copy_tofrom_user(to, from, len); - if ((unsigned long)from < TASK_SIZE) { - over = (unsigned long)from + len - TASK_SIZE; - return __copy_tofrom_user(to, from, len - over) + over; - } - return len; + if (unlikely(res)) + memset(to + (len - res), 0, res); + + return res; } static inline unsigned long copy_to_user(void *to, const void *from, unsigned long len) { - unsigned long over; - - if (access_ok(VERIFY_WRITE, to, len)) - return __copy_tofrom_user(to, from, len); + if (likely(access_ok(VERIFY_WRITE, to, len))) + len = __copy_tofrom_user(to, from, len); - if ((unsigned long)to < TASK_SIZE) { - over = (unsigned long)to + len - TASK_SIZE; - return __copy_tofrom_user(to, from, len - over) + over; - } return len; } -#define __copy_from_user(to, from, len) \ - __copy_tofrom_user((to), (from), (len)) +static inline unsigned long +__copy_from_user(void *to, const void *from, unsigned long len) +{ + unsigned long left = __copy_tofrom_user(to, from, len); + if (unlikely(left)) + memset(to + (len - left), 0, left); + return left; +} #define __copy_to_user(to, from, len) \ __copy_tofrom_user((to), (from), (len)) @@ -343,17 +342,17 @@ __copy_to_user_inatomic(void *to, const void *from, unsigned long len) static inline unsigned long __copy_from_user_inatomic(void *to, const void *from, unsigned long len) { - return __copy_from_user(to, from, len); + return __copy_tofrom_user(to, from, len); } -#define __copy_in_user(to, from, len) __copy_from_user(to, from, len) +#define __copy_in_user(to, from, len) __copy_tofrom_user(to, from, len) static inline unsigned long copy_in_user(void *to, const void *from, unsigned long len) { if (access_ok(VERIFY_READ, from, len) && access_ok(VERFITY_WRITE, to, len)) - return copy_from_user(to, from, len); + return __copy_tofrom_user(to, from, len); } /* -- cgit v0.10.2 From c6852389228df9fb3067f94f3b651de2a7921b36 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 23:33:47 -0400 Subject: sh64: failing __get_user() should zero It could be done in exception-handling bits in __get_user_b() et.al., but the surgery involved would take more knowledge of sh64 details than I have or _want_ to have. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h index c01376c..ca5073d 100644 --- a/arch/sh/include/asm/uaccess_64.h +++ b/arch/sh/include/asm/uaccess_64.h @@ -24,6 +24,7 @@ #define __get_user_size(x,ptr,size,retval) \ do { \ retval = 0; \ + x = 0; \ switch (size) { \ case 1: \ retval = __get_user_asm_b((void *)&x, \ -- cgit v0.10.2 From 6e050503a150b2126620c1a1e9b3a368fcd51eac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 21 Aug 2016 23:39:47 -0400 Subject: sh: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index a49635c..92ade79 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -151,7 +151,10 @@ copy_from_user(void *to, const void __user *from, unsigned long n) __kernel_size_t __copy_size = (__kernel_size_t) n; if (__copy_size && __access_ok(__copy_from, __copy_size)) - return __copy_user(to, from, __copy_size); + __copy_size = __copy_user(to, from, __copy_size); + + if (unlikely(__copy_size)) + memset(to + (n - __copy_size), 0, __copy_size); return __copy_size; } -- cgit v0.10.2 From 917400cecb4b52b5cde5417348322bb9c8272fa6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 22 Aug 2016 00:23:07 -0400 Subject: sparc32: fix copy_from_user() Cc: stable@vger.kernel.org Acked-by: David S. Miller Signed-off-by: Al Viro diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 341a5a1..035c89b 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -269,8 +269,10 @@ static inline unsigned long copy_from_user(void *to, const void __user *from, un if (!__builtin_constant_p(n)) check_object_size(to, n, false); return __copy_user((__force void __user *) to, from, n); - } else + } else { + memset(to, 0, n); return n; + } } static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) -- cgit v0.10.2 From 8f035983dd826d7e04f67b28acf8e2f08c347e41 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:16:58 -0400 Subject: blackfin: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h index 12f5d68..0a2a700 100644 --- a/arch/blackfin/include/asm/uaccess.h +++ b/arch/blackfin/include/asm/uaccess.h @@ -171,11 +171,12 @@ static inline int bad_user_access_length(void) static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) + if (likely(access_ok(VERIFY_READ, from, n))) { memcpy(to, (const void __force *)from, n); - else - return n; - return 0; + return 0; + } + memset(to, 0, n); + return n; } static inline unsigned long __must_check -- cgit v0.10.2 From c90a3bc5061d57e7931a9b7ad14784e1a0ed497d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:20:13 -0400 Subject: m32r: fix __get_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h index cac7014..6f89821 100644 --- a/arch/m32r/include/asm/uaccess.h +++ b/arch/m32r/include/asm/uaccess.h @@ -219,7 +219,7 @@ extern int fixup_exception(struct pt_regs *regs); #define __get_user_nocheck(x, ptr, size) \ ({ \ long __gu_err = 0; \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ might_fault(); \ __get_user_size(__gu_val, (ptr), (size), __gu_err); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ -- cgit v0.10.2 From d0cf385160c12abd109746cad1f13e3b3e8b50b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:22:34 -0400 Subject: microblaze: fix copy_from_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 331b0d3..3a486d3 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -373,10 +373,13 @@ extern long __user_bad(void); static inline long copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long res = n; might_fault(); - if (access_ok(VERIFY_READ, from, n)) - return __copy_from_user(to, from, n); - return n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = __copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; } #define __copy_to_user(to, from, n) \ -- cgit v0.10.2 From e98b9e37ae04562d52c96f46b3cf4c2e80222dc1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:23:33 -0400 Subject: microblaze: fix __get_user() Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 3a486d3..8266767 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -227,7 +227,7 @@ extern long __user_bad(void); #define __get_user(x, ptr) \ ({ \ - unsigned long __gu_val; \ + unsigned long __gu_val = 0; \ /*unsigned long __gu_ptr = (unsigned long)(ptr);*/ \ long __gu_err; \ switch (sizeof(*(ptr))) { \ -- cgit v0.10.2 From 8630c32275bac2de6ffb8aea9d9b11663e7ad28e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Sep 2016 19:28:23 -0400 Subject: avr32: fix copy_from_user() really ugly, but apparently avr32 compilers turns access_ok() into something so bad that they want it in assembler. Left that way, zeroing added in inline wrapper. Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h index 68cf638..b1ec1fa 100644 --- a/arch/avr32/include/asm/uaccess.h +++ b/arch/avr32/include/asm/uaccess.h @@ -74,7 +74,7 @@ extern __kernel_size_t __copy_user(void *to, const void *from, extern __kernel_size_t copy_to_user(void __user *to, const void *from, __kernel_size_t n); -extern __kernel_size_t copy_from_user(void *to, const void __user *from, +extern __kernel_size_t ___copy_from_user(void *to, const void __user *from, __kernel_size_t n); static inline __kernel_size_t __copy_to_user(void __user *to, const void *from, @@ -88,6 +88,15 @@ static inline __kernel_size_t __copy_from_user(void *to, { return __copy_user(to, (const void __force *)from, n); } +static inline __kernel_size_t copy_from_user(void *to, + const void __user *from, + __kernel_size_t n) +{ + size_t res = ___copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c index d93ead0..7c6cf14 100644 --- a/arch/avr32/kernel/avr32_ksyms.c +++ b/arch/avr32/kernel/avr32_ksyms.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL(copy_page); /* * Userspace access stuff. */ -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(___copy_from_user); EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(__copy_user); EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S index ea59c04..96a6de9 100644 --- a/arch/avr32/lib/copy_user.S +++ b/arch/avr32/lib/copy_user.S @@ -25,11 +25,11 @@ .align 1 .global copy_from_user .type copy_from_user, @function -copy_from_user: +___copy_from_user: branch_if_kernel r8, __copy_user ret_if_privileged r8, r11, r10, r10 rjmp __copy_user - .size copy_from_user, . - copy_from_user + .size ___copy_from_user, . - ___copy_from_user .global copy_to_user .type copy_to_user, @function -- cgit v0.10.2 From 6eaed1665fc6864fbdbffcc6f43a7f5d012f3052 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 13 Sep 2016 16:40:24 +1000 Subject: powerpc/powernv: Fix the state of root PE The PE for root bus (root PE) can be removed because of PCI hot remove in EEH recovery path for fenced PHB error. We need update @phb->root_pe_populated accordingly so that the root PE can be populated again in forthcoming PCI hot add path. Also, the PE shouldn't be destroyed as it's global and reserved resource. Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE") Reported-by: Frederic Barrat Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index c16d790..0c71a2f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3426,7 +3426,17 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) } } - pnv_ioda_free_pe(pe); + /* + * The PE for root bus can be removed because of hotplug in EEH + * recovery for fenced PHB error. We need to mark the PE dead so + * that it can be populated again in PCI hot add path. The PE + * shouldn't be destroyed as it's the global reserved resource. + */ + if (phb->ioda.root_pe_populated && + phb->ioda.root_pe_idx == pe->pe_number) + phb->ioda.root_pe_populated = false; + else + pnv_ioda_free_pe(pe); } static void pnv_pci_release_device(struct pci_dev *pdev) -- cgit v0.10.2 From 74712339a4fc0f4ddc710e6bca836a6b78b7d8de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 25 Aug 2016 08:23:14 +0100 Subject: drm/i915: Restore lost "Initialized i915" welcome message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A side effect of removing the midlayer from driver loading was the loss of a useful message announcing to userspace that i915 had successfully started, e.g.: [drm] Initialized i915 1.6.0 20160425 for 0000:00:02.0 on minor 0 Reported-by: Timo Aaltonen Signed-off-by: Chris Wilson Fixes: 8f460e2c78f2 ("drm/i915: Demidlayer driver loading") Cc: Daniel Vetter Cc: Ville Syrjälä Cc: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/20160825072314.17402-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter (cherry picked from commit bc5ca47c0af4f949ba889e666b7da65569e36093) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 95ddd56..5de36d8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1281,6 +1281,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) intel_runtime_pm_enable(dev_priv); + /* Everything is in place, we can now relax! */ + DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", + driver.name, driver.major, driver.minor, driver.patchlevel, + driver.date, pci_name(pdev), dev_priv->drm.primary->index); + intel_runtime_pm_put(dev_priv); return 0; -- cgit v0.10.2 From 86dfb76cba284114cf586005cd943eeb6e4f328d Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 7 Sep 2016 17:42:31 -0700 Subject: Revert "drm/i915/psr: Make idle_frames sensible again" This reverts commit 1c80c25fb622973dd135878e98d172be20859049 Author: Daniel Vetter Date: Wed May 18 18:47:12 2016 +0200 drm/i915/psr: Make idle_frames sensible again There are panels that needs 4 idle frames before entering PSR, but VBT is unproperly set. Also lately it was identified that idle frame count calculated at HW can be off by 1, what makes the minimum of 2, at least. Without the current vbt+1 we are with the risk of having HW calculating 0 idle frames and entering PSR when it shouldn't. Regardless the lack of link training. [Jani: there is some disagreement on the explanation, but the commit regresses so revert it is.] References: http://marc.info/?i=20160904191153.GA2328@light.dominikbrodowski.net Cc: Dominik Brodowski Cc: Jani Nikula Cc: Daniel Vetter Signed-off-by: Rodrigo Vivi Fixes: 1c80c25fb622 ("drm/i915/psr: Make idle_frames sensible again") Cc: drm-intel-fixes@lists.freedesktop.org # v4.8-rc1+ Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1473295351-8766-1-git-send-email-rodrigo.vivi@intel.com (cherry picked from commit 40918e0bb81be02f507a941f8b2741f0dc1771b0) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2b0d1ba..cf171b4 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -255,14 +255,14 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); uint32_t max_sleep_time = 0x1f; - /* Lately it was identified that depending on panel idle frame count - * calculated at HW can be off by 1. So let's use what came - * from VBT + 1. - * There are also other cases where panel demands at least 4 - * but VBT is not being set. To cover these 2 cases lets use - * at least 5 when VBT isn't set to be on the safest side. + /* + * Let's respect VBT in case VBT asks a higher idle_frame value. + * Let's use 6 as the minimum to cover all known cases including + * the off-by-one issue that HW has in some cases. Also there are + * cases where sink should be able to train + * with the 5 or 6 idle patterns. */ - uint32_t idle_frames = dev_priv->vbt.psr.idle_frames + 1; + uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames); uint32_t val = EDP_PSR_ENABLE; val |= max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT; -- cgit v0.10.2 From ea54ff4008892b46c7a3e6bc8ab8aaec9d198639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 13 Sep 2016 12:22:19 +0300 Subject: drm/i915: Ignore OpRegion panel type except on select machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turns out commit a05628195a0d ("drm/i915: Get panel_type from OpRegion panel details") has regressed quite a few machines. So it looks like we can't use the panel type from OpRegion on all systems, and yet we absolutely must use it on some specific systems. Despite trying, I was unable to find any automagic way to determine if the OpRegion panel type is respectable or not. The only glimmer of hope I had was bit 8 in the SCIC response, but that turned out to not work either (it was always 0 on both types of systems). So, to fix the regressions without breaking the machine we know to need the OpRegion panel type, let's just add a quirk for this. Only specific machines known to require the OpRegion panel type will therefore use it. Everyone else will fall bck to the VBT panel type. The only known machine so far is a "Conrac GmbH IX45GM2". The PCI subsystem ID on this machine is just a generic 8086:2a42, so of no use. Instead we'll go with a DMI match. I suspect we can now also revert commit aeddda06c1a7 ("drm/i915: Ignore panel type from OpRegion on SKL") but let's leave that to a separate patch. v2: Do the DMI match in the opregion code directly, as dev_priv->quirks gets populated too late Cc: Rob Kramer Cc: Martin van Es Cc: Andrea Arcangeli Cc: Dave Airlie Cc: Marco Krüger Cc: Sean Greenslade Cc: Trudy Tective Cc: Robin Müller Cc: Alexander Kobel Cc: Alexey Shumitsky Cc: Emil Andersen Lauridsen Cc: oceans112@gmail.com Cc: James Hogan Cc: James Bottomley Cc: stable@vger.kernel.org References: https://lists.freedesktop.org/archives/intel-gfx/2016-August/105545.html References: https://lists.freedesktop.org/archives/dri-devel/2016-August/116888.html References: https://lists.freedesktop.org/archives/intel-gfx/2016-June/098826.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94825 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97060 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97443 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97363 Fixes: a05628195a0d ("drm/i915: Get panel_type from OpRegion panel details") Tested-by: Marco Krüger Tested-by: Alexey Shumitsky Tested-by: Sean Greenslade Tested-by: Emil Andersen Lauridsen Tested-by: Robin Müller Tested-by: oceans112@gmail.com Tested-by: Rob Kramer Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1473758539-21565-1-git-send-email-ville.syrjala@linux.intel.com References: http://patchwork.freedesktop.org/patch/msgid/1473602239-15855-1-git-send-email-adrienverge@gmail.com Acked-by: Jani Nikula (cherry picked from commit c8ebfad7a063fe665417fa0eeb0da7cfe987d8ed) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index adca262..7acbbbf 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -1047,6 +1047,23 @@ err_out: return err; } +static int intel_use_opregion_panel_type_callback(const struct dmi_system_id *id) +{ + DRM_INFO("Using panel type from OpRegion on %s\n", id->ident); + return 1; +} + +static const struct dmi_system_id intel_use_opregion_panel_type[] = { + { + .callback = intel_use_opregion_panel_type_callback, + .ident = "Conrac GmbH IX45GM2", + .matches = {DMI_MATCH(DMI_SYS_VENDOR, "Conrac GmbH"), + DMI_MATCH(DMI_PRODUCT_NAME, "IX45GM2"), + }, + }, + { } +}; + int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) { @@ -1073,6 +1090,16 @@ intel_opregion_get_panel_type(struct drm_i915_private *dev_priv) } /* + * So far we know that some machined must use it, others must not use it. + * There doesn't seem to be any way to determine which way to go, except + * via a quirk list :( + */ + if (!dmi_check_system(intel_use_opregion_panel_type)) { + DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1); + return -ENODEV; + } + + /* * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us * low vswing for eDP, whereas the VBT panel type (2) gives us normal * vswing instead. Low vswing results in some display flickers, so -- cgit v0.10.2 From 0b97a484e52cb423662eb98904aad82dafcc1f10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 09:41:34 +0200 Subject: mac80211: check skb_linearize() return value The A-MSDU TX code (within TXQs) didn't always check the return value of skb_linearize() properly, resulting in potentially passing a frag- list SKB down to the driver even when it said it can't handle it. Fix that. Fixes: 6e0456b545456 ("mac80211: add A-MSDU tx support") Signed-off-by: Johannes Berg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cc8e955..18b285e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1515,8 +1515,12 @@ out: spin_unlock_bh(&fq->lock); if (skb && skb_has_frag_list(skb) && - !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) - skb_linearize(skb); + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) { + if (skb_linearize(skb)) { + ieee80211_free_txskb(&local->hw, skb); + return NULL; + } + } return skb; } -- cgit v0.10.2 From e5789608766113ca9c30d596d93ca7d5cbd8b461 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Sep 2016 14:22:07 +0300 Subject: mmc: omap_hsmmc: Initialize dma_slave_config to avoid random data It is wrong to use uninitialized dma_slave_config and configure only certain fields as the DMAengine driver might look at non initialized (random data) fields and tries to interpret it. Signed-off-by: Peter Ujfalusi Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 24ebc9a..5f2f24a 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1409,11 +1409,18 @@ static int omap_hsmmc_pre_dma_transfer(struct omap_hsmmc_host *host, static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host, struct mmc_request *req) { - struct dma_slave_config cfg; struct dma_async_tx_descriptor *tx; int ret = 0, i; struct mmc_data *data = req->data; struct dma_chan *chan; + struct dma_slave_config cfg = { + .src_addr = host->mapbase + OMAP_HSMMC_DATA, + .dst_addr = host->mapbase + OMAP_HSMMC_DATA, + .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES, + .src_maxburst = data->blksz / 4, + .dst_maxburst = data->blksz / 4, + }; /* Sanity check: all the SG entries must be aligned by block size. */ for (i = 0; i < data->sg_len; i++) { @@ -1433,13 +1440,6 @@ static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host, chan = omap_hsmmc_get_dma_chan(host, data); - cfg.src_addr = host->mapbase + OMAP_HSMMC_DATA; - cfg.dst_addr = host->mapbase + OMAP_HSMMC_DATA; - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - cfg.src_maxburst = data->blksz / 4; - cfg.dst_maxburst = data->blksz / 4; - ret = dmaengine_slave_config(chan, &cfg); if (ret) return ret; -- cgit v0.10.2 From df804d5e27490151da1ce9f216031a31352203e6 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Sep 2016 14:21:54 +0300 Subject: mmc: omap: Initialize dma_slave_config to avoid random data in it's fields It is wrong to use uninitialized dma_slave_config and configure only certain fields as the DMAengine driver might look at non initialized (random data) fields and tries to interpret it. Signed-off-by: Peter Ujfalusi Signed-off-by: Ulf Hansson diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c index f23d65e..be3c49f 100644 --- a/drivers/mmc/host/omap.c +++ b/drivers/mmc/host/omap.c @@ -1016,14 +1016,16 @@ mmc_omap_prepare_data(struct mmc_omap_host *host, struct mmc_request *req) /* Only reconfigure if we have a different burst size */ if (*bp != burst) { - struct dma_slave_config cfg; - - cfg.src_addr = host->phys_base + OMAP_MMC_REG(host, DATA); - cfg.dst_addr = host->phys_base + OMAP_MMC_REG(host, DATA); - cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; - cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; - cfg.src_maxburst = burst; - cfg.dst_maxburst = burst; + struct dma_slave_config cfg = { + .src_addr = host->phys_base + + OMAP_MMC_REG(host, DATA), + .dst_addr = host->phys_base + + OMAP_MMC_REG(host, DATA), + .src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, + .src_maxburst = burst, + .dst_maxburst = burst, + }; if (dmaengine_slave_config(c, &cfg)) goto use_pio; -- cgit v0.10.2 From 6cfeaf5125d425043d44002d0a1a8a147be582bf Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 14 Sep 2016 11:00:26 +0100 Subject: cpu/hotplug: Include linux/types.h in linux/cpuhotplug.h The linux/cpuhotplug.h header makes use of the bool type, but wasn't including linux/types.h to ensure that type has been defined. Fix this by including linux/types.h in preparation for including linux/cpuhotplug.h in a file that doesn't do so already. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: Richard Cochran Cc: Sebastian Andrzej Siewior Cc: Ralf Baechle Cc: Anna-Maria Gleixner Link: http://lkml.kernel.org/r/20160914100027.20945-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 242bf53..34bd805 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -1,6 +1,8 @@ #ifndef __CPUHOTPLUG_H #define __CPUHOTPLUG_H +#include + enum cpuhp_state { CPUHP_OFFLINE, CPUHP_CREATE_THREADS, -- cgit v0.10.2 From de75abbe0121a6c3c9c6b04c75300088e57ad1d5 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Thu, 8 Sep 2016 11:48:28 +0200 Subject: arm/xen: fix SMP guests boot Commit 88e957d6e47f ("xen: introduce xen_vcpu_id mapping") broke SMP ARM guests on Xen. When FIFO-based event channels are in use (this is the default), evtchn_fifo_alloc_control_block() is called on CPU_UP_PREPARE event and this happens before we set up xen_vcpu_id mapping in xen_starting_cpu. Temporary fix the issue by setting direct Linux CPU id <-> Xen vCPU id mapping for all possible CPUs at boot. We don't currently support kexec/kdump on Xen/ARM so these ids always match. In future, we have several ways to solve the issue, e.g.: - Eliminate all hypercalls from CPU_UP_PREPARE, do them from the starting CPU. This can probably be done for both x86 and ARM and, if done, will allow us to get Xen's idea of vCPU id from CPUID/MPIDR on the starting CPU directly, no messing with ACPI/device tree required. - Save vCPU id information from ACPI/device tree on ARM and use it to initialize xen_vcpu_id mapping. This is the same trick we currently do on x86. Reported-by: Julien Grall Tested-by: Wei Chen Signed-off-by: Vitaly Kuznetsov Acked-by: Stefano Stabellini Signed-off-by: David Vrabel diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 3d2cef6..f193414 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -170,9 +170,6 @@ static int xen_starting_cpu(unsigned int cpu) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); - /* Direct vCPU id mapping for ARM guests. */ - per_cpu(xen_vcpu_id, cpu) = cpu; - info.mfn = virt_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); @@ -330,6 +327,7 @@ static int __init xen_guest_init(void) { struct xen_add_to_physmap xatp; struct shared_info *shared_info_page = NULL; + int cpu; if (!xen_domain()) return 0; @@ -380,7 +378,8 @@ static int __init xen_guest_init(void) return -ENOMEM; /* Direct vCPU id mapping for ARM guests. */ - per_cpu(xen_vcpu_id, 0) = 0; + for_each_possible_cpu(cpu) + per_cpu(xen_vcpu_id, cpu) = cpu; xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames(); if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn, -- cgit v0.10.2 From 7ccb8e633cfeb7969eba09bbf53346e746bb7f89 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 15 Aug 2016 19:34:44 +0200 Subject: ARM: multi_v7_defconfig: update XILINX_VDMA Commit fde57a7c4474 ("dmaengine: xilinx: Rename driver and config") renamed config XILINX_VDMA to config XILINX_DMA Update defconfig accordingly. Signed-off-by: Fabian Frederick Signed-off-by: Arnd Bergmann diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 2c8665c..ea3566f 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -781,7 +781,7 @@ CONFIG_MXS_DMA=y CONFIG_DMA_BCM2835=y CONFIG_DMA_OMAP=y CONFIG_QCOM_BAM_DMA=y -CONFIG_XILINX_VDMA=y +CONFIG_XILINX_DMA=y CONFIG_DMA_SUN6I=y CONFIG_STAGING=y CONFIG_SENSORS_ISL29018=y -- cgit v0.10.2 From f2a89d3b2b85b90b05453872aaabfdb412a21a03 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Aug 2016 10:54:16 +0100 Subject: arm64: dts: Fix broken architected timer interrupt trigger The ARM architected timer specification mandates that the interrupt associated with each timer is level triggered (which corresponds to the "counter >= comparator" condition). A number of DTs are being remarkably creative, declaring the interrupt to be edge triggered. A quick look at the TRM for the corresponding ARM CPUs clearly shows that this is wrong, and I've corrected those. For non-ARM designs (and in the absence of a publicly available TRM), I've made them active low as well, which can't be completely wrong as the GIC cannot disinguish between level low and level high. The respective maintainers are of course welcome to prove me wrong. While I was at it, I took the liberty to fix a couple of related issue, such as some spurious affinity bits on ThunderX, and their complete absence on ls1043a (both of which seem to be related to copy-pasting from other DTs). Acked-by: Duc Dang Acked-by: Carlo Caione Acked-by: Michal Simek Acked-by: Krzysztof Kozlowski Acked-by: Dinh Nguyen Acked-by: Masahiro Yamada Signed-off-by: Marc Zyngier Signed-off-by: Arnd Bergmann diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 445aa67..c2b9bcb 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -255,10 +255,10 @@ /* Local timer */ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; }; timer0: timer0@ffc03000 { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index e502c24..bf6c8d0 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -102,13 +102,13 @@ timer { compatible = "arm,armv8-timer"; interrupts = , + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, , + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, , + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>, ; + (GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>; }; xtal: xtal-clk { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index f1c2c71..c29dab9 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -110,10 +110,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 0 0xff01>, /* Secure Phys IRQ */ - <1 13 0xff01>, /* Non-secure Phys IRQ */ - <1 14 0xff01>, /* Virt IRQ */ - <1 15 0xff01>; /* Hyp IRQ */ + interrupts = <1 0 0xff08>, /* Secure Phys IRQ */ + <1 13 0xff08>, /* Non-secure Phys IRQ */ + <1 14 0xff08>, /* Virt IRQ */ + <1 15 0xff08>; /* Hyp IRQ */ clock-frequency = <50000000>; }; diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi index f53b095..d4a12fa 100644 --- a/arch/arm64/boot/dts/broadcom/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi @@ -88,13 +88,13 @@ timer { compatible = "arm,armv8-timer"; interrupts = , + IRQ_TYPE_LEVEL_LOW)>, , + IRQ_TYPE_LEVEL_LOW)>, , + IRQ_TYPE_LEVEL_LOW)>, ; + IRQ_TYPE_LEVEL_LOW)>; }; pmu { diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index 2eb9b22..04dc8a8 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -354,10 +354,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xff01>, - <1 14 0xff01>, - <1 11 0xff01>, - <1 10 0xff01>; + interrupts = <1 13 4>, + <1 14 4>, + <1 11 4>, + <1 10 4>; }; pmu { diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi index ca663df..1628315 100644 --- a/arch/arm64/boot/dts/exynos/exynos7.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi @@ -473,10 +473,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xff01>, - <1 14 0xff01>, - <1 11 0xff01>, - <1 10 0xff01>; + interrupts = <1 13 0xff08>, + <1 14 0xff08>, + <1 11 0xff08>, + <1 10 0xff08>; }; pmu_system_controller: system-controller@105c0000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index e669fbd..a67e210 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -119,10 +119,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0x1>, /* Physical Secure PPI */ - <1 14 0x1>, /* Physical Non-Secure PPI */ - <1 11 0x1>, /* Virtual PPI */ - <1 10 0x1>; /* Hypervisor PPI */ + interrupts = <1 13 0xf08>, /* Physical Secure PPI */ + <1 14 0xf08>, /* Physical Non-Secure PPI */ + <1 11 0xf08>, /* Virtual PPI */ + <1 10 0xf08>; /* Hypervisor PPI */ }; pmu { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi index 21023a3..e3b6034 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi @@ -191,10 +191,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0x8>, /* Physical Secure PPI, active-low */ - <1 14 0x8>, /* Physical Non-Secure PPI, active-low */ - <1 11 0x8>, /* Virtual PPI, active-low */ - <1 10 0x8>; /* Hypervisor PPI, active-low */ + interrupts = <1 13 4>, /* Physical Secure PPI, active-low */ + <1 14 4>, /* Physical Non-Secure PPI, active-low */ + <1 11 4>, /* Virtual PPI, active-low */ + <1 10 4>; /* Hypervisor PPI, active-low */ }; pmu { diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index eab1a42..c2a6745 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -122,10 +122,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = , - , - , - ; + interrupts = , + , + , + ; }; odmi: odmi@300000 { diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi index c223915..d73bdc8 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi @@ -129,10 +129,10 @@ timer { compatible = "arm,armv8-timer"; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 4>, + <1 14 4>, + <1 11 4>, + <1 10 4>; }; soc { diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index e595f22..3e2e51f 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -65,10 +65,10 @@ timer { compatible = "arm,armv8-timer"; interrupt-parent = <&gic>; - interrupts = <1 13 0xf01>, - <1 14 0xf01>, - <1 11 0xf01>, - <1 10 0xf01>; + interrupts = <1 13 0xf08>, + <1 14 0xf08>, + <1 11 0xf08>, + <1 10 0xf08>; }; amba_apu { -- cgit v0.10.2 From 29bf282dec94f6015a675c007614cb29563f1c18 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 6 Sep 2016 16:34:01 +1000 Subject: powerpc/powernv: Detach from PE on releasing PCI device The PCI hotplug can be part of EEH error recovery. The @pdn and the device's PE number aren't removed and added afterwords. The PE number in @pdn should be set to an invalid one. Otherwise, the PE's device count is decreased on removing devices while failing to be increased on adding devices. It leads to unbalanced PE's device count and make normal PCI hotplug path broken. Fixes: c5f7700bbd2e ("powerpc/powernv: Dynamically release PE") Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0c71a2f..da5da11 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3452,7 +3452,17 @@ static void pnv_pci_release_device(struct pci_dev *pdev) if (!pdn || pdn->pe_number == IODA_INVALID_PE) return; + /* + * PCI hotplug can happen as part of EEH error recovery. The @pdn + * isn't removed and added afterwards in this scenario. We should + * set the PE number in @pdn to an invalid one. Otherwise, the PE's + * device count is decreased on removing devices while failing to + * be increased on adding devices. It leads to unbalanced PE's device + * count and eventually make normal PCI hotplug path broken. + */ pe = &phb->ioda.pe_array[pdn->pe_number]; + pdn->pe_number = IODA_INVALID_PE; + WARN_ON(--pe->device_count < 0); if (pe->device_count == 0) pnv_ioda_release_pe(pe); -- cgit v0.10.2 From ed7d9a1d7da6fe7b1c7477dc70e95051583fd60c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Sep 2016 17:03:06 +1000 Subject: powerpc/powernv/pci: Fix missed TCE invalidations that should fallback to OPAL In commit f0228c413011 ("powerpc/powernv/pci: Fallback to OPAL for TCE invalidations"), we added logic to fallback to OPAL for doing TCE invalidations if we can't do it in Linux. Ben sent a v2 of the patch, containing these additional call sites, but I had already applied v1 and didn't notice. So fix them now. Fixes: f0228c413011 ("powerpc/powernv/pci: Fallback to OPAL for TCE invalidations") Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index da5da11..bc0c91e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2217,7 +2217,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, pnv_pci_link_table_and_group(phb->hose->node, num, tbl, &pe->table_group); - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); return 0; } @@ -2355,7 +2355,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, if (ret) pe_warn(pe, "Unmapping failed, ret = %ld\n", ret); else - pnv_pci_phb3_tce_invalidate_pe(pe); + pnv_pci_ioda2_tce_invalidate_pe(pe); pnv_pci_unlink_table_and_group(table_group->tables[num], table_group); -- cgit v0.10.2 From 85d5313ed717ad60769491c7c072d23bc0a68e7a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Sep 2016 11:38:31 +0200 Subject: mac80211: reject TSPEC TIDs (TSIDs) for aggregation Since mac80211 doesn't currently support TSIDs 8-15 which can only be used after QoS TSPEC negotiation (and not even after WMM negotiation), reject attempts to set up aggregation sessions for them, which might confuse drivers. In mac80211 we do correctly handle that, but the TSIDs should never get used anyway, and drivers might not be able to handle it. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a9aff60..afa9468 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -261,10 +261,16 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, .timeout = timeout, .ssn = start_seq_num, }; - int i, ret = -EOPNOTSUPP; u16 status = WLAN_STATUS_REQUEST_DECLINED; + if (tid >= IEEE80211_FIRST_TSPEC_TSID) { + ht_dbg(sta->sdata, + "STA %pM requests BA session on unsupported tid %d\n", + sta->sta.addr, tid); + goto end_no_lock; + } + if (!sta->sta.ht_cap.ht_supported) { ht_dbg(sta->sdata, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 5650c46..45319cc 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -584,6 +584,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW)) return -EINVAL; + if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID)) + return -EINVAL; + ht_dbg(sdata, "Open BA session requested for %pM tid %u\n", pubsta->addr, tid); -- cgit v0.10.2 From cecf62352aee2b4fe114aafd1b8c5f265a4243ce Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 11:22:33 +0300 Subject: perf/x86/intel: Don't disable "intel_bts" around "intel" event batching At the moment, intel_bts events get disabled from intel PMU's disable callback, which includes event scheduling transactions of said PMU, which have nothing to do with intel_bts events. We do want to keep intel_bts events off inside the PMI handler to avoid filling up their buffer too soon. This patch moves intel_bts enabling/disabling directly to the PMI handler. Reported-by: Vince Weaver Signed-off-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915082233.11065-1-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2cbde2f..4c9a79b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1730,9 +1730,11 @@ static __initconst const u64 knl_hw_cache_extra_regs * disabled state if called consecutively. * * During consecutive calls, the same disable value will be written to related - * registers, so the PMU state remains unchanged. hw.state in - * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive - * calls. + * registers, so the PMU state remains unchanged. + * + * intel_bts events don't coexist with intel PMU's BTS events because of + * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them + * disabled around intel PMU's event batching etc, only inside the PMI handler. */ static void __intel_pmu_disable_all(void) { @@ -1742,8 +1744,6 @@ static void __intel_pmu_disable_all(void) if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); - else - intel_bts_disable_local(); intel_pmu_pebs_disable_all(); } @@ -1771,8 +1771,7 @@ static void __intel_pmu_enable_all(int added, bool pmi) return; intel_pmu_enable_bts(event->hw.config); - } else - intel_bts_enable_local(); + } } static void intel_pmu_enable_all(int added) @@ -2073,6 +2072,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); + intel_bts_disable_local(); __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); @@ -2172,6 +2172,7 @@ done: /* Only restore PMU state when it's active. See x86_pmu_disable(). */ if (cpuc->enabled) __intel_pmu_enable_all(0, true); + intel_bts_enable_local(); /* * Only unmask the NMI after the overflow counters -- cgit v0.10.2 From a6805884e263e82d9fb87bd5f39ad4bb38cde246 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 17 Aug 2016 13:44:50 +0300 Subject: ARM: keystone: defconfig: Fix USB configuration Simply enabling CONFIG_KEYSTONE_USB_PHY doesn't work anymore as it depends on CONFIG_NOP_USB_XCEIV. We need to enable that as well. This fixes USB on Keystone boards from v4.8-rc1 onwards. Signed-off-by: Roger Quadros Acked-by: Santosh Shilimkar Signed-off-by: Arnd Bergmann diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 71b42e6..78cd2f1 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -161,6 +161,7 @@ CONFIG_USB_MON=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_DWC3=y +CONFIG_NOP_USB_XCEIV=y CONFIG_KEYSTONE_USB_PHY=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y -- cgit v0.10.2 From 7892a1f64a447b6f65fe2888688883b7c26d81d3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 9 Aug 2016 12:36:41 -0300 Subject: [media] rcar-fcp: Make sure rcar_fcp_enable() returns 0 on success When resuming from suspend-to-RAM on r8a7795/salvator-x: dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe940000.fdp1 failed to resume noirq: error 1 dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe944000.fdp1 failed to resume noirq: error 1 dpm_run_callback(): pm_genpd_resume_noirq+0x0/0x90 returns 1 PM: Device fe948000.fdp1 failed to resume noirq: error 1 According to its documentation, rcar_fcp_enable() returns 0 on success or a negative error code if an error occurs. Hence fdp1_pm_runtime_resume() and vsp1_pm_runtime_resume() forward its return value to their callers. However, rcar_fcp_enable() forwards the return value of pm_runtime_get_sync(), which can actually be 1 on success, leading to the resume failure above. To fix this, consider only negative values returned by pm_runtime_get_sync() to be failures. Fixes: 7b49235e83b2347c ("[media] v4l: Add Renesas R-Car FCP driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/platform/rcar-fcp.c b/drivers/media/platform/rcar-fcp.c index 6a7bcc3..bc50c69 100644 --- a/drivers/media/platform/rcar-fcp.c +++ b/drivers/media/platform/rcar-fcp.c @@ -99,10 +99,16 @@ EXPORT_SYMBOL_GPL(rcar_fcp_put); */ int rcar_fcp_enable(struct rcar_fcp_device *fcp) { + int error; + if (!fcp) return 0; - return pm_runtime_get_sync(fcp->dev); + error = pm_runtime_get_sync(fcp->dev); + if (error < 0) + return error; + + return 0; } EXPORT_SYMBOL_GPL(rcar_fcp_enable); -- cgit v0.10.2 From 54c5ef2e93ea002dc5dd63349298b2778fe59edb Mon Sep 17 00:00:00 2001 From: Beni Lev Date: Wed, 10 Aug 2016 17:03:43 +0300 Subject: iwlwifi: mvm: update TX queue before making a copy of the skb Off-channel action frames (such as ANQP frames) must be sent either on the AUX queue or on the offchannel queue, otherwise the firmware will cause a SYSASSERT. In the current implementation, the queue to be used is correctly set in the original skb, but this is done after it is copied. Thus the copy remains with the original, incorrect queue. Fix this by setting the queue in the original skb before copying it. Fixes: commit 5c08b0f5026f ("iwlwifi: mvm: don't override the rate with the AMSDU len") Cc: stable@vger.kernel.org # v4.6+ Signed-off-by: Beni Lev Signed-off-by: Luca Coelho diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index c6585ab..b3a87a3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -513,6 +513,15 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) int hdrlen = ieee80211_hdrlen(hdr->frame_control); int queue; + /* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used + * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel + * queue. STATION (HS2.0) uses the auxiliary context of the FW, + * and hence needs to be sent on the aux queue + */ + if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && + skb_info->control.vif->type == NL80211_IFTYPE_STATION) + IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; + memcpy(&info, skb->cb, sizeof(info)); if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU)) @@ -526,16 +535,6 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) /* This holds the amsdu headers length */ skb_info->driver_data[0] = (void *)(uintptr_t)0; - /* - * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used - * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel - * queue. STATION (HS2.0) uses the auxiliary context of the FW, - * and hence needs to be sent on the aux queue - */ - if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE && - info.control.vif->type == NL80211_IFTYPE_STATION) - IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue; - queue = info.hw_queue; /* -- cgit v0.10.2 From b0eaf4506f5f95d15d6731d72c0ddf4a2179eefa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Sep 2016 23:39:12 +0200 Subject: kvm: x86: correctly reset dest_map->vector when restoring LAPIC state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When userspace sends KVM_SET_LAPIC, KVM schedules a check between the vCPU's IRR and ISR and the IOAPIC redirection table, in order to re-establish the IOAPIC's dest_map (the list of CPUs servicing the real-time clock interrupt with the corresponding vectors). However, __rtc_irq_eoi_tracking_restore_one was forgetting to set dest_map->vectors. Because of this, the IOAPIC did not process the real-time clock interrupt EOI, ioapic->rtc_status.pending_eoi got stuck at a non-zero value, and further RTC interrupts were reported to userspace as coalesced. Fixes: 9e4aabe2bb3454c83dac8139cf9974503ee044db Fixes: 4d99ba898dd0c521ca6cdfdde55c9b58aea3cb3d Cc: stable@vger.kernel.org Cc: Joerg Roedel Cc: David Gilbert Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 5f42d03..c7220ba 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -109,6 +109,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) { bool new_val, old_val; struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + struct dest_map *dest_map = &ioapic->rtc_status.dest_map; union kvm_ioapic_redirect_entry *e; e = &ioapic->redirtbl[RTC_GSI]; @@ -117,16 +118,17 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) return; new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); - old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + old_val = test_bit(vcpu->vcpu_id, dest_map->map); if (new_val == old_val) return; if (new_val) { - __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + __set_bit(vcpu->vcpu_id, dest_map->map); + dest_map->vectors[vcpu->vcpu_id] = e->fields.vector; ioapic->rtc_status.pending_eoi++; } else { - __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map); + __clear_bit(vcpu->vcpu_id, dest_map->map); ioapic->rtc_status.pending_eoi--; rtc_status_pending_eoi_check_valid(ioapic); } -- cgit v0.10.2 From 1c109fabbd51863475cd12ac206bdd249aee35af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 Sep 2016 02:35:29 +0100 Subject: fix minor infoleak in get_user_ex() get_user_ex(x, ptr) should zero x on failure. It's not a lot of a leak (at most we are leaking uninitialized 64bit value off the kernel stack, and in a fairly constrained situation, at that), but the fix is trivial, so... Cc: stable@vger.kernel.org Signed-off-by: Al Viro [ This sat in different branch from the uaccess fixes since mid-August ] Signed-off-by: Linus Torvalds diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index e3af86f..2131c4c 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -433,7 +433,11 @@ do { \ #define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ asm volatile("1: mov"itype" %1,%"rtype"0\n" \ "2:\n" \ - _ASM_EXTABLE_EX(1b, 2b) \ + ".section .fixup,\"ax\"\n" \ + "3:xor"itype" %"rtype"0,%"rtype"0\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE_EX(1b, 3b) \ : ltype(x) : "m" (__m(addr))) #define __put_user_nocheck(x, ptr, size) \ -- cgit v0.10.2 From 5297e0f0fe13305a1fc7f01986be0dccd063d57a Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Sep 2016 20:20:00 -0700 Subject: vfs: fix return type of ioctl_file_dedupe_range All the VFS functions in the dedupe ioctl path return int status, so the ioctl handler ought to as well. Found by Coverity, CID 1350952. Signed-off-by: Darrick J. Wong Signed-off-by: Linus Torvalds diff --git a/fs/ioctl.c b/fs/ioctl.c index 0f56deb..26aba09 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -568,7 +568,7 @@ static int ioctl_fsthaw(struct file *filp) return thaw_super(sb); } -static long ioctl_file_dedupe_range(struct file *file, void __user *arg) +static int ioctl_file_dedupe_range(struct file *file, void __user *arg) { struct file_dedupe_range __user *argp = arg; struct file_dedupe_range *same = NULL; -- cgit v0.10.2 From b71dbf1032f546bf3efd60fb5d9d0cefd200a508 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 14 Sep 2016 20:20:44 -0700 Subject: vfs: cap dedupe request structure size at PAGE_SIZE Kirill A Shutemov reports that the kernel doesn't try to cap dest_count in any way, and uses the number to allocate kernel memory. This causes high order allocation warnings in the kernel log if someone passes in a big enough value. We should clamp the allocation at PAGE_SIZE to avoid stressing the VM. The two existing users of the dedupe ioctl never send more than 120 requests, so we can safely clamp dest_range at PAGE_SIZE, because with 4k pages we can handle up to 127 dedupe candidates. Given the max extent length of 16MB, we can end up doing 2GB of IO which is plenty. [ Note: the "offsetof()" can't overflow, because 'count' is just a 16-bit integer. That's not obvious in the limited context of the patch, so I'm noting it here because it made me go look. - Linus ] Reported-by: "Kirill A. Shutemov" Signed-off-by: Darrick J. Wong Signed-off-by: Linus Torvalds diff --git a/fs/ioctl.c b/fs/ioctl.c index 26aba09..c415668 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -582,6 +582,10 @@ static int ioctl_file_dedupe_range(struct file *file, void __user *arg) } size = offsetof(struct file_dedupe_range __user, info[count]); + if (size > PAGE_SIZE) { + ret = -ENOMEM; + goto out; + } same = memdup_user(argp, size); if (IS_ERR(same)) { -- cgit v0.10.2 From 22f6b4d34fcf039c63a94e7670e0da24f8575a5a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 16 Sep 2016 00:31:22 +0200 Subject: aio: mark AIO pseudo-fs noexec This ensures that do_mmap() won't implicitly make AIO memory mappings executable if the READ_IMPLIES_EXEC personality flag is set. Such behavior is problematic because the security_mmap_file LSM hook doesn't catch this case, potentially permitting an attacker to bypass a W^X policy enforced by SELinux. I have tested the patch on my machine. To test the behavior, compile and run this: #define _GNU_SOURCE #include #include #include #include #include #include #include int main(void) { personality(READ_IMPLIES_EXEC); aio_context_t ctx = 0; if (syscall(__NR_io_setup, 1, &ctx)) err(1, "io_setup"); char cmd[1000]; sprintf(cmd, "cat /proc/%d/maps | grep -F '/[aio]'", (int)getpid()); system(cmd); return 0; } In the output, "rw-s" is good, "rwxs" is bad. Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds diff --git a/fs/aio.c b/fs/aio.c index fb8e45b..4fe81d1 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -239,7 +239,12 @@ static struct dentry *aio_mount(struct file_system_type *fs_type, static const struct dentry_operations ops = { .d_dname = simple_dname, }; - return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC); + struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops, + AIO_RING_MAGIC); + + if (!IS_ERR(root)) + root->d_sb->s_iflags |= SB_I_NOEXEC; + return root; } /* aio_setup -- cgit v0.10.2 From 778935778c3b88e5152a88765850009006ef2e32 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 10:42:25 -0700 Subject: PM / runtime: Use _rcuidle for runtime suspend tracepoints Further testing with false negatives suppressed by commit 293e2421fe25 ("rcu: Remove superfluous versions of rcu_read_lock_sched_held()") identified a few more unprotected uses of RCU from the idle loop. Because RCU actively ignores idle-loop code (for energy-efficiency reasons, among other things), using RCU from the idle loop can result in too-short grace periods, in turn resulting in arbitrary misbehavior. The affected function is rpm_suspend(). The resulting lockdep-RCU splat is as follows: ------------------------------------------------------------------------ Warning from omap3 =============================== [ INFO: suspicious RCU usage. ] 4.6.0-rc5-next-20160426+ #1112 Not tainted ------------------------------- include/trace/events/rpm.h:63 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 1 lock held by swapper/0/0: #0: (&(&dev->power.lock)->rlock){-.-...}, at: [] __pm_runtime_suspend+0x54/0x84 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.6.0-rc5-next-20160426+ #1112 Hardware name: Generic OMAP36xx (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xb0/0xe4) [] (dump_stack) from [] (rpm_suspend+0x604/0x7e4) [] (rpm_suspend) from [] (__pm_runtime_suspend+0x64/0x84) [] (__pm_runtime_suspend) from [] (omap2_gpio_prepare_for_idle+0x5c/0x70) [] (omap2_gpio_prepare_for_idle) from [] (omap_sram_idle+0x140/0x244) [] (omap_sram_idle) from [] (omap3_enter_idle_bm+0xfc/0x1ec) [] (omap3_enter_idle_bm) from [] (cpuidle_enter_state+0x80/0x3d4) [] (cpuidle_enter_state) from [] (cpu_startup_entry+0x198/0x3a0) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x3c8) [] (start_kernel) from [<8000807c>] (0x8000807c) ------------------------------------------------------------------------ Reported-by: Tony Lindgren Signed-off-by: Paul E. McKenney Tested-by: Tony Lindgren Tested-by: Guenter Roeck [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 17995fa..82a081e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -419,7 +419,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) struct device *parent = NULL; int retval; - trace_rpm_suspend(dev, rpmflags); + trace_rpm_suspend_rcuidle(dev, rpmflags); repeat: retval = rpm_check_suspend_allowed(dev); @@ -549,7 +549,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) } out: - trace_rpm_return_int(dev, _THIS_IP_, retval); + trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); return retval; -- cgit v0.10.2 From 2a292822f00f7409fc0bd6b2d09efc5b8e6c9c5d Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 14 Sep 2016 13:09:24 +0200 Subject: net/mlx4_en: fix off by one in error handling If an error occurs in mlx4_init_eq_table the index used in the err_out_unmap label is one too big which results in a panic in mlx4_free_eq. This patch fixes the index in the error path. Signed-off-by: Sebastian Ott Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index f613977..cf8f8a7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1305,8 +1305,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) return 0; err_out_unmap: - while (i >= 0) - mlx4_free_eq(dev, &priv->eq_table.eq[i--]); + while (i > 0) + mlx4_free_eq(dev, &priv->eq_table.eq[--i]); #ifdef CONFIG_RFS_ACCEL for (i = 1; i <= dev->caps.num_ports; i++) { if (mlx4_priv(dev)->port[i].rmap) { -- cgit v0.10.2 From 7077dc415b113ac17a6696c432bad2d66574e4fb Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Wed, 14 Sep 2016 21:29:34 +0800 Subject: net: ethernet: mediatek: fix module loading automatically based on MODULE_DEVICE_TABLE The device table is required to load modules based on modaliases. After adding MODULE_DEVICE_TABLE, below entries for example will be added to modules.alias: alias of:N*T*Cmediatek,mt7623-ethC* mtk_eth_soc Signed-off-by: Sean Wang Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index d919915..3743af8 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1923,6 +1923,7 @@ const struct of_device_id of_mtk_match[] = { { .compatible = "mediatek,mt7623-eth" }, {}, }; +MODULE_DEVICE_TABLE(of, of_mtk_match); static struct platform_driver mtk_driver = { .probe = mtk_probe, -- cgit v0.10.2 From 01afd972a737879c1466a12f696601a2ce91ea84 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 14 Sep 2016 19:06:44 +0300 Subject: net/ibm/emac: add set mac addr callback add realization for mac address set and remove dummy callback. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 4c9771d..2dfc603 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -980,6 +980,33 @@ static void emac_set_multicast_list(struct net_device *ndev) __emac_set_multicast_list(dev); } +static int emac_set_mac_address(struct net_device *ndev, void *sa) +{ + struct emac_instance *dev = netdev_priv(ndev); + struct sockaddr *addr = sa; + struct emac_regs __iomem *p = dev->emacp; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mutex_lock(&dev->link_lock); + + memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); + + emac_rx_disable(dev); + emac_tx_disable(dev); + out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]); + out_be32(&p->ialr, (ndev->dev_addr[2] << 24) | + (ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) | + ndev->dev_addr[5]); + emac_tx_enable(dev); + emac_rx_enable(dev); + + mutex_unlock(&dev->link_lock); + + return 0; +} + static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu) { int rx_sync_size = emac_rx_sync_size(new_mtu); @@ -2686,7 +2713,7 @@ static const struct net_device_ops emac_netdev_ops = { .ndo_do_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit, .ndo_change_mtu = eth_change_mtu, }; @@ -2699,7 +2726,7 @@ static const struct net_device_ops emac_gige_netdev_ops = { .ndo_do_ioctl = emac_ioctl, .ndo_tx_timeout = emac_tx_timeout, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = emac_set_mac_address, .ndo_start_xmit = emac_start_xmit_sg, .ndo_change_mtu = emac_change_mtu, }; -- cgit v0.10.2 From 7106a069f45b15e63d14484e72969e64798e641c Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 14 Sep 2016 19:06:45 +0300 Subject: net/ibm/emac: add mutex to 'set multicast list' for preventing race conditions within ioctl calls. Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 2dfc603..7af09cb 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -977,7 +977,10 @@ static void emac_set_multicast_list(struct net_device *ndev) dev->mcast_pending = 1; return; } + + mutex_lock(&dev->link_lock); __emac_set_multicast_list(dev); + mutex_unlock(&dev->link_lock); } static int emac_set_mac_address(struct net_device *ndev, void *sa) -- cgit v0.10.2 From d6f64d725bac20df66b2eacd847fc41d7a1905e0 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Thu, 15 Sep 2016 11:40:05 +1200 Subject: net: VRF: Pass original iif to ip_route_input() The function ip_rcv_finish() calls l3mdev_ip_rcv(). On any VRF except the global VRF, this replaces skb->dev with the VRF master interface. When calling ip_route_input_noref() from here, the checks for forwarding look at this master device instead of the initial ingress interface. This will allow packets to be routed which normally would be dropped. For example, an interface that is not assigned an IP address should drop packets, but because the checking is against the master device, the packet will be forwarded. The fix here is to still call l3mdev_ip_rcv(), but remember the initial net_device. This is passed to the other functions within ip_rcv_finish, so they still see the original interface. Signed-off-by: Mark Tomlinson Acked-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 4b351af..d6feabb 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -312,6 +312,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; + struct net_device *dev = skb->dev; /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -341,7 +342,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) */ if (!skb_valid_dst(skb)) { int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, skb->dev); + iph->tos, dev); if (unlikely(err)) { if (err == -EXDEV) __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); @@ -370,7 +371,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); /* RFC 1122 3.3.6: * -- cgit v0.10.2 From bc6c03fa3cacd31b873e36ca16ef9678269deae6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Sep 2016 03:45:07 +0000 Subject: nfp: fix error return code in nfp_net_netdev_open() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 73725d9dfd99 ("nfp: allocate ring SW structs dynamically") Signed-off-by: Wei Yongjun Acked-by: Jakub Kicinski Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 252e492..39dadfc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2044,12 +2044,16 @@ static int nfp_net_netdev_open(struct net_device *netdev) nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings), GFP_KERNEL); - if (!nn->rx_rings) + if (!nn->rx_rings) { + err = -ENOMEM; goto err_free_lsc; + } nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings), GFP_KERNEL); - if (!nn->tx_rings) + if (!nn->tx_rings) { + err = -ENOMEM; goto err_free_rx_rings; + } for (r = 0; r < nn->num_r_vecs; r++) { err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); -- cgit v0.10.2 From e830baa9c3f0023769ba9aab19eb44c892769d87 Mon Sep 17 00:00:00 2001 From: Hans Wippel Date: Thu, 15 Sep 2016 14:39:21 +0200 Subject: qeth: restore device features after recovery After device recovery, only a basic set of network device features is enabled on the device. If features like checksum offloading or TSO were enabled by the user before the recovery, this results in a mismatch between the network device features, that the kernel assumes to be enabled on the device, and the features actually enabled on the device. This patch tries to restore previously set features, that require changes on the device, after the recovery of a device. In case of an error, the network device's features are changed to contain only the features that are actually turned on. Signed-off-by: Hans Wippel Signed-off-by: Ursula Braun Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index bf40063..6d4b68c4 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -999,6 +999,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *, __u16, __u16, enum qeth_prot_versions); int qeth_set_features(struct net_device *, netdev_features_t); +int qeth_recover_features(struct net_device *); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); /* exports for OSN */ diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 7dba6c8..6ad5a14 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -6131,6 +6131,35 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on) return rc; } +/* try to restore device features on a device after recovery */ +int qeth_recover_features(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + netdev_features_t recover = dev->features; + + if (recover & NETIF_F_IP_CSUM) { + if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM)) + recover ^= NETIF_F_IP_CSUM; + } + if (recover & NETIF_F_RXCSUM) { + if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM)) + recover ^= NETIF_F_RXCSUM; + } + if (recover & NETIF_F_TSO) { + if (qeth_set_ipa_tso(card, 1)) + recover ^= NETIF_F_TSO; + } + + if (recover == dev->features) + return 0; + + dev_warn(&card->gdev->dev, + "Device recovery failed to restore all offload features\n"); + dev->features = recover; + return -EIO; +} +EXPORT_SYMBOL_GPL(qeth_recover_features); + int qeth_set_features(struct net_device *dev, netdev_features_t features) { struct qeth_card *card = dev->ml_priv; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 7bc20c5..54fd891 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1246,6 +1246,9 @@ contin: } /* this also sets saved unicast addresses */ qeth_l2_set_rx_mode(card->dev); + rtnl_lock(); + qeth_recover_features(card->dev); + rtnl_unlock(); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7293466..2f51271 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3269,6 +3269,7 @@ contin: else dev_open(card->dev); qeth_l3_set_multicast_list(card->dev); + qeth_recover_features(card->dev); rtnl_unlock(); } qeth_trace_features(card); -- cgit v0.10.2 From 016930b88a1d6eb6e6b3287d593e13ca06986acc Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:22 +0200 Subject: s390/qeth: use ip_lock for hsuid configuration qeth_l3_dev_hsuid_store() changes the ip hash table, which requires the ip_lock. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index 65645b1..0e00a5c 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -297,7 +297,9 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, addr->u.a6.pfxlen = 0; addr->type = QETH_IP_TYPE_NORMAL; + spin_lock_bh(&card->ip_lock); qeth_l3_delete_ip(card, addr); + spin_unlock_bh(&card->ip_lock); kfree(addr); } @@ -329,7 +331,10 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, addr->type = QETH_IP_TYPE_NORMAL; } else return -ENOMEM; + + spin_lock_bh(&card->ip_lock); qeth_l3_add_ip(card, addr); + spin_unlock_bh(&card->ip_lock); kfree(addr); return count; -- cgit v0.10.2 From a7531c1cc09855df5e33ceefe4fdfc2d74ccab19 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:23 +0200 Subject: s390/qeth: allow hsuid configuration in DOWN state The qeth IP address mapping logic has been reworked recently. It causes now problems to specify qeth sysfs attribute "hsuid" in DOWN state, which is allowed. Postpone registering or deregistering of IP-addresses in this case. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 2f51271..4ba82e1 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -257,6 +257,11 @@ int qeth_l3_delete_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) if (addr->in_progress) return -EINPROGRESS; + if (!qeth_card_hw_is_reachable(card)) { + addr->disp_flag = QETH_DISP_ADDR_DELETE; + return 0; + } + rc = qeth_l3_deregister_addr_entry(card, addr); hash_del(&addr->hnode); @@ -296,6 +301,11 @@ int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr) hash_add(card->ip_htable, &addr->hnode, qeth_l3_ipaddr_hash(addr)); + if (!qeth_card_hw_is_reachable(card)) { + addr->disp_flag = QETH_DISP_ADDR_ADD; + return 0; + } + /* qeth_l3_register_addr_entry can go to sleep * if we add a IPV4 addr. It is caused by the reason * that SETIP ipa cmd starts ARP staff for IPV4 addr. @@ -390,12 +400,16 @@ static void qeth_l3_recover_ip(struct qeth_card *card) int i; int rc; - QETH_CARD_TEXT(card, 4, "recoverip"); + QETH_CARD_TEXT(card, 4, "recovrip"); spin_lock_bh(&card->ip_lock); hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) { - if (addr->disp_flag == QETH_DISP_ADDR_ADD) { + if (addr->disp_flag == QETH_DISP_ADDR_DELETE) { + qeth_l3_deregister_addr_entry(card, addr); + hash_del(&addr->hnode); + kfree(addr); + } else if (addr->disp_flag == QETH_DISP_ADDR_ADD) { if (addr->proto == QETH_PROT_IPV4) { addr->in_progress = 1; spin_unlock_bh(&card->ip_lock); @@ -407,10 +421,8 @@ static void qeth_l3_recover_ip(struct qeth_card *card) if (!rc) { addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING; - if (addr->ref_counter < 1) { + if (addr->ref_counter < 1) qeth_l3_delete_ip(card, addr); - kfree(addr); - } } else { hash_del(&addr->hnode); kfree(addr); -- cgit v0.10.2 From 903e48531e8b5d414c8f1960eacac24c31f60344 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:24 +0200 Subject: qeth: check not more than 16 SBALEs on the completion queue af_iucv socket programs with HiperSockets as transport make use of the qdio completion queue. Running such an af_iucv socket program may result in a crash: [90341.677709] Oops: 0038 ilc:2 [#1] SMP [90341.677743] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.6.0-20160720.0.0e86ec7.5e62689.fc23.s390xperformance #1 [90341.677744] Hardware name: IBM 2964 N96 703 (LPAR) [90341.677746] task: 00000000edb79f00 ti: 00000000edb84000 task.ti: 00000000edb84000 [90341.677748] Krnl PSW : 0704d00180000000 000000000075bc50 (qeth_qdio_input_handler+0x258/0x4e0) [90341.677756] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:1 PM:0 RI:0 EA:3 Krnl GPRS: 000003d10391e900 0000000000000001 00000000e61e6000 0000000000000005 [90341.677759] 0000000000a9e6ec 5420040001a77400 0000000000000001 000000000000006f [90341.677761] 00000000e0d83f00 0000000000000003 0000000000000010 5420040001a77400 [90341.677784] 000000007ba8b000 0000000000943fd0 000000000075bc4e 00000000ed3b3c10 [90341.677793] Krnl Code: 000000000075bc42: e320cc180004 lg %r2,3096(%r12) 000000000075bc48: c0e5ffffc5cc brasl %r14,7547e0 #000000000075bc4e: 1816 lr %r1,%r6 >000000000075bc50: ba19b008 cs %r1,%r9,8(%r11) 000000000075bc54: ec180041017e cij %r1,1,8,75bcd6 000000000075bc5a: 5810b008 l %r1,8(%r11) 000000000075bc5e: ec16005c027e cij %r1,2,6,75bd16 000000000075bc64: 5090b008 st %r9,8(%r11) [90341.677807] Call Trace: [90341.677810] ([<000000000075bbc0>] qeth_qdio_input_handler+0x1c8/0x4e0) [90341.677812] ([<000000000070efbc>] qdio_kick_handler+0x124/0x2a8) [90341.677814] ([<0000000000713570>] __tiqdio_inbound_processing+0xf0/0xcd0) [90341.677818] ([<0000000000143312>] tasklet_action+0x92/0x120) [90341.677823] ([<00000000008b6e72>] __do_softirq+0x112/0x308) [90341.677824] ([<0000000000142bce>] irq_exit+0xd6/0xf8) [90341.677829] ([<000000000010b1d2>] do_IRQ+0x6a/0x88) [90341.677830] ([<00000000008b6322>] io_int_handler+0x112/0x220) [90341.677832] ([<0000000000102b2e>] enabled_wait+0x56/0xa8) [90341.677833] ([<0000000000000000>] (null)) [90341.677835] ([<0000000000102e32>] arch_cpu_idle+0x32/0x48) [90341.677838] ([<000000000018a126>] cpu_startup_entry+0x266/0x2b0) [90341.677841] ([<0000000000113b38>] smp_start_secondary+0x100/0x110) [90341.677843] ([<00000000008b68a6>] restart_int_handler+0x62/0x78) [90341.677845] ([<00000000008b6588>] psw_idle+0x3c/0x40) [90341.677846] Last Breaking-Event-Address: [90341.677848] [<00000000007547ec>] qeth_dbf_longtext+0xc/0xc0 [90341.677849] [90341.677850] Kernel panic - not syncing: Fatal exception in interrupt qeth_qdio_cq_handler() analyzes SBALs on this completion queue, but does not observe the limit of 16 SBAL elements per SBAL. This patch adds the additional check to process not more than 16 SBAL elements. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 6ad5a14..20cf296 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3619,7 +3619,8 @@ static void qeth_qdio_cq_handler(struct qeth_card *card, int e; e = 0; - while (buffer->element[e].addr) { + while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) && + buffer->element[e].addr) { unsigned long phys_aob_addr; phys_aob_addr = (unsigned long) buffer->element[e].addr; -- cgit v0.10.2 From 243f750fc6f5d8e4dec984a9a785941c67452b8f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:25 +0200 Subject: qeth: do not limit number of gso segments To reduce the need of skb_linearize() calls, gso_max_segs of qeth net_devices had been limited according to the maximum number of qdio SBAL elements. But a gso segment cannot be larger than the mtu-size, while an SBAL element can contain up to 4096 bytes. The gso_max_segs limitation limits the maximum packet size given to the qeth driver. Performance measurements with tso-enabled qeth network interfaces and mtu-size 1500 showed, that the disadvantage of smaller packets is much more severe than the advantage of fewer skb_linearize() calls. This patch gets rid of the gso_max_segs limitations in the qeth driver. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 54fd891..2081c18 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1131,7 +1131,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) qeth_l2_request_initial_mac(card); card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * PAGE_SIZE; - card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT); netif_carrier_off(card->dev); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 4ba82e1..0cbbc80 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3148,7 +3148,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) netif_keep_dst(card->dev); card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) * PAGE_SIZE; - card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1); SET_NETDEV_DEV(card->dev, &card->gdev->dev); netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT); -- cgit v0.10.2 From 5722963a8e83309dad831cf6968c4c805aa342c0 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 15 Sep 2016 14:39:26 +0200 Subject: qeth: do not turn on SG per default According to recent performance measurements, turning on net_device feature NETIF_F_SG only behaves well, but turning on feature NETIF_F_GSO shows bad results. Since the kernel activates NETIF_F_GSO automatically as soon as the driver configures feature NETIF_F_SG, qeth should not activate feature NETIF_F_SG per default, until the qeth problems with NETIF_F_GSO are solved. Signed-off-by: Ursula Braun Reviewed-by: Thomas Richter Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2081c18..bb27058 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1124,8 +1124,6 @@ static int qeth_l2_setup_netdev(struct qeth_card *card) card->dev->hw_features |= NETIF_F_RXCSUM; card->dev->vlan_features |= NETIF_F_RXCSUM; } - /* Turn on SG per default */ - card->dev->features |= NETIF_F_SG; } card->info.broadcast_capable = 1; qeth_l2_request_initial_mac(card); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 0cbbc80..c00f6db 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3120,7 +3120,6 @@ static int qeth_l3_setup_netdev(struct qeth_card *card) card->dev->vlan_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO; - card->dev->features = NETIF_F_SG; } } } else if (card->info.type == QETH_CARD_TYPE_IQD) { -- cgit v0.10.2 From 732a59cb6e7faed7a40da6665a517945c95fc895 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 15 Sep 2016 14:39:27 +0200 Subject: s390/qeth: fix setting VIPA address commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback") restructured the internal address handling. This work broke setting a virtual IP address. The command echo 10.1.1.1 > /sys/bus/ccwgroup/devices//vipa/add4 fails with file exist error even if the IP address has not been set before. It turned out that the search result for the IP address search is handled incorrectly in the VIPA case. This patch fixes the setting of an virtual IP address. Signed-off-by: Thomas Richter Signed-off-by: Ursula Braun Signed-off-by: David S. Miller diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index c00f6db..272d9e7 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -701,7 +701,7 @@ int qeth_l3_add_vipa(struct qeth_card *card, enum qeth_prot_versions proto, spin_lock_bh(&card->ip_lock); - if (!qeth_l3_ip_from_hash(card, ipaddr)) + if (qeth_l3_ip_from_hash(card, ipaddr)) rc = -EEXIST; else qeth_l3_add_ip(card, ipaddr); @@ -769,7 +769,7 @@ int qeth_l3_add_rxip(struct qeth_card *card, enum qeth_prot_versions proto, spin_lock_bh(&card->ip_lock); - if (!qeth_l3_ip_from_hash(card, ipaddr)) + if (qeth_l3_ip_from_hash(card, ipaddr)) rc = -EEXIST; else qeth_l3_add_ip(card, ipaddr); -- cgit v0.10.2 From 95f60084acbcee6c466256cf26eb52191fad9edc Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:50 +0300 Subject: perf/x86/intel/pt: Fix an off-by-one in address filter configuration PT address filter configuration requires that a range is specified by its first and last address, but at the moment we're obtaining the end of the range by adding user specified size to its start, which is off by one from what it actually needs to be. Fix this and make sure that zero-sized filters don't pass the filter validation. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 04bb5fb..5ec0100 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1081,7 +1081,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) list_for_each_entry(filter, filters, entry) { /* PT doesn't support single address triggers */ - if (!filter->range) + if (!filter->range || !filter->size) return -EOPNOTSUPP; if (!filter->inode && !kernel_ip(filter->offset)) @@ -1111,7 +1111,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event) } else { /* apply the offset */ msr_a = filter->offset + offs[range]; - msr_b = filter->size + msr_a; + msr_b = filter->size + msr_a - 1; } filters->filter[range].msr_a = msr_a; -- cgit v0.10.2 From ddfdad991e55b65c1cc4ee29502f6dceee04455a Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:51 +0300 Subject: perf/x86/intel/pt: Fix kernel address filter's offset validation The kernel_ip() filter is used mostly by the DS/LBR code to look at the branch addresses, but Intel PT also uses it to validate the address filter offsets for kernel addresses, for which it is not sufficient: supplying something in bits 64:48 that's not a sign extension of the lower address bits (like 0xf00d000000000000) throws a #GP. This patch adds address validation for the user supplied kernel filters. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 5ec0100..1f94963 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1074,6 +1074,11 @@ static void pt_addr_filters_fini(struct perf_event *event) event->hw.addr_filters = NULL; } +static inline bool valid_kernel_ip(unsigned long ip) +{ + return virt_addr_valid(ip) && kernel_ip(ip); +} + static int pt_event_addr_filters_validate(struct list_head *filters) { struct perf_addr_filter *filter; @@ -1084,7 +1089,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode && !kernel_ip(filter->offset)) + if (!filter->inode && !valid_kernel_ip(filter->offset)) return -EINVAL; if (++range > pt_cap_get(PT_CAP_num_address_ranges)) -- cgit v0.10.2 From 1155bafcb79208abc6ae234c6e135ac70607755c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Thu, 15 Sep 2016 18:13:52 +0300 Subject: perf/x86/intel/pt: Do validate the size of a kernel address filter Right now, the kernel address filters in PT are prone to integer overflow that may happen in adding filter's size to its offset to obtain the end of the range. Such an overflow would also throw a #GP in the PT event configuration path. Fix this by explicitly validating the result of this calculation. Reported-by: Adrian Hunter Signed-off-by: Alexander Shishkin Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: stable@vger.kernel.org # v4.7 Cc: stable@vger.kernel.org#v4.7 Cc: vince@deater.net Link: http://lkml.kernel.org/r/20160915151352.21306-4-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 1f94963..861a7d9 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1089,8 +1089,13 @@ static int pt_event_addr_filters_validate(struct list_head *filters) if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode && !valid_kernel_ip(filter->offset)) - return -EINVAL; + if (!filter->inode) { + if (!valid_kernel_ip(filter->offset)) + return -EINVAL; + + if (!valid_kernel_ip(filter->offset + filter->size)) + return -EINVAL; + } if (++range > pt_cap_get(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; -- cgit v0.10.2 From 080fe0b790ad438fc1b61621dac37c1964ce7f35 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 24 Aug 2016 14:12:08 +0100 Subject: perf/x86/amd: Make HW_CACHE_REFERENCES and HW_CACHE_MISSES measure L2 While the Intel PMU monitors the LLC when perf enables the HW_CACHE_REFERENCES and HW_CACHE_MISSES events, these events monitor L1 instruction cache fetches (0x0080) and instruction cache misses (0x0081) on the AMD PMU. This is extremely confusing when monitoring the same workload across Intel and AMD machines, since parameters like, $ perf stat -e cache-references,cache-misses measure completely different things. Instead, make the AMD PMU measure instruction/data cache and TLB fill requests to the L2 and instruction/data cache and TLB misses in the L2 when HW_CACHE_REFERENCES and HW_CACHE_MISSES are enabled, respectively. That way the events measure unified caches on both platforms. Signed-off-by: Matt Fleming Acked-by: Peter Zijlstra Cc: Cc: Borislav Petkov Cc: Linus Torvalds Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1472044328-21302-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index e07a22b..f5f4b3f 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -119,8 +119,8 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c index 39b9112..cd94443 100644 --- a/arch/x86/kvm/pmu_amd.c +++ b/arch/x86/kvm/pmu_amd.c @@ -23,8 +23,8 @@ static struct kvm_event_hw_type_mapping amd_event_mapping[] = { [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, - [2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES }, - [3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES }, + [2] = { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES }, + [3] = { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES }, [4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, [6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, -- cgit v0.10.2 From ed3d6d0ac036bcff6e4649a6f8fb987bb4e34444 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 16 Sep 2016 09:24:44 -0500 Subject: usb: musb: Fix tusb6010 compile error on blackfin We have CONFIG_BLACKFIN ifdef redefining all musb registers in musb_regs.h and tusb6010.h is never included causing a build error with blackfin-allmodconfig and COMPILE_TEST. Let's fix the issue by not building tusb6010 if CONFIG_BLACKFIN is selected. Reported-by: kbuild test robot Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 886526b..73cfa13 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -87,7 +87,7 @@ config USB_MUSB_DA8XX config USB_MUSB_TUSB6010 tristate "TUSB6010" depends on HAS_IOMEM - depends on ARCH_OMAP2PLUS || COMPILE_TEST + depends on (ARCH_OMAP2PLUS || COMPILE_TEST) && !BLACKFIN depends on NOP_USB_XCEIV = USB_MUSB_HDRC # both built-in or both modules config USB_MUSB_OMAP2PLUS -- cgit v0.10.2 From 08c5cd37480f59ea39682f4585d92269be6b1424 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 16 Sep 2016 10:24:26 -0400 Subject: USB: change bInterval default to 10 ms Some full-speed mceusb infrared transceivers contain invalid endpoint descriptors for their interrupt endpoints, with bInterval set to 0. In the past they have worked out okay with the mceusb driver, because the driver sets the bInterval field in the descriptor to 1, overwriting whatever value may have been there before. However, this approach was never sanctioned by the USB core, and in fact it does not work with xHCI controllers, because they use the bInterval value that was present when the configuration was installed. Currently usbcore uses 32 ms as the default interval if the value in the endpoint descriptor is invalid. It turns out that these IR transceivers don't work properly unless the interval is set to 10 ms or below. To work around this mceusb problem, this patch changes the endpoint-descriptor parsing routine, making the default interval value be 10 ms rather than 32 ms. Signed-off-by: Alan Stern Tested-by: Wade Berrier CC: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 15ce4ab..a2d90ac 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -240,8 +240,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, memcpy(&endpoint->desc, d, n); INIT_LIST_HEAD(&endpoint->urb_list); - /* Fix up bInterval values outside the legal range. Use 32 ms if no - * proper value can be guessed. */ + /* + * Fix up bInterval values outside the legal range. + * Use 10 or 8 ms if no proper value can be guessed. + */ i = 0; /* i = min, j = max, n = default */ j = 255; if (usb_endpoint_xfer_int(d)) { @@ -250,13 +252,15 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: case USB_SPEED_HIGH: - /* Many device manufacturers are using full-speed + /* + * Many device manufacturers are using full-speed * bInterval values in high-speed interrupt endpoint - * descriptors. Try to fix those and fall back to a - * 32 ms default value otherwise. */ + * descriptors. Try to fix those and fall back to an + * 8-ms default value otherwise. + */ n = fls(d->bInterval*8); if (n == 0) - n = 9; /* 32 ms = 2^(9-1) uframes */ + n = 7; /* 8 ms = 2^(7-1) uframes */ j = 16; /* @@ -271,10 +275,12 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, } break; default: /* USB_SPEED_FULL or _LOW */ - /* For low-speed, 10 ms is the official minimum. + /* + * For low-speed, 10 ms is the official minimum. * But some "overclocked" devices might want faster - * polling so we'll allow it. */ - n = 32; + * polling so we'll allow it. + */ + n = 10; break; } } else if (usb_endpoint_xfer_isoc(d)) { @@ -282,10 +288,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, j = 16; switch (to_usb_device(ddev)->speed) { case USB_SPEED_HIGH: - n = 9; /* 32 ms = 2^(9-1) uframes */ + n = 7; /* 8 ms = 2^(7-1) uframes */ break; default: /* USB_SPEED_FULL */ - n = 6; /* 32 ms = 2^(6-1) frames */ + n = 4; /* 8 ms = 2^(4-1) frames */ break; } } -- cgit v0.10.2 From 5e102b3b4fa944815aead89e63eec2a35069a07b Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 2 Sep 2016 23:46:53 +0300 Subject: IB/rxe: fix GFP_KERNEL in spinlock context There is skb_clone(skb, GFP_KERNEL) in spinlock context in rxe_rcv_mcast_pkt(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Acked-by: Moni Shoua Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 3d464c2..144d2f1 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) * make a copy of the skb to post to the next qp */ skb_copy = (mce->qp_list.next != &mcg->qp_list) ? - skb_clone(skb, GFP_KERNEL) : NULL; + skb_clone(skb, GFP_ATOMIC) : NULL; pkt->qp = qp; rxe_add_ref(qp); -- cgit v0.10.2 From 344bacca8cd811809fc33a249f2738ab757d327f Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 12 Sep 2016 09:55:28 +0300 Subject: IB/ipoib: Don't allow MC joins during light MC flush This fix solves a race between light flush and on the fly joins. Light flush doesn't set the device to down and unset IPOIB_OPER_UP flag, this means that if while flushing we have a MC join in progress and the QP was attached to BC MGID we can have a mismatches when re-attaching a QP to the BC MGID. The light flush would set the broadcast group to NULL causing an on the fly join to rejoin and reattach to the BC MCG as well as adding the BC MGID to the multicast list. The flush process would later on remove the BC MGID and detach it from the QP. On the next flush the BC MGID is present in the multicast list but not found when trying to detach it because of the previous double attach and single detach. [18332.714265] ------------[ cut here ]------------ [18332.717775] WARNING: CPU: 6 PID: 3767 at drivers/infiniband/core/verbs.c:280 ib_dealloc_pd+0xff/0x120 [ib_core] ... [18332.775198] Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 [18332.779411] 0000000000000000 ffff8800b50dfbb0 ffffffff813fed47 0000000000000000 [18332.784960] 0000000000000000 ffff8800b50dfbf0 ffffffff8109add1 0000011832f58300 [18332.790547] ffff880226a596c0 ffff880032482000 ffff880032482830 ffff880226a59280 [18332.796199] Call Trace: [18332.798015] [] dump_stack+0x63/0x8c [18332.801831] [] __warn+0xd1/0xf0 [18332.805403] [] warn_slowpath_null+0x1d/0x20 [18332.809706] [] ib_dealloc_pd+0xff/0x120 [ib_core] [18332.814384] [] ipoib_transport_dev_cleanup+0xfc/0x1d0 [ib_ipoib] [18332.820031] [] ipoib_ib_dev_cleanup+0x98/0x110 [ib_ipoib] [18332.825220] [] ipoib_dev_cleanup+0x2d8/0x550 [ib_ipoib] [18332.830290] [] ipoib_uninit+0x2f/0x40 [ib_ipoib] [18332.834911] [] rollback_registered_many+0x1aa/0x2c0 [18332.839741] [] rollback_registered+0x31/0x40 [18332.844091] [] unregister_netdevice_queue+0x48/0x80 [18332.848880] [] ipoib_vlan_delete+0x1fb/0x290 [ib_ipoib] [18332.853848] [] delete_child+0x7d/0xf0 [ib_ipoib] [18332.858474] [] dev_attr_store+0x18/0x30 [18332.862510] [] sysfs_kf_write+0x3a/0x50 [18332.866349] [] kernfs_fop_write+0x120/0x170 [18332.870471] [] __vfs_write+0x28/0xe0 [18332.874152] [] ? percpu_down_read+0x1f/0x50 [18332.878274] [] vfs_write+0xa2/0x1a0 [18332.881896] [] SyS_write+0x46/0xa0 [18332.885632] [] do_syscall_64+0x57/0xb0 [18332.889709] [] entry_SYSCALL64_slow_path+0x25/0x25 [18332.894727] ---[ end trace 09ebbe31f831ef17 ]--- Fixes: ee1e2c82c245 ("IPoIB: Refresh paths instead of flushing them on SM change events") Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index dc6d241..be11d5d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, } if (level == IPOIB_FLUSH_LIGHT) { + int oper_up; ipoib_mark_paths_invalid(dev); + /* Set IPoIB operation as down to prevent races between: + * the flush flow which leaves MCG and on the fly joins + * which can happen during that time. mcast restart task + * should deal with join requests we missed. + */ + oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_mcast_dev_flush(dev); + if (oper_up) + set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_flush_ah(dev); } -- cgit v0.10.2 From e5ac40cd66c2f3cd11bc5edc658f012661b16347 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Mon, 12 Sep 2016 19:16:18 +0300 Subject: IB/mlx4: Fix incorrect MC join state bit-masking on SR-IOV Because of an incorrect bit-masking done on the join state bits, when handling a join request we failed to detect a difference between the group join state and the request join state when joining as send only full member (0x8). This caused the MC join request not to be sent. This issue is relevant only when SRIOV is enabled and SM supports send only full member. This fix separates scope bits and join states bits a nibble each. Fixes: b9c5d6a64358 ('IB/mlx4: Add multicast group (MCG) paravirtualization for SR-IOV') Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 8f7ad07..097bfcc 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group) if (!group->members[i]) leave_state |= (1 << i); - return leave_state & (group->rec.scope_join_state & 7); + return leave_state & (group->rec.scope_join_state & 0xf); } static int join_group(struct mcast_group *group, int slave, u8 join_mask) @@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) } else mcg_warn_group(group, "DRIVER BUG\n"); } else if (group->state == MCAST_LEAVE_SENT) { - if (group->rec.scope_join_state & 7) - group->rec.scope_join_state &= 0xf8; + if (group->rec.scope_join_state & 0xf) + group->rec.scope_join_state &= 0xf0; group->state = MCAST_IDLE; mutex_unlock(&group->lock); if (release_group(group, 1)) @@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask, static int handle_join_req(struct mcast_group *group, u8 join_mask, struct mcast_req *req) { - u8 group_join_state = group->rec.scope_join_state & 7; + u8 group_join_state = group->rec.scope_join_state & 0xf; int ref = 0; u16 status; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; @@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work) u8 cur_join_state; resp_join_state = ((struct ib_sa_mcmember_data *) - group->response_sa_mad.data)->scope_join_state & 7; - cur_join_state = group->rec.scope_join_state & 7; + group->response_sa_mad.data)->scope_join_state & 0xf; + cur_join_state = group->rec.scope_join_state & 0xf; if (method == IB_MGMT_METHOD_GET_RESP) { /* successfull join */ @@ -710,7 +710,7 @@ process_requests: req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; - req_join_state = sa_data->scope_join_state & 0x7; + req_join_state = sa_data->scope_join_state & 0xf; /* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent -- cgit v0.10.2 From baa0be7026e2f7d1d40bfd45909044169e9e3c68 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 12 Sep 2016 19:16:19 +0300 Subject: IB/mlx4: Fix code indentation in QP1 MAD flow The indentation in the QP1 GRH flow in procedure build_mlx_header is really confusing. Fix it, in preparation for a commit which touches this code. Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 768085f..e398c04 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2493,24 +2493,26 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - if (is_eth) + if (is_eth) { memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); - else { - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + } else { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache + */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + subnet_prefix; + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else { + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid, NULL); + } } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); -- cgit v0.10.2 From 8ec07bf8a8b57d6c58927a16a0a22c0115cf2855 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 12 Sep 2016 19:16:20 +0300 Subject: IB/mlx4: Use correct subnet-prefix in QP1 mads under SR-IOV When sending QP1 MAD packets which use a GRH, the source GID (which consists of the 64-bit subnet prefix, and the 64 bit port GUID) must be included in the packet GRH. For SR-IOV, a GID cache is used, since the source GID needs to be the slave's source GID, and not the Hypervisor's GID. This cache also included a subnet_prefix. Unfortunately, the subnet_prefix field in the cache was never initialized (to the default subnet prefix 0xfe80::0). As a result, this field remained all zeroes. Therefore, when SR-IOV was active, all QP1 packets which included a GRH had a source GID subnet prefix of all-zeroes. However, the subnet-prefix should initially be 0xfe80::0 (the default subnet prefix). In addition, if OpenSM modifies a port's subnet prefix, the new subnet prefix must be used in the GRH when sending QP1 packets. To fix this we now initialize the subnet prefix in the SR-IOV GID cache to the default subnet prefix. We update the cached value if/when OpenSM modifies the port's subnet prefix. We take this cached value when sending QP1 packets when SR-IOV is active. Note that the value is stored as an atomic64. This eliminates any need for locking when the subnet prefix is being updated. Note also that we depend on the FW generating the "port management change" event for tracking subnet-prefix changes performed by OpenSM. If running early FW (before 2.9.4630), subnet prefix changes will not be tracked (but the default subnet prefix still will be stored in the cache; therefore users who do not modify the subnet prefix will not have a problem). IF there is a need for such tracking also for early FW, we will add that capability in a subsequent patch. Fixes: 1ffeb2eb8be9 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2e53d..0f21c3a 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (mlx4_is_master(dev->dev)) { + union ib_gid gid; + int err = 0; + + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); + else + gid.global.subnet_prefix = + eqe->event.port_mgmt_change.params.port_info.gid_prefix; + if (err) { + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", + port, err); + } else { + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", + port, + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), + be64_to_cpu(gid.global.subnet_prefix)); + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); + } + } mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) @@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + atomic64_set(&dev->sriov.demux[i].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7c5832e..686ab48 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx { struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; - __be64 subnet_prefix; + atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index e398c04..7fb9629 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2502,8 +2502,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, * we must use our own cache */ sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. + demux[sqp->qp.port - 1]. + subnet_prefix))); sqp->ud_header.grh.source_gid.global.interface_id = to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. guid_cache[ah->av.ib.gid_index]; -- cgit v0.10.2 From 69d269d38910e697e461ec5677368f57d2046cbe Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Mon, 12 Sep 2016 19:16:22 +0300 Subject: IB/mlx4: Diagnostic HW counters are not supported in slave mode Modify the mlx4_ib_diag_counters() to avoid the following error in the hypervisor when the slave tries to query the hardware counters in SR-IOV mode. mlx4_core 0000:81:00.0: Unknown command:0x30 accepted from slave:1 Fixes: 3f85f2aaabf7 ("IB/mlx4: Add diagnostic hardware counters") Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2af44c2..87ba9bc 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2202,6 +2202,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) bool per_port = !!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + if (mlx4_is_slave(ibdev->dev)) + return 0; + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { /* i == 1 means we are building port counters */ if (i && !per_port) -- cgit v0.10.2 From 7fae6655a0c897875bd34501ec092232b526d3e4 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Mon, 12 Sep 2016 19:16:23 +0300 Subject: IB/mlx5: Enable MAD_IFC commands for IB ports only MAD_IFC command is supported only for physical functions (PF) and when physical port is IB. The proposed fix enforces it. Fixes: d603c809ef91 ("IB/mlx5: Fix decision on using MAD_IFC") Reported-by: David Chang Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8150ea3..0480b64 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -288,7 +288,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { - return !MLX5_CAP_GEN(dev->mdev, ib_virt); + if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) + return !MLX5_CAP_GEN(dev->mdev, ib_virt); + return 0; } enum { -- cgit v0.10.2 From ee3da804ad1b1dd4c766199a6e8443542b0aaaef Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 12 Sep 2016 19:16:24 +0300 Subject: IB/mlx5: Set source mac address in FTE Set the source mac address in the FTE when L2 specification is provided. Fixes: 038d2ef87572 ('IB/mlx5: Add flow steering support') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0480b64..e19537c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1430,6 +1430,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dmac_47_16), ib_spec->eth.val.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + if (ib_spec->eth.mask.vlan_tag) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, vlan_tag, 1); -- cgit v0.10.2 From dfdd6158ca2c8838313470c5ab79243d04da70b2 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:04 +0300 Subject: IB/rxe: Fix kernel panic in udp_setup_tunnel Disable creation of a UDP socket for ipv6 when CONFIG_IPV6 is not enabeld. Since udp_sock_create6() returns 0 when CONFIG_IPV6 is not set [ 46.888632] IP: [] setup_udp_tunnel_sock+0x6/0x4f [ 46.891355] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 [ 46.893918] Oops: 0002 [#1] PREEMPT [ 46.896014] CPU: 0 PID: 1 Comm: swapper Not tainted 4.7.0-rc4-00001-g8700e3e #1 [ 46.900280] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 46.904905] task: cf06c040 ti: cf05e000 task.ti: cf05e000 [ 46.907854] EIP: 0060:[] EFLAGS: 00210246 CPU: 0 [ 46.911137] EIP is at setup_udp_tunnel_sock+0x6/0x4f [ 46.914070] EAX: 00000044 EBX: 00000001 ECX: cf05fef0 EDX: ca8142e0 [ 46.917236] ESI: c2c4505b EDI: cf05fef0 EBP: cf05fed0 ESP: cf05fed0 [ 46.919836] DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 [ 46.922046] CR0: 80050033 CR2: 000001fc CR3: 02cec000 CR4: 000006b0 [ 46.924550] Stack: [ 46.926014] cf05ff10 c1fd4657 ca8142e0 0000000a 00000000 00000000 0000b712 00000008 [ 46.931274] 00000000 6bb5bd01 c1fd48de 00000000 00000000 cf05ff1c 00000000 00000000 [ 46.936122] cf05ff1c c1fd4bdf 00000000 cf05ff28 c2c4507b ffffffff cf05ff88 c2bf1c74 [ 46.942350] Call Trace: [ 46.944403] [] rxe_setup_udp_tunnel+0x8f/0x99 [ 46.947689] [] ? net_to_rxe+0x4e/0x4e [ 46.950567] [] rxe_net_init+0xe/0xa4 [ 46.953147] [] rxe_module_init+0x20/0x4c [ 46.955448] [] do_one_initcall+0x89/0x113 [ 46.957797] [] ? set_debug_rodata+0xf/0xf [ 46.959966] [] ? kernel_init_freeable+0xbe/0x15b [ 46.962262] [] kernel_init_freeable+0xde/0x15b [ 46.964418] [] kernel_init+0x8/0xd0 [ 46.966618] [] ret_from_kernel_thread+0xe/0x24 [ 46.969592] [] ? rest_init+0x6f/0x6f Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 55f0e8f..ddd5927 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -362,15 +362,34 @@ static int __init rxe_module_init(void) return err; } - err = rxe_net_init(); + err = rxe_net_ipv4_init(); if (err) { - pr_err("rxe: unable to init\n"); + pr_err("rxe: unable to init ipv4 tunnel\n"); rxe_cache_exit(); - return err; + goto exit; + } + + err = rxe_net_ipv6_init(); + if (err) { + pr_err("rxe: unable to init ipv6 tunnel\n"); + rxe_cache_exit(); + goto exit; } + + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + pr_err("rxe: Failed to rigister netdev notifier\n"); + goto exit; + } + pr_info("rxe: loaded\n"); return 0; + +exit: + rxe_release_udp_tunnel(recv_sockets.sk4); + rxe_release_udp_tunnel(recv_sockets.sk6); + return err; } static void __exit rxe_module_exit(void) diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0b8d2ea..eedf2f1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return sock; } -static void rxe_release_udp_tunnel(struct socket *sk) +void rxe_release_udp_tunnel(struct socket *sk) { - udp_tunnel_sock_release(sk); + if (sk) + udp_tunnel_sock_release(sk); } static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, @@ -658,51 +659,45 @@ out: return NOTIFY_OK; } -static struct notifier_block rxe_net_notifier = { +struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; -int rxe_net_init(void) +int rxe_net_ipv4_init(void) { - int err; - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), true); - if (IS_ERR(recv_sockets.sk6)) { - recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); - return -1; - } - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), false); + htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { - rxe_release_udp_tunnel(recv_sockets.sk6); recv_sockets.sk4 = NULL; - recv_sockets.sk6 = NULL; pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); return -1; } - err = register_netdevice_notifier(&rxe_net_notifier); - if (err) { - rxe_release_udp_tunnel(recv_sockets.sk6); - rxe_release_udp_tunnel(recv_sockets.sk4); - pr_err("rxe: Failed to rigister netdev notifier\n"); - } - - return err; + return 0; } -void rxe_net_exit(void) +int rxe_net_ipv6_init(void) { - if (recv_sockets.sk6) - rxe_release_udp_tunnel(recv_sockets.sk6); +#if IS_ENABLED(CONFIG_IPV6) - if (recv_sockets.sk4) - rxe_release_udp_tunnel(recv_sockets.sk4); + spin_lock_init(&dev_list_lock); + recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), true); + if (IS_ERR(recv_sockets.sk6)) { + recv_sockets.sk6 = NULL; + pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); + return -1; + } +#endif + return 0; +} + +void rxe_net_exit(void) +{ + rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk4); unregister_netdevice_notifier(&rxe_net_notifier); } diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 7b06f76..0daf7f0 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -44,10 +44,13 @@ struct rxe_recv_sockets { }; extern struct rxe_recv_sockets recv_sockets; +extern struct notifier_block rxe_net_notifier; +void rxe_release_udp_tunnel(struct socket *sk); struct rxe_dev *rxe_net_add(struct net_device *ndev); -int rxe_net_init(void); +int rxe_net_ipv4_init(void); +int rxe_net_ipv6_init(void); void rxe_net_exit(void); #endif /* RXE_NET_H */ -- cgit v0.10.2 From 908948877bbbb240f67d7eb27d3a392beb394a07 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:05 +0300 Subject: IB/rxe: Fix duplicate atomic request handling When handling ack for atomic opcodes like "fetch&add" or "cmp&swp", the method send_atomic_ack() saves the ack before sending it, in case it gets lost and never reach the requester. In which case the method duplicate_request() will need to find it using the duplicated request.psn. But send_atomic_ack() used a wrong psn value and thus the above ack was never found. This fix uses the ack.psn to locate the ack in case its needed. This fix also copies the ack packet to the skb's control buffer since duplicate_request() will need it when calling rxe_xmit_packet() Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ebb03b4..3e0f0f2 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -972,11 +972,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, free_rd_atomic_resource(qp, res); rxe_advance_resp_resource(qp); + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); + res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; - res->first_psn = qp->resp.psn; - res->last_psn = qp->resp.psn; - res->cur_psn = qp->resp.psn; + res->first_psn = ack_pkt.psn; + res->last_psn = ack_pkt.psn; + res->cur_psn = ack_pkt.psn; rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); if (rc) { @@ -1116,8 +1118,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, rc = RESPST_CLEANUP; goto out; } - bth_set_psn(SKB_TO_PKT(skb_copy), - qp->resp.psn - 1); + /* Resend the result. */ rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, pkt, skb_copy); -- cgit v0.10.2 From 3050b99850247695cb07a5c15265afcc08bcf400 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:06 +0300 Subject: IB/rxe: Fix race condition between requester and completer rxe_requester() is sending a pkt with rxe_xmit_packet() and then calls rxe_update() to update the wqe and qp's psn values. But sometimes the response is received before the requester had time to update the wqe in which case the completer acts on errornous wqe values. This fix updates the wqe and qp before actually sending the request and rolls back when xmit fails. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 33b2d9d..13a848a 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } static void update_wqe_state(struct rxe_qp *qp, - struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, - enum wqe_state *prev_state) + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt) { - enum wqe_state prev_state_ = wqe->state; - if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; } else { wqe->state = wqe_state_processing; } - - *prev_state = prev_state_; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) +static void update_wqe_psn(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + int payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; else qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; +} - qp->req.opcode = pkt->opcode; +static void save_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + rollback_wqe->state = wqe->state; + rollback_wqe->first_psn = wqe->first_psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_qp->req.psn = qp->req.psn; +} +static void rollback_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + wqe->state = rollback_wqe->state; + wqe->first_psn = rollback_wqe->first_psn; + wqe->last_psn = rollback_wqe->last_psn; + qp->req.psn = rollback_qp->req.psn; +} + +static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, int payload) +{ + qp->req.opcode = pkt->opcode; if (pkt->mask & RXE_END_MASK) qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); @@ -571,7 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - enum wqe_state prev_state; + struct rxe_qp rollback_qp; + struct rxe_send_wqe rollback_wqe; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -688,13 +711,21 @@ next_wqe: goto err; } - update_wqe_state(qp, wqe, &pkt, &prev_state); + /* + * To prevent a race on wqe access between requester and completer, + * wqe members state and psn need to be set before calling + * rxe_xmit_packet(). + * Otherwise, completer might initiate an unjustified retry flow. + */ + save_state(wqe, qp, &rollback_wqe, &rollback_qp); + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; kfree_skb(skb); - wqe->state = prev_state; + rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); -- cgit v0.10.2 From c1cc72cb6ff95cbe02ed747866576dccab4a5911 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 7 Sep 2016 14:04:07 +0300 Subject: IB/rxe: Fix kmem_cache leak Decrement qp reference when handling error path in completer to prevent kmem_cache leak. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 36f67de..1c59ef2 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -689,7 +689,14 @@ int rxe_completer(void *arg) qp->req.need_retry = 1; rxe_run_task(&qp->req.task, 1); } + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; + } else { wqe->status = IB_WC_RETRY_EXC_ERR; state = COMPST_ERROR; @@ -716,6 +723,12 @@ int rxe_completer(void *arg) case COMPST_ERROR: do_complete(qp, wqe); rxe_qp_error(qp); + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; } } -- cgit v0.10.2 From e4618d40eb3dc1a6d1f55f7150ea25bb23ab410a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 9 Sep 2016 08:15:37 +0100 Subject: IB/rdmavt: Don't vfree a kzalloc'ed memory region The userspace memory region 'mr' is allocated with kzalloc in __rvt_alloc_mr however it is incorrectly being freed with vfree in __rvt_free_mr. Fix this by using kfree to free it. Signed-off-by: Colin Ian King Reviewed-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 80c4b6b..46b6497 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) { rvt_deinit_mregion(&mr->mr); rvt_free_lkey(&mr->mr); - vfree(mr); + kfree(mr); } /** -- cgit v0.10.2 From cce94483e47e8e3d74cf4475dea33f9fd4b6ad9f Mon Sep 17 00:00:00 2001 From: Filipe Manco Date: Thu, 15 Sep 2016 17:10:46 +0200 Subject: xen-netback: fix error handling on netback_probe() In case of error during netback_probe() (e.g. an entry missing on the xenstore) netback_remove() is called on the new device, which will set the device backend state to XenbusStateClosed by calling set_backend_state(). However, the backend state wasn't initialized by netback_probe() at this point, which will cause and invalid transaction and set_backend_state() to BUG(). Initialize the backend state at the beginning of netback_probe() to XenbusStateInitialising, and create two new valid state transitions on set_backend_state(), from XenbusStateInitialising to XenbusStateClosed, and from XenbusStateInitialising to XenbusStateInitWait. Signed-off-by: Filipe Manco Acked-by: Wei Liu Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 6a31f26..daf4c78 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -271,6 +271,11 @@ static int netback_probe(struct xenbus_device *dev, be->dev = dev; dev_set_drvdata(&dev->dev, be); + be->state = XenbusStateInitialising; + err = xenbus_switch_state(dev, XenbusStateInitialising); + if (err) + goto fail; + sg = 1; do { @@ -383,11 +388,6 @@ static int netback_probe(struct xenbus_device *dev, be->hotplug_script = script; - err = xenbus_switch_state(dev, XenbusStateInitWait); - if (err) - goto fail; - - be->state = XenbusStateInitWait; /* This kicks hotplug scripts, so do it immediately. */ err = backend_create_xenvif(be); @@ -492,20 +492,20 @@ static inline void backend_switch_state(struct backend_info *be, /* Handle backend state transitions: * - * The backend state starts in InitWait and the following transitions are + * The backend state starts in Initialising and the following transitions are * allowed. * - * InitWait -> Connected - * - * ^ \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | V V + * Initialising -> InitWait -> Connected + * \ + * \ ^ \ | + * \ | \ | + * \ | \ | + * \ | \ | + * \ | \ | + * \ | \ | + * V | V V * - * Closed <-> Closing + * Closed <-> Closing * * The state argument specifies the eventual state of the backend and the * function transitions to that state via the shortest path. @@ -515,6 +515,20 @@ static void set_backend_state(struct backend_info *be, { while (be->state != state) { switch (be->state) { + case XenbusStateInitialising: + switch (state) { + case XenbusStateInitWait: + case XenbusStateConnected: + case XenbusStateClosing: + backend_switch_state(be, XenbusStateInitWait); + break; + case XenbusStateClosed: + backend_switch_state(be, XenbusStateClosed); + break; + default: + BUG(); + } + break; case XenbusStateClosed: switch (state) { case XenbusStateInitWait: -- cgit v0.10.2 From ffb4d6c8508657824bcef68a36b2a0f9d8c09d10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 08:12:33 -0700 Subject: tcp: fix overflow in __tcp_retransmit_skb() If a TCP socket gets a large write queue, an overflow can happen in a test in __tcp_retransmit_skb() preventing all retransmits. The flow then stalls and resets after timeouts. Tested: sysctl -w net.core.wmem_max=1000000000 netperf -H dest -- -s 1000000000 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bdaef7f..f53d0cc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2605,7 +2605,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) * copying overhead: fragmentation, tunneling, mangling etc. */ if (atomic_read(&sk->sk_wmem_alloc) > - min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) + min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), + sk->sk_sndbuf)) return -EAGAIN; if (skb_still_in_host_queue(sk, skb)) -- cgit v0.10.2 From 20c64d5cd5a2bdcdc8982a06cb05e5e1bd851a3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Sep 2016 08:48:46 -0700 Subject: net: avoid sk_forward_alloc overflows A malicious TCP receiver, sending SACK, can force the sender to split skbs in write queue and increase its memory usage. Then, when socket is closed and its write queue purged, we might overflow sk_forward_alloc (It becomes negative) sk_mem_reclaim() does nothing in this case, and more than 2GB are leaked from TCP perspective (tcp_memory_allocated is not changed) Then warnings trigger from inet_sock_destruct() and sk_stream_kill_queues() seeing a not zero sk_forward_alloc All TCP stack can be stuck because TCP is under memory pressure. A simple fix is to preemptively reclaim from sk_mem_uncharge(). This makes sure a socket wont have more than 2 MB forward allocated, after burst and idle period. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/include/net/sock.h b/include/net/sock.h index ff5be7e..8741988 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1332,6 +1332,16 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) if (!sk_has_account(sk)) return; sk->sk_forward_alloc += size; + + /* Avoid a possible overflow. + * TCP send queues can make this happen, if sk_mem_reclaim() + * is not called and more than 2 GBytes are released at once. + * + * If we reach 2 MBytes, reclaim 1 MBytes right now, there is + * no need to hold that much forward allocation anyway. + */ + if (unlikely(sk->sk_forward_alloc >= 1 << 21)) + __sk_mem_reclaim(sk, 1 << 20); } static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) -- cgit v0.10.2 From 8ab86c00e349cef9fb14719093a7f198bcc72629 Mon Sep 17 00:00:00 2001 From: "phil.turnbull@oracle.com" Date: Thu, 15 Sep 2016 12:41:44 -0400 Subject: irda: Free skb on irda_accept error path. skb is not freed if newsk is NULL. Rework the error path so free_skb is unconditionally called on function exit. Fixes: c3ea9fa27413 ("[IrDA] af_irda: IRDA_ASSERT cleanups") Signed-off-by: Phil Turnbull Signed-off-by: David S. Miller diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 8d2f7c9..ccc2444 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -832,7 +832,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) struct sock *sk = sock->sk; struct irda_sock *new, *self = irda_sk(sk); struct sock *newsk; - struct sk_buff *skb; + struct sk_buff *skb = NULL; int err; err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0); @@ -900,7 +900,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) err = -EPERM; /* value does not seem to make sense. -arnd */ if (!new->tsap) { pr_debug("%s(), dup failed!\n", __func__); - kfree_skb(skb); goto out; } @@ -919,7 +918,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) /* Clean up the original one to keep it in listen state */ irttp_listen(self->tsap); - kfree_skb(skb); sk->sk_ack_backlog--; newsock->state = SS_CONNECTED; @@ -927,6 +925,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) irda_connect_response(new); err = 0; out: + kfree_skb(skb); release_sock(sk); return err; } -- cgit v0.10.2 From 4496195ddd75c4ad57b783739414e69b7d79843e Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 15 Sep 2016 15:02:38 -0300 Subject: sctp: fix SSN comparision This function actually operates on u32 yet its paramteres were declared as u16, causing integer truncation upon calling. Note in patch context that ADDIP_SERIAL_SIGN_BIT is already 32 bits. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index efc0174..bafe2a0 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -382,7 +382,7 @@ enum { ADDIP_SERIAL_SIGN_BIT = (1<<31) }; -static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t) +static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) { return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); } -- cgit v0.10.2 From 2835d2d9e366a2985b24051d228333bfba82f3a7 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 15 Sep 2016 22:47:51 +0200 Subject: bna: add missing per queue ethtool stat Commit ba5ca784 "bna: check for dma mapping errors" added besides other things a statistic that counts number of DMA buffer mapping failures per each Rx queue. This counter is not included in ethtool stats output. Fixes: ba5ca784 "bna: check for dma mapping errors" Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 0e4fdc3..5671353 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -31,7 +31,7 @@ #define BNAD_NUM_TXF_COUNTERS 12 #define BNAD_NUM_RXF_COUNTERS 10 #define BNAD_NUM_CQ_COUNTERS (3 + 5) -#define BNAD_NUM_RXQ_COUNTERS 6 +#define BNAD_NUM_RXQ_COUNTERS 7 #define BNAD_NUM_TXQ_COUNTERS 5 #define BNAD_ETHTOOL_STATS_NUM \ @@ -658,6 +658,8 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_allocbuf_failed", q_num); string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_mapbuf_failed", q_num); + string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_producer_index", q_num); string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_consumer_index", q_num); @@ -678,6 +680,9 @@ bnad_get_strings(struct net_device *netdev, u32 stringset, u8 *string) sprintf(string, "rxq%d_allocbuf_failed", q_num); string += ETH_GSTRING_LEN; + sprintf(string, "rxq%d_mapbuf_failed", + q_num); + string += ETH_GSTRING_LEN; sprintf(string, "rxq%d_producer_index", q_num); string += ETH_GSTRING_LEN; -- cgit v0.10.2 From 37dd348270c1a48f0234354a06c0ce052b6c85b1 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 15 Sep 2016 22:47:52 +0200 Subject: bna: fix crash in bnad_get_strings() Commit 6e7333d "net: add rx_nohandler stat counter" added the new entry rx_nohandler into struct rtnl_link_stats64. Unfortunately the bna driver foolishly depends on the structure. It uses part of it for ethtool statistics and it's not bad but the driver assumes its size is constant as it defines string for each existing entry. The problem occurs when the structure is extended because you need to modify bna driver as well. If not any attempt to retrieve ethtool statistics results in crash in bnad_get_strings(). The patch changes BNAD_ETHTOOL_STATS_NUM so it counts real number of strings in the array and also removes rtnl_link_stats64 entries that are not used in output and are always zero. Fixes: 6e7333d "net: add rx_nohandler stat counter" Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c index 5671353..31f61a7 100644 --- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c +++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c @@ -34,12 +34,7 @@ #define BNAD_NUM_RXQ_COUNTERS 7 #define BNAD_NUM_TXQ_COUNTERS 5 -#define BNAD_ETHTOOL_STATS_NUM \ - (sizeof(struct rtnl_link_stats64) / sizeof(u64) + \ - sizeof(struct bnad_drv_stats) / sizeof(u64) + \ - offsetof(struct bfi_enet_stats, rxf_stats[0]) / sizeof(u64)) - -static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { +static const char *bnad_net_stats_strings[] = { "rx_packets", "tx_packets", "rx_bytes", @@ -50,22 +45,10 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "tx_dropped", "multicast", "collisions", - "rx_length_errors", - "rx_over_errors", "rx_crc_errors", "rx_frame_errors", - "rx_fifo_errors", - "rx_missed_errors", - - "tx_aborted_errors", - "tx_carrier_errors", "tx_fifo_errors", - "tx_heartbeat_errors", - "tx_window_errors", - - "rx_compressed", - "tx_compressed", "netif_queue_stop", "netif_queue_wakeup", @@ -254,6 +237,8 @@ static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = { "fc_tx_fid_parity_errors", }; +#define BNAD_ETHTOOL_STATS_NUM ARRAY_SIZE(bnad_net_stats_strings) + static int bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd) { @@ -859,9 +844,9 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, u64 *buf) { struct bnad *bnad = netdev_priv(netdev); - int i, j, bi; + int i, j, bi = 0; unsigned long flags; - struct rtnl_link_stats64 *net_stats64; + struct rtnl_link_stats64 net_stats64; u64 *stats64; u32 bmap; @@ -876,14 +861,25 @@ bnad_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats, * under the same lock */ spin_lock_irqsave(&bnad->bna_lock, flags); - bi = 0; - memset(buf, 0, stats->n_stats * sizeof(u64)); - - net_stats64 = (struct rtnl_link_stats64 *)buf; - bnad_netdev_qstats_fill(bnad, net_stats64); - bnad_netdev_hwstats_fill(bnad, net_stats64); - bi = sizeof(*net_stats64) / sizeof(u64); + memset(&net_stats64, 0, sizeof(net_stats64)); + bnad_netdev_qstats_fill(bnad, &net_stats64); + bnad_netdev_hwstats_fill(bnad, &net_stats64); + + buf[bi++] = net_stats64.rx_packets; + buf[bi++] = net_stats64.tx_packets; + buf[bi++] = net_stats64.rx_bytes; + buf[bi++] = net_stats64.tx_bytes; + buf[bi++] = net_stats64.rx_errors; + buf[bi++] = net_stats64.tx_errors; + buf[bi++] = net_stats64.rx_dropped; + buf[bi++] = net_stats64.tx_dropped; + buf[bi++] = net_stats64.multicast; + buf[bi++] = net_stats64.collisions; + buf[bi++] = net_stats64.rx_length_errors; + buf[bi++] = net_stats64.rx_crc_errors; + buf[bi++] = net_stats64.rx_frame_errors; + buf[bi++] = net_stats64.tx_fifo_errors; /* Get netif_queue_stopped from stack */ bnad->stats.drv_stats.netif_queue_stopped = netif_queue_stopped(netdev); -- cgit v0.10.2 From d4690f1e1cdabb4d61207b6787b1605a0dc0aeab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 16 Sep 2016 00:11:45 +0100 Subject: fix iov_iter_fault_in_readable() ... by turning it into what used to be multipages counterpart Cc: stable@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Linus Torvalds diff --git a/include/linux/uio.h b/include/linux/uio.h index 1b5d1cd..75b4aaf 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -76,7 +76,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes); +#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 9e8c738..7e3138c 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -291,33 +291,13 @@ done: } /* - * Fault in the first iovec of the given iov_iter, to a maximum length - * of bytes. Returns 0 on success, or non-zero if the memory could not be - * accessed (ie. because it is an invalid address). - * - * writev-intensive code may want this to prefault several iovecs -- that - * would be possible (callers must not rely on the fact that _only_ the - * first iovec will be faulted with the current implementation). - */ -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) -{ - if (!(i->type & (ITER_BVEC|ITER_KVEC))) { - char __user *buf = i->iov->iov_base + i->iov_offset; - bytes = min(bytes, i->iov->iov_len - i->iov_offset); - return fault_in_pages_readable(buf, bytes); - } - return 0; -} -EXPORT_SYMBOL(iov_iter_fault_in_readable); - -/* * Fault in one or more iovecs of the given iov_iter, to a maximum length of * bytes. For each iovec, fault in each page that constitutes the iovec. * * Return 0 on success, or non-zero if the memory could not be accessed (i.e. * because it is an invalid address). */ -int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) { size_t skip = i->iov_offset; const struct iovec *iov; @@ -334,7 +314,7 @@ int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) } return 0; } -EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable); +EXPORT_SYMBOL(iov_iter_fault_in_readable); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, -- cgit v0.10.2 From 6244bd651236d86f59387d43c531b5f942a92b38 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Mon, 8 Aug 2016 17:48:20 -0600 Subject: exynos-drm: Fix unsupported GEM memory type error message to be clear Fix unsupported GEM memory type error message to include the memory type information. Signed-off-by: Shuah Khan Signed-off-by: Inki Dae diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index e016640..40ce841 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -55,11 +55,11 @@ static int check_fb_gem_memory_type(struct drm_device *drm_dev, flags = exynos_gem->flags; /* - * without iommu support, not support physically non-continuous memory - * for framebuffer. + * Physically non-contiguous memory type for framebuffer is not + * supported without IOMMU. */ if (IS_NONCONTIG_BUFFER(flags)) { - DRM_ERROR("cannot use this gem memory type for fb.\n"); + DRM_ERROR("Non-contiguous GEM memory is not supported.\n"); return -EINVAL; } -- cgit v0.10.2 From 479f12545460809cfc9093d90d6ed82d76388e97 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:54 +0200 Subject: drm/exynos: fimc: fix system and runtime pm integration Use generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 0525c56..147ef0d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -1753,32 +1753,6 @@ static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) return 0; } -#ifdef CONFIG_PM_SLEEP -static int fimc_suspend(struct device *dev) -{ - struct fimc_context *ctx = get_fimc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (pm_runtime_suspended(dev)) - return 0; - - return fimc_clk_ctrl(ctx, false); -} - -static int fimc_resume(struct device *dev) -{ - struct fimc_context *ctx = get_fimc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (!pm_runtime_suspended(dev)) - return fimc_clk_ctrl(ctx, true); - - return 0; -} -#endif - static int fimc_runtime_suspend(struct device *dev) { struct fimc_context *ctx = get_fimc_context(dev); @@ -1799,7 +1773,8 @@ static int fimc_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops fimc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(fimc_suspend, fimc_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(fimc_runtime_suspend, fimc_runtime_resume, NULL) }; -- cgit v0.10.2 From 83bd7b20aaf499030bf857ef64de3c19309b107d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:55 +0200 Subject: drm/exynos: gsc: fix system and runtime pm integration Use generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 5d20da8..b1894aa 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1760,32 +1760,6 @@ static int gsc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int gsc_suspend(struct device *dev) -{ - struct gsc_context *ctx = get_gsc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (pm_runtime_suspended(dev)) - return 0; - - return gsc_clk_ctrl(ctx, false); -} - -static int gsc_resume(struct device *dev) -{ - struct gsc_context *ctx = get_gsc_context(dev); - - DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - if (!pm_runtime_suspended(dev)) - return gsc_clk_ctrl(ctx, true); - - return 0; -} -#endif - #ifdef CONFIG_PM static int gsc_runtime_suspend(struct device *dev) { @@ -1807,7 +1781,8 @@ static int gsc_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops gsc_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(gsc_suspend, gsc_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL) }; -- cgit v0.10.2 From 5b67723e6096f5470f361656cd108430d3b12c67 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:56 +0200 Subject: drm/exynos: rotator: fix system and runtime pm integration Use generic helpers instead of open-coding usage of runtime pm for system sleep pm, which was potentially broken for some corner cases. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 404367a..6591e40 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -794,29 +794,6 @@ static int rotator_clk_crtl(struct rot_context *rot, bool enable) return 0; } - -#ifdef CONFIG_PM_SLEEP -static int rotator_suspend(struct device *dev) -{ - struct rot_context *rot = dev_get_drvdata(dev); - - if (pm_runtime_suspended(dev)) - return 0; - - return rotator_clk_crtl(rot, false); -} - -static int rotator_resume(struct device *dev) -{ - struct rot_context *rot = dev_get_drvdata(dev); - - if (!pm_runtime_suspended(dev)) - return rotator_clk_crtl(rot, true); - - return 0; -} -#endif - static int rotator_runtime_suspend(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); @@ -833,7 +810,8 @@ static int rotator_runtime_resume(struct device *dev) #endif static const struct dev_pm_ops rotator_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(rotator_suspend, rotator_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(rotator_runtime_suspend, rotator_runtime_resume, NULL) }; -- cgit v0.10.2 From b05984e21a7e000bf5074ace00d7a574944b2c16 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 31 Aug 2016 14:55:57 +0200 Subject: drm/exynos: g2d: fix system and runtime pm integration Move code from system sleep pm to runtime pm callbacks to ensure proper driver state preservation when device is under power domain. Then, use generic helpers for using runtime pm for system sleep pm. Signed-off-by: Marek Szyprowski Signed-off-by: Inki Dae diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index 4bf00f5..6eca8bb 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -1475,8 +1475,8 @@ static int g2d_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int g2d_suspend(struct device *dev) +#ifdef CONFIG_PM +static int g2d_runtime_suspend(struct device *dev) { struct g2d_data *g2d = dev_get_drvdata(dev); @@ -1490,25 +1490,6 @@ static int g2d_suspend(struct device *dev) flush_work(&g2d->runqueue_work); - return 0; -} - -static int g2d_resume(struct device *dev) -{ - struct g2d_data *g2d = dev_get_drvdata(dev); - - g2d->suspended = false; - g2d_exec_runqueue(g2d); - - return 0; -} -#endif - -#ifdef CONFIG_PM -static int g2d_runtime_suspend(struct device *dev) -{ - struct g2d_data *g2d = dev_get_drvdata(dev); - clk_disable_unprepare(g2d->gate_clk); return 0; @@ -1523,12 +1504,16 @@ static int g2d_runtime_resume(struct device *dev) if (ret < 0) dev_warn(dev, "failed to enable clock.\n"); + g2d->suspended = false; + g2d_exec_runqueue(g2d); + return ret; } #endif static const struct dev_pm_ops g2d_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(g2d_suspend, g2d_resume) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) SET_RUNTIME_PM_OPS(g2d_runtime_suspend, g2d_runtime_resume, NULL) }; -- cgit v0.10.2 From 65c0044ca8d7c7bbccae37f0ff2972f0210e9f41 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 17 Sep 2016 07:52:49 -0700 Subject: avr32: fix 'undefined reference to `___copy_from_user' avr32 builds fail with: arch/avr32/kernel/built-in.o: In function `arch_ptrace': (.text+0x650): undefined reference to `___copy_from_user' arch/avr32/kernel/built-in.o:(___ksymtab+___copy_from_user+0x0): undefined reference to `___copy_from_user' kernel/built-in.o: In function `proc_doulongvec_ms_jiffies_minmax': (.text+0x5dd8): undefined reference to `___copy_from_user' kernel/built-in.o: In function `proc_dointvec_minmax_sysadmin': sysctl.c:(.text+0x6174): undefined reference to `___copy_from_user' kernel/built-in.o: In function `ptrace_has_cap': ptrace.c:(.text+0x69c0): undefined reference to `___copy_from_user' kernel/built-in.o:ptrace.c:(.text+0x6b90): more undefined references to `___copy_from_user' follow Fixes: 8630c32275ba ("avr32: fix copy_from_user()") Cc: Al Viro Acked-by: Havard Skinnemoen Acked-by: Hans-Christian Noren Egtvedt Signed-off-by: Guenter Roeck diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S index 96a6de9..0753734 100644 --- a/arch/avr32/lib/copy_user.S +++ b/arch/avr32/lib/copy_user.S @@ -23,8 +23,8 @@ */ .text .align 1 - .global copy_from_user - .type copy_from_user, @function + .global ___copy_from_user + .type ___copy_from_user, @function ___copy_from_user: branch_if_kernel r8, __copy_user ret_if_privileged r8, r11, r10, r10 -- cgit v0.10.2 From 8e4b72054f554967827e18be1de0e8122e6efc04 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 17 Sep 2016 12:57:24 -0700 Subject: openrisc: fix the fix of copy_from_user() Since commit acb2505d0119 ("openrisc: fix copy_from_user()"), copy_from_user() returns the number of bytes requested, not the number of bytes not copied. Cc: Al Viro Fixes: acb2505d0119 ("openrisc: fix copy_from_user()") Signed-off-by: Guenter Roeck diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h index cbad29b..5cc6b4f 100644 --- a/arch/openrisc/include/asm/uaccess.h +++ b/arch/openrisc/include/asm/uaccess.h @@ -276,7 +276,7 @@ copy_from_user(void *to, const void *from, unsigned long n) unsigned long res = n; if (likely(access_ok(VERIFY_READ, from, n))) - n = __copy_tofrom_user(to, from, n); + res = __copy_tofrom_user(to, from, n); if (unlikely(res)) memset(to + (n - res), 0, res); return res; -- cgit v0.10.2 From 3be7988674ab33565700a37b210f502563d932e6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 18 Sep 2016 17:27:41 -0700 Subject: Linux 4.8-rc7 diff --git a/Makefile b/Makefile index 1a8c8dd..74e22c2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* -- cgit v0.10.2 From 4158dbe1be9b420e1fdd9ec5c033647a605ca485 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 18 Sep 2016 22:51:38 +0900 Subject: Subject: [PATCH, RESEND] drm: exynos: avoid unused function warning When CONFIG_PM is not set, we get a warning about an unused function: drivers/gpu/drm/exynos/exynos_drm_gsc.c:1219:12: error: 'gsc_clk_ctrl' defined but not used [-Werror=unused-function] static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable) ^~~~~~~~~~~~ This removes the two #ifdef checks in this file and instead marks the functions as __maybe_unused, which is a more reliable way of doing the same, allowing better build coverage and avoiding the warning above. Signed-off-by: Arnd Bergmann Signed-off-by: Inki Dae diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index b1894aa..52a9d26 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -1760,8 +1760,7 @@ static int gsc_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int gsc_runtime_suspend(struct device *dev) +static int __maybe_unused gsc_runtime_suspend(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); @@ -1770,7 +1769,7 @@ static int gsc_runtime_suspend(struct device *dev) return gsc_clk_ctrl(ctx, false); } -static int gsc_runtime_resume(struct device *dev) +static int __maybe_unused gsc_runtime_resume(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); @@ -1778,7 +1777,6 @@ static int gsc_runtime_resume(struct device *dev) return gsc_clk_ctrl(ctx, true); } -#endif static const struct dev_pm_ops gsc_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, -- cgit v0.10.2 From 19cd120319ef5390404a5d9c829c3a7962f184a8 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 16 Sep 2016 10:50:13 +0200 Subject: stmmac: fix PWRDWN into the PMT register for global unicast. MAC devices use the RWKPKTEN and MGKPKTEN bits of the PMT Control/Status register to generate power management events. So this patch is to properly set the RWKPKTEN [BIT(2)] inside the PMT register (needed in case of global unicast). Reported-by: Aditi SHARMA Signed-off-by: Giuseppe Cavallaro Cc: Alexandre TORGUE Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index cbefe9e..885a5e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -261,7 +261,7 @@ static void dwmac1000_pmt(struct mac_device_info *hw, unsigned long mode) } if (mode & WAKE_UCAST) { pr_debug("GMAC: WOL on global unicast\n"); - pmt |= global_unicast; + pmt |= power_down | global_unicast | wake_up_frame_en; } writel(pmt, ioaddr + GMAC_PMT); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index df5580d..51019b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -102,7 +102,7 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode) } if (mode & WAKE_UCAST) { pr_debug("GMAC: WOL on global unicast\n"); - pmt |= global_unicast; + pmt |= power_down | global_unicast | wake_up_frame_en; } writel(pmt, ioaddr + GMAC_PMT); -- cgit v0.10.2 From 47a66e45d7a7613322549c2475ea9d809baaf514 Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Tue, 13 Sep 2016 14:20:45 -0700 Subject: drm: Only use compat ioctl for addfb2 on X86/IA64 Similar to struct drm_update_draw, struct drm_mode_fb_cmd2 has an unaligned 64 bit field (modifier). This get packed differently between 32 bit and 64 bit modes on architectures that can handle unaligned 64 bit access (X86 and IA64). Other architectures pack the structs the same and don't need the compat wrapper. Use the same condition for drm_mode_fb_cmd2 as we use for drm_update_draw. Note that only the modifier will be packed differently between compat and non-compat versions. Reviewed-by: Rob Clark Signed-off-by: Kristian H. Kristensen [seanpaul added not at bottom of commit msg re: modifier] Signed-off-by: Sean Paul Link: http://patchwork.freedesktop.org/patch/msgid/1473801645-116011-1-git-send-email-hoegsberg@chromium.org Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 57676f8..a628975 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -1015,6 +1015,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, return 0; } +#if defined(CONFIG_X86) || defined(CONFIG_IA64) typedef struct drm_mode_fb_cmd232 { u32 fb_id; u32 width; @@ -1071,6 +1072,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, return 0; } +#endif static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version, @@ -1104,7 +1106,9 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = { [DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw, #endif [DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank, +#if defined(CONFIG_X86) || defined(CONFIG_IA64) [DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2, +#endif }; /** -- cgit v0.10.2 From b588479358ce26f32138e0f0a7ab0678f8e3e601 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Sun, 18 Sep 2016 07:42:53 +0000 Subject: xfrm: Fix memory leak of aead algorithm name commit 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") introduced aead. The function attach_aead kmemdup()s the algorithm name during xfrm_state_construct(). However this memory is never freed. Implementation has since been slightly modified in commit ee5c23176fcc ("xfrm: Clone states properly on migration") without resolving this leak. This patch adds a kfree() call for the aead algorithm name. Fixes: 1a6509d99122 ("[IPSEC]: Add support for combined mode algorithms") Signed-off-by: Ilan Tayari Acked-by: Rami Rosen Signed-off-by: Steffen Klassert diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9895a8c..a30f898d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -332,6 +332,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) { tasklet_hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); + kfree(x->aead); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); -- cgit v0.10.2 From 4de349e786a3a2d51bd02d56f3de151bbc3c3df9 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 17 Aug 2016 12:41:08 -0300 Subject: can: flexcan: fix resume function On a imx6ul-pico board the following error is seen during system suspend: dpm_run_callback(): platform_pm_resume+0x0/0x54 returns -110 PM: Device 2090000.flexcan failed to resume: error -110 The reason for this suspend error is because when the CAN interface is not active the clocks are disabled and then flexcan_chip_enable() will always fail due to a timeout error. In order to fix this issue, only call flexcan_chip_enable/disable() when the CAN interface is active. Based on a patch from Dong Aisheng in the NXP kernel. Signed-off-by: Fabio Estevam Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 41c0fc9..16f7cad 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1268,11 +1268,10 @@ static int __maybe_unused flexcan_suspend(struct device *device) struct flexcan_priv *priv = netdev_priv(dev); int err; - err = flexcan_chip_disable(priv); - if (err) - return err; - if (netif_running(dev)) { + err = flexcan_chip_disable(priv); + if (err) + return err; netif_stop_queue(dev); netif_device_detach(dev); } @@ -1285,13 +1284,17 @@ static int __maybe_unused flexcan_resume(struct device *device) { struct net_device *dev = dev_get_drvdata(device); struct flexcan_priv *priv = netdev_priv(dev); + int err; priv->can.state = CAN_STATE_ERROR_ACTIVE; if (netif_running(dev)) { netif_device_attach(dev); netif_start_queue(dev); + err = flexcan_chip_enable(priv); + if (err) + return err; } - return flexcan_chip_enable(priv); + return 0; } static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume); -- cgit v0.10.2 From d8feef9bd447381952a33e6284241006f394c080 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Sun, 18 Sep 2016 11:24:50 -0300 Subject: [media] cx23885/saa7134: assign q->dev to the PCI device Fix a regression caused by commit 2bc46b3ad3c1 ("[media] media/pci: convert drivers to use the new vb2_queue dev field"). Three places where q->dev should be set were missed, causing a WARN. Fixes: 2bc46b3ad3c1 ("[media] media/pci: convert drivers to use the new vb2_queue dev field"). Signed-off-by: Hans Verkuil Reported-by: Marton Balint Signed-off-by: Mauro Carvalho Chehab diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c index efec2d1..4d080da 100644 --- a/drivers/media/pci/cx23885/cx23885-417.c +++ b/drivers/media/pci/cx23885/cx23885-417.c @@ -1552,6 +1552,7 @@ int cx23885_417_register(struct cx23885_dev *dev) q->mem_ops = &vb2_dma_sg_memops; q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err < 0) diff --git a/drivers/media/pci/saa7134/saa7134-dvb.c b/drivers/media/pci/saa7134/saa7134-dvb.c index db987e5..59a4b5f 100644 --- a/drivers/media/pci/saa7134/saa7134-dvb.c +++ b/drivers/media/pci/saa7134/saa7134-dvb.c @@ -1238,6 +1238,7 @@ static int dvb_init(struct saa7134_dev *dev) q->buf_struct_size = sizeof(struct saa7134_buf); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; ret = vb2_queue_init(q); if (ret) { vb2_dvb_dealloc_frontends(&dev->frontends); diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c index ca417a4..791a516 100644 --- a/drivers/media/pci/saa7134/saa7134-empress.c +++ b/drivers/media/pci/saa7134/saa7134-empress.c @@ -295,6 +295,7 @@ static int empress_init(struct saa7134_dev *dev) q->buf_struct_size = sizeof(struct saa7134_buf); q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; q->lock = &dev->lock; + q->dev = &dev->pci->dev; err = vb2_queue_init(q); if (err) return err; -- cgit v0.10.2 From 67326666e2d45ebea7db3ed8e3e735f15e60dd91 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 19 Sep 2016 10:52:14 -0500 Subject: scripts: add script for translating stack dump function offsets addr2line doesn't work with KASLR addresses. Add a basic addr2line wrapper script which takes the 'func+offset/size' format as input. Signed-off-by: Josh Poimboeuf Signed-off-by: Linus Torvalds diff --git a/scripts/faddr2line b/scripts/faddr2line new file mode 100755 index 0000000..4fbfe83 --- /dev/null +++ b/scripts/faddr2line @@ -0,0 +1,177 @@ +#!/bin/bash +# +# Translate stack dump function offsets. +# +# addr2line doesn't work with KASLR addresses. This works similarly to +# addr2line, but instead takes the 'func+0x123' format as input: +# +# $ ./scripts/faddr2line ~/k/vmlinux meminfo_proc_show+0x5/0x568 +# meminfo_proc_show+0x5/0x568: +# meminfo_proc_show at fs/proc/meminfo.c:27 +# +# If the address is part of an inlined function, the full inline call chain is +# printed: +# +# $ ./scripts/faddr2line ~/k/vmlinux native_write_msr+0x6/0x27 +# native_write_msr+0x6/0x27: +# arch_static_branch at arch/x86/include/asm/msr.h:121 +# (inlined by) static_key_false at include/linux/jump_label.h:125 +# (inlined by) native_write_msr at arch/x86/include/asm/msr.h:125 +# +# The function size after the '/' in the input is optional, but recommended. +# It's used to help disambiguate any duplicate symbol names, which can occur +# rarely. If the size is omitted for a duplicate symbol then it's possible for +# multiple code sites to be printed: +# +# $ ./scripts/faddr2line ~/k/vmlinux raw_ioctl+0x5 +# raw_ioctl+0x5/0x20: +# raw_ioctl at drivers/char/raw.c:122 +# +# raw_ioctl+0x5/0xb1: +# raw_ioctl at net/ipv4/raw.c:876 +# +# Multiple addresses can be specified on a single command line: +# +# $ ./scripts/faddr2line ~/k/vmlinux type_show+0x10/45 free_reserved_area+0x90 +# type_show+0x10/0x2d: +# type_show at drivers/video/backlight/backlight.c:213 +# +# free_reserved_area+0x90/0x123: +# free_reserved_area at mm/page_alloc.c:6429 (discriminator 2) + + +set -o errexit +set -o nounset + +command -v awk >/dev/null 2>&1 || die "awk isn't installed" +command -v readelf >/dev/null 2>&1 || die "readelf isn't installed" +command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed" + +usage() { + echo "usage: faddr2line ..." >&2 + exit 1 +} + +warn() { + echo "$1" >&2 +} + +die() { + echo "ERROR: $1" >&2 + exit 1 +} + +# Try to figure out the source directory prefix so we can remove it from the +# addr2line output. HACK ALERT: This assumes that start_kernel() is in +# kernel/init.c! This only works for vmlinux. Otherwise it falls back to +# printing the absolute path. +find_dir_prefix() { + local objfile=$1 + + local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}') + [[ -z $start_kernel_addr ]] && return + + local file_line=$(addr2line -e $objfile $start_kernel_addr) + [[ -z $file_line ]] && return + + local prefix=${file_line%init/main.c:*} + if [[ -z $prefix ]] || [[ $prefix = $file_line ]]; then + return + fi + + DIR_PREFIX=$prefix + return 0 +} + +__faddr2line() { + local objfile=$1 + local func_addr=$2 + local dir_prefix=$3 + local print_warnings=$4 + + local func=${func_addr%+*} + local offset=${func_addr#*+} + offset=${offset%/*} + local size= + [[ $func_addr =~ "/" ]] && size=${func_addr#*/} + + if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then + warn "bad func+offset $func_addr" + DONE=1 + return + fi + + # Go through each of the object's symbols which match the func name. + # In rare cases there might be duplicates. + while read symbol; do + local fields=($symbol) + local sym_base=0x${fields[1]} + local sym_size=${fields[2]} + local sym_type=${fields[3]} + + # calculate the address + local addr=$(($sym_base + $offset)) + if [[ -z $addr ]] || [[ $addr = 0 ]]; then + warn "bad address: $sym_base + $offset" + DONE=1 + return + fi + local hexaddr=0x$(printf %x $addr) + + # weed out non-function symbols + if [[ $sym_type != "FUNC" ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to non-function symbol" + continue + fi + + # if the user provided a size, make sure it matches the symbol's size + if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)" + continue; + fi + + # make sure the provided offset is within the symbol's range + if [[ $offset -gt $sym_size ]]; then + [[ $print_warnings = 1 ]] && + echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)" + continue + fi + + # separate multiple entries with a blank line + [[ $FIRST = 0 ]] && echo + FIRST=0 + + local hexsize=0x$(printf %x $sym_size) + echo "$func+$offset/$hexsize:" + addr2line -fpie $objfile $hexaddr | sed "s;$dir_prefix;;" + DONE=1 + + done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}') +} + +[[ $# -lt 2 ]] && usage + +objfile=$1 +[[ ! -f $objfile ]] && die "can't find objfile $objfile" +shift + +DIR_PREFIX=supercalifragilisticexpialidocious +find_dir_prefix $objfile + +FIRST=1 +while [[ $# -gt 0 ]]; do + func_addr=$1 + shift + + # print any matches found + DONE=0 + __faddr2line $objfile $func_addr $DIR_PREFIX 0 + + # if no match was found, print warnings + if [[ $DONE = 0 ]]; then + __faddr2line $objfile $func_addr $DIR_PREFIX 1 + warn "no match for $func_addr" + fi +done -- cgit v0.10.2 From 7fadce0d60d09427e0027d3d468781b08ca0b3d1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 19 Sep 2016 14:49:08 -0700 Subject: scripts/faddr2line: improve on base path filtering a bit Due to our compiler include directives, the build pathnames for header files often end up being of the form "$srcdir/./include/linux/xyz.h", which ends up having that extra "." path component after the build base in it. Teach faddr2line to skip that too, to make code generated in inline functions in header files match the filename for the regular C files. Rabin Vincent pointed out that I can't make a stricter regexp match by using the " at " prefix for the pathname, because that ends up being locale-dependent. But this does require that the path match be preceded by a space, to make it a bit more strict (that matters mainly if we didn't find any base_dir at all, and we only end up with the "./" part of the match) Acked-by: Josh Poimboeuf Cc: Rabin Vincent Signed-off-by: Linus Torvalds diff --git a/scripts/faddr2line b/scripts/faddr2line index 4fbfe83..450b332 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -145,7 +145,7 @@ __faddr2line() { local hexsize=0x$(printf %x $sym_size) echo "$func+$offset/$hexsize:" - addr2line -fpie $objfile $hexaddr | sed "s;$dir_prefix;;" + addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;" DONE=1 done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}') -- cgit v0.10.2 From 9bb627be47a574b764e162e8513d5db78d49e7f5 Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Mon, 19 Sep 2016 14:43:52 -0700 Subject: mem-hotplug: don't clear the only node in new_node_page() Commit 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") introduced new_node_page() for memory hotplug. In new_node_page(), the nid is cleared before calling __alloc_pages_nodemask(). But if it is the only node of the system, and the first round allocation fails, it will not be able to get memory from an empty nodemask, and will trigger oom. The patch checks whether it is the last node on the system, and if it is, then don't clear the nid in the nodemask. Fixes: 394e31d2ceb4 ("mem-hotplug: alloc new page from a nearest neighbor node when mem-offline") Link: http://lkml.kernel.org/r/1473044391.4250.19.camel@TP420 Signed-off-by: Li Zhong Reported-by: John Allen Acked-by: Vlastimil Babka Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 41266dc..b58906b 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1567,7 +1567,9 @@ static struct page *new_node_page(struct page *page, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(page)), next_node_in(nid, nmask)); - node_clear(nid, nmask); + if (nid != next_node_in(nid, nmask)) + node_clear(nid, nmask); + if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; -- cgit v0.10.2 From e6f0c6e6170fec175fe676495f29029aecdf486c Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 19 Sep 2016 14:43:55 -0700 Subject: ocfs2/dlm: fix race between convert and migration Commit ac7cf246dfdb ("ocfs2/dlm: fix race between convert and recovery") checks if lockres master has changed to identify whether new master has finished recovery or not. This will introduce a race that right after old master does umount ( means master will change), a new convert request comes. In this case, it will reset lockres state to DLM_RECOVERING and then retry convert, and then fail with lockres->l_action being set to OCFS2_AST_INVALID, which will cause inconsistent lock level between ocfs2 and dlm, and then finally BUG. Since dlm recovery will clear lock->convert_pending in dlm_move_lockres_to_recovery_list, we can use it to correctly identify the race case between convert and recovery. So fix it. Fixes: ac7cf246dfdb ("ocfs2/dlm: fix race between convert and recovery") Link: http://lkml.kernel.org/r/57CE1569.8010704@huawei.com Signed-off-by: Joseph Qi Signed-off-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index cdeafb4..0bb1286 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -268,7 +268,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, struct dlm_lock *lock, int flags, int type) { enum dlm_status status; - u8 old_owner = res->owner; mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type, lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS); @@ -335,7 +334,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, spin_lock(&res->spinlock); res->state &= ~DLM_LOCK_RES_IN_PROGRESS; - lock->convert_pending = 0; /* if it failed, move it back to granted queue. * if master returns DLM_NORMAL and then down before sending ast, * it may have already been moved to granted queue, reset to @@ -344,12 +342,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm, if (status != DLM_NOTQUEUED) dlm_error(status); dlm_revert_pending_convert(res, lock); - } else if ((res->state & DLM_LOCK_RES_RECOVERING) || - (old_owner != res->owner)) { - mlog(0, "res %.*s is in recovering or has been recovered.\n", - res->lockname.len, res->lockname.name); + } else if (!lock->convert_pending) { + mlog(0, "%s: res %.*s, owner died and lock has been moved back " + "to granted list, retry convert.\n", + dlm->name, res->lockname.len, res->lockname.name); status = DLM_RECOVERING; } + + lock->convert_pending = 0; bail: spin_unlock(&res->spinlock); -- cgit v0.10.2 From d8e3875431956c1f78e142d531f490f76c760ce3 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Mon, 19 Sep 2016 14:43:58 -0700 Subject: MAINTAINERS: Maik has moved Maik is no longer using the plusserver.de email, update with his current email. Link: http://lkml.kernel.org/r/1473007794-27960-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Sudip Mukherjee Cc: Maik Broemme Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 644ff65..2551f6e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6103,7 +6103,7 @@ S: Supported F: drivers/cpufreq/intel_pstate.c INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) -M: Maik Broemme +M: Maik Broemme L: linux-fbdev@vger.kernel.org S: Maintained F: Documentation/fb/intelfb.txt -- cgit v0.10.2 From c131f751ab1a852d4dd4b490b3a7fbba7d738de5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 19 Sep 2016 14:44:01 -0700 Subject: khugepaged: fix use-after-free in collapse_huge_page() hugepage_vma_revalidate() tries to re-check if we still should try to collapse small pages into huge one after the re-acquiring mmap_sem. The problem Dmitry Vyukov reported[1] is that the vma found by hugepage_vma_revalidate() can be suitable for huge pages, but not the same vma we had before dropping mmap_sem. And dereferencing original vma can lead to fun results.. Let's use vma hugepage_vma_revalidate() found instead of assuming it's the same as what we had before the lock was dropped. [1] http://lkml.kernel.org/r/CACT4Y+Z3gigBvhca9kRJFcjX0G70V_nRhbwKBU+yGoESBDKi9Q@mail.gmail.com Link: http://lkml.kernel.org/r/20160907122559.GA6542@black.fi.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Dmitry Vyukov Reviewed-by: Andrea Arcangeli Cc: Ebru Akagunduz Cc: Vlastimil Babka Cc: Mel Gorman Cc: Johannes Weiner Cc: Vegard Nossum Cc: Sasha Levin Cc: Konstantin Khlebnikov Cc: Andrey Ryabinin Cc: Greg Thelen Cc: Suleiman Souhlal Cc: Hugh Dickins Cc: David Rientjes Cc: syzkaller Cc: Kostya Serebryany Cc: Alexander Potapenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 79c52d0..62339bf 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -838,7 +838,8 @@ static bool hugepage_vma_check(struct vm_area_struct *vma) * value (scan code). */ -static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address) +static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, + struct vm_area_struct **vmap) { struct vm_area_struct *vma; unsigned long hstart, hend; @@ -846,7 +847,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address) if (unlikely(khugepaged_test_exit(mm))) return SCAN_ANY_PROCESS; - vma = find_vma(mm, address); + *vmap = vma = find_vma(mm, address); if (!vma) return SCAN_VMA_NULL; @@ -898,7 +899,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ if (ret & VM_FAULT_RETRY) { down_read(&mm->mmap_sem); - if (hugepage_vma_revalidate(mm, address)) { + if (hugepage_vma_revalidate(mm, address, &fe.vma)) { /* vma is no longer available, don't continue to swapin */ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; @@ -923,7 +924,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, static void collapse_huge_page(struct mm_struct *mm, unsigned long address, struct page **hpage, - struct vm_area_struct *vma, int node, int referenced) { pmd_t *pmd, _pmd; @@ -933,6 +933,7 @@ static void collapse_huge_page(struct mm_struct *mm, spinlock_t *pmd_ptl, *pte_ptl; int isolated = 0, result = 0; struct mem_cgroup *memcg; + struct vm_area_struct *vma; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ gfp_t gfp; @@ -961,7 +962,7 @@ static void collapse_huge_page(struct mm_struct *mm, } down_read(&mm->mmap_sem); - result = hugepage_vma_revalidate(mm, address); + result = hugepage_vma_revalidate(mm, address, &vma); if (result) { mem_cgroup_cancel_charge(new_page, memcg, true); up_read(&mm->mmap_sem); @@ -994,7 +995,7 @@ static void collapse_huge_page(struct mm_struct *mm, * handled by the anon_vma lock + PG_lock. */ down_write(&mm->mmap_sem); - result = hugepage_vma_revalidate(mm, address); + result = hugepage_vma_revalidate(mm, address, &vma); if (result) goto out; /* check if the pmd is still valid */ @@ -1202,7 +1203,7 @@ out_unmap: if (ret) { node = khugepaged_find_target_node(); /* collapse_huge_page will return with the mmap_sem released */ - collapse_huge_page(mm, address, hpage, vma, node, referenced); + collapse_huge_page(mm, address, hpage, node, referenced); } out: trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, -- cgit v0.10.2 From 982785c6b05a82c01e90687b7e25ee87c8970b2e Mon Sep 17 00:00:00 2001 From: Ebru Akagunduz Date: Mon, 19 Sep 2016 14:44:04 -0700 Subject: mm, thp: fix leaking mapped pte in __collapse_huge_page_swapin() Currently, khugepaged does not permit swapin if there are enough young pages in a THP. The problem is when a THP does not have enough young pages, khugepaged leaks mapped ptes. This patch prohibits leaking mapped ptes. Link: http://lkml.kernel.org/r/1472820276-7831-1-git-send-email-ebru.akagunduz@gmail.com Signed-off-by: Ebru Akagunduz Suggested-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Reviewed-by: Rik van Riel Cc: Vlastimil Babka Cc: Mel Gorman Cc: Kirill A. Shutemov Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 62339bf..728d779 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -882,6 +882,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, .pmd = pmd, }; + /* we only decide to swapin, if there is enough young ptes */ + if (referenced < HPAGE_PMD_NR/2) { + trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); + return false; + } fe.pte = pte_offset_map(pmd, address); for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE; fe.pte++, fe.address += PAGE_SIZE) { @@ -889,11 +894,6 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, if (!is_swap_pte(pteval)) continue; swapped_in++; - /* we only decide to swapin, if there is enough young ptes */ - if (referenced < HPAGE_PMD_NR/2) { - trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); - return false; - } ret = do_swap_page(&fe, pteval); /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ -- cgit v0.10.2 From 4d35427ad7641cba08ea0deffae1a78147ad41c0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 19 Sep 2016 14:44:07 -0700 Subject: mm: avoid endless recursion in dump_page() dump_page() uses page_mapcount() to get mapcount of the page. page_mapcount() has VM_BUG_ON_PAGE(PageSlab(page)) as mapcount doesn't make sense for slab pages and the field in struct page used for other information. It leads to recursion if dump_page() called for slub page and DEBUG_VM is enabled: dump_page() -> page_mapcount() -> VM_BUG_ON_PAGE() -> dump_page -> ... Let's avoid calling page_mapcount() for slab pages in dump_page(). Link: http://lkml.kernel.org/r/20160908082137.131076-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/debug.c b/mm/debug.c index 8865bfb..74c7cae 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -42,9 +42,11 @@ const struct trace_print_flags vmaflag_names[] = { void __dump_page(struct page *page, const char *reason) { + int mapcount = PageSlab(page) ? 0 : page_mapcount(page); + pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx", - page, page_ref_count(page), page_mapcount(page), - page->mapping, page->index); + page, page_ref_count(page), mapcount, + page->mapping, page_to_pgoff(page)); if (PageCompound(page)) pr_cont(" compound_mapcount: %d", compound_mapcount(page)); pr_cont("\n"); -- cgit v0.10.2 From 08eeb3061e44661afb4cb9eb08780e2fff8bfbc5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 19 Sep 2016 14:44:09 -0700 Subject: MAINTAINERS: update email for VLYNQ bus entry Link: http://lkml.kernel.org/r/1473218738-21836-1-git-send-email-f.fainelli@gmail.com Signed-off-by: Florian Fainelli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 2551f6e..a0ce40f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12569,7 +12569,7 @@ F: include/linux/if_*vlan.h F: net/8021q/ VLYNQ BUS -M: Florian Fainelli +M: Florian Fainelli L: openwrt-devel@lists.openwrt.org (subscribers-only) S: Maintained F: drivers/vlynq/vlynq.c -- cgit v0.10.2 From 7cbdb4a286a60c5d519cb9223fe2134d26870d39 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Mon, 19 Sep 2016 14:44:12 -0700 Subject: autofs: use dentry flags to block walks during expire Somewhere along the way the autofs expire operation has changed to hold a spin lock over expired dentry selection. The autofs indirect mount expired dentry selection is complicated and quite lengthy so it isn't appropriate to hold a spin lock over the operation. Commit 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()") added a might_sleep() to dput() causing a WARN_ONCE() about this usage to be issued. But the spin lock doesn't need to be held over this check, the autofs dentry info. flags are enough to block walks into dentrys during the expire. I've left the direct mount expire as it is (for now) because it is much simpler and quicker than the indirect mount expire and adding spin lock release and re-aquires would do nothing more than add overhead. Fixes: 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()") Link: http://lkml.kernel.org/r/20160912014017.1773.73060.stgit@pluto.themaw.net Signed-off-by: Ian Kent Reported-by: Takashi Iwai Tested-by: Takashi Iwai Cc: Takashi Iwai Cc: NeilBrown Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index b493909..d8e6d42 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -417,6 +417,7 @@ static struct dentry *should_expire(struct dentry *dentry, } return NULL; } + /* * Find an eligible tree to time-out * A tree is eligible if :- @@ -432,6 +433,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, struct dentry *root = sb->s_root; struct dentry *dentry; struct dentry *expired; + struct dentry *found; struct autofs_info *ino; if (!root) @@ -442,31 +444,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, dentry = NULL; while ((dentry = get_next_positive_subdir(dentry, root))) { + int flags = how; + spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(dentry); - if (ino->flags & AUTOFS_INF_WANT_EXPIRE) - expired = NULL; - else - expired = should_expire(dentry, mnt, timeout, how); - if (!expired) { + if (ino->flags & AUTOFS_INF_WANT_EXPIRE) { spin_unlock(&sbi->fs_lock); continue; } + spin_unlock(&sbi->fs_lock); + + expired = should_expire(dentry, mnt, timeout, flags); + if (!expired) + continue; + + spin_lock(&sbi->fs_lock); ino = autofs4_dentry_ino(expired); ino->flags |= AUTOFS_INF_WANT_EXPIRE; spin_unlock(&sbi->fs_lock); synchronize_rcu(); - spin_lock(&sbi->fs_lock); - if (should_expire(expired, mnt, timeout, how)) { - if (expired != dentry) - dput(dentry); - goto found; - } + /* Make sure a reference is not taken on found if + * things have changed. + */ + flags &= ~AUTOFS_EXP_LEAVES; + found = should_expire(expired, mnt, timeout, how); + if (!found || found != expired) + /* Something has changed, continue */ + goto next; + + if (expired != dentry) + dput(dentry); + + spin_lock(&sbi->fs_lock); + goto found; +next: + spin_lock(&sbi->fs_lock); ino->flags &= ~AUTOFS_INF_WANT_EXPIRE; + spin_unlock(&sbi->fs_lock); if (expired != dentry) dput(expired); - spin_unlock(&sbi->fs_lock); } return NULL; @@ -483,6 +500,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); int status; + int state; /* Block on any pending expire */ if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE)) @@ -490,8 +508,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk) if (rcu_walk) return -ECHILD; +retry: spin_lock(&sbi->fs_lock); - if (ino->flags & AUTOFS_INF_EXPIRING) { + state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING); + if (state == AUTOFS_INF_WANT_EXPIRE) { + spin_unlock(&sbi->fs_lock); + /* + * Possibly being selected for expire, wait until + * it's selected or not. + */ + schedule_timeout_uninterruptible(HZ/10); + goto retry; + } + if (state & AUTOFS_INF_EXPIRING) { spin_unlock(&sbi->fs_lock); pr_debug("waiting for expire %p name=%pd\n", dentry, dentry); -- cgit v0.10.2 From c8de641b1e9c5489aa6ca57b7836acd68e7563f1 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 19 Sep 2016 14:44:15 -0700 Subject: mm: fix the page_swap_info() BUG_ON check Commit 62c230bc1790 ("mm: add support for a filesystem to activate swap files and use direct_IO for writing swap pages") replaced the swap_aops dirty hook from __set_page_dirty_no_writeback() with swap_set_page_dirty(). For normal cases without these special SWP flags code path falls back to __set_page_dirty_no_writeback() so the behaviour is expected to be the same as before. But swap_set_page_dirty() makes use of the page_swap_info() helper to get the swap_info_struct to check for the flags like SWP_FILE, SWP_BLKDEV etc as desired for those features. This helper has BUG_ON(!PageSwapCache(page)) which is racy and safe only for the set_page_dirty_lock() path. For the set_page_dirty() path which is often needed for cases to be called from irq context, kswapd() can toggle the flag behind the back while the call is getting executed when system is low on memory and heavy swapping is ongoing. This ends up with undesired kernel panic. This patch just moves the check outside the helper to its users appropriately to fix kernel panic for the described path. Couple of users of helpers already take care of SwapCache condition so I skipped them. Link: http://lkml.kernel.org/r/1473460718-31013-1-git-send-email-santosh.shilimkar@oracle.com Signed-off-by: Santosh Shilimkar Cc: Mel Gorman Cc: Joe Perches Cc: Peter Zijlstra Cc: Rik van Riel Cc: David S. Miller Cc: Jens Axboe Cc: Michal Hocko Cc: Hugh Dickins Cc: Al Viro Cc: [4.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/page_io.c b/mm/page_io.c index 16bd82fa..eafe5dd 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -264,6 +264,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, int ret; struct swap_info_struct *sis = page_swap_info(page); + BUG_ON(!PageSwapCache(page)); if (sis->flags & SWP_FILE) { struct kiocb kiocb; struct file *swap_file = sis->swap_file; @@ -337,6 +338,7 @@ int swap_readpage(struct page *page) int ret = 0; struct swap_info_struct *sis = page_swap_info(page); + BUG_ON(!PageSwapCache(page)); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageUptodate(page), page); if (frontswap_load(page) == 0) { @@ -386,6 +388,7 @@ int swap_set_page_dirty(struct page *page) if (sis->flags & SWP_FILE) { struct address_space *mapping = sis->swap_file->f_mapping; + BUG_ON(!PageSwapCache(page)); return mapping->a_ops->set_page_dirty(page); } else { return __set_page_dirty_no_writeback(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index 78cfa29..2657acc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2724,7 +2724,6 @@ int swapcache_prepare(swp_entry_t entry) struct swap_info_struct *page_swap_info(struct page *page) { swp_entry_t swap = { .val = page_private(page) }; - BUG_ON(!PageSwapCache(page)); return swap_info[swp_type(swap)]; } -- cgit v0.10.2 From 31b4beb473e3bdee1bf79db849502dcb24b5c202 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 19 Sep 2016 14:44:18 -0700 Subject: ipc/shm: fix crash if CONFIG_SHMEM is not set Commit c01d5b300774 ("shmem: get_unmapped_area align huge page") makes use of shm_get_unmapped_area() in shm_file_operations() unconditional to CONFIG_MMU. As Tony Battersby pointed this can lead NULL-pointer dereference on machine with CONFIG_MMU=y and CONFIG_SHMEM=n. In this case ipc/shm is backed by ramfs which doesn't provide f_op->get_unmapped_area for configurations with MMU. The solution is to provide dummy f_op->get_unmapped_area for ramfs when CONFIG_MMU=y, which just call current->mm->get_unmapped_area(). Fixes: c01d5b300774 ("shmem: get_unmapped_area align huge page") Link: http://lkml.kernel.org/r/20160912102704.140442-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Reported-by: Tony Battersby Tested-by: Tony Battersby Cc: Hugh Dickins Cc: [4.7.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 183a212..12af049 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -27,9 +27,17 @@ #include #include #include +#include #include "internal.h" +static unsigned long ramfs_mmu_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +} + const struct file_operations ramfs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, @@ -38,6 +46,7 @@ const struct file_operations ramfs_file_operations = { .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, + .get_unmapped_area = ramfs_mmu_get_unmapped_area, }; const struct inode_operations ramfs_file_inode_operations = { -- cgit v0.10.2 From 2b0ad0085aa47ace4756aa501274a7de0325c09c Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:21 -0700 Subject: ocfs2: fix trans extend while flush truncate log Every time, ocfs2_extend_trans() included a credit for truncate log inode, but as that inode had been managed by jbd2 running transaction first time, it will not consume that credit until jbd2_journal_restart(). Since total credits to extend always included the un-consumed ones, there will be more and more un-consumed credit, at last jbd2_journal_restart() will fail due to credit number over the half of max transction credit. The following error was caught when unlinking a large file with many extents: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 13626 at fs/jbd2/transaction.c:269 start_this_handle+0x4c3/0x510 [jbd2]() Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea parport_pc parport pcspkr i2c_piix4 i2c_core acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 13626 Comm: unlink Tainted: G W 4.1.12-37.6.3.el6uek.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 02/11/2016 Call Trace: dump_stack+0x48/0x5c warn_slowpath_common+0x95/0xe0 warn_slowpath_null+0x1a/0x20 start_this_handle+0x4c3/0x510 [jbd2] jbd2__journal_restart+0x161/0x1b0 [jbd2] jbd2_journal_restart+0x13/0x20 [jbd2] ocfs2_extend_trans+0x74/0x220 [ocfs2] ocfs2_replay_truncate_records+0x93/0x360 [ocfs2] __ocfs2_flush_truncate_log+0x13e/0x3a0 [ocfs2] ocfs2_remove_btree_range+0x458/0x7f0 [ocfs2] ocfs2_commit_truncate+0x1b3/0x6f0 [ocfs2] ocfs2_truncate_for_delete+0xbd/0x380 [ocfs2] ocfs2_wipe_inode+0x136/0x6a0 [ocfs2] ocfs2_delete_inode+0x2a2/0x3e0 [ocfs2] ocfs2_evict_inode+0x28/0x60 [ocfs2] evict+0xab/0x1a0 iput_final+0xf6/0x190 iput+0xc8/0xe0 do_unlinkat+0x1b7/0x310 SyS_unlink+0x16/0x20 system_call_fastpath+0x12/0x71 ---[ end trace 28aa7410e69369cf ]--- JBD2: unlink wants too many credits (251 > 128) Link: http://lkml.kernel.org/r/1473674623-11810-1-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 7dabbc3..5112878 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5922,7 +5922,6 @@ bail: } static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, - handle_t *handle, struct inode *data_alloc_inode, struct buffer_head *data_alloc_bh) { @@ -5935,11 +5934,19 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, struct ocfs2_truncate_log *tl; struct inode *tl_inode = osb->osb_tl_inode; struct buffer_head *tl_bh = osb->osb_tl_bh; + handle_t *handle; di = (struct ocfs2_dinode *) tl_bh->b_data; tl = &di->id2.i_dealloc; i = le16_to_cpu(tl->tl_used) - 1; while (i >= 0) { + handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); + if (IS_ERR(handle)) { + status = PTR_ERR(handle); + mlog_errno(status); + goto bail; + } + /* Caller has given us at least enough credits to * update the truncate log dinode */ status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh, @@ -5974,12 +5981,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb, } } - status = ocfs2_extend_trans(handle, - OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); - if (status < 0) { - mlog_errno(status); - goto bail; - } + ocfs2_commit_trans(osb, handle); i--; } @@ -5994,7 +5996,6 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) { int status; unsigned int num_to_flush; - handle_t *handle; struct inode *tl_inode = osb->osb_tl_inode; struct inode *data_alloc_inode = NULL; struct buffer_head *tl_bh = osb->osb_tl_bh; @@ -6038,21 +6039,11 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) goto out_mutex; } - handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC); - if (IS_ERR(handle)) { - status = PTR_ERR(handle); - mlog_errno(status); - goto out_unlock; - } - - status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode, + status = ocfs2_replay_truncate_records(osb, data_alloc_inode, data_alloc_bh); if (status < 0) mlog_errno(status); - ocfs2_commit_trans(osb, handle); - -out_unlock: brelse(data_alloc_bh); ocfs2_inode_unlock(data_alloc_inode, 1); -- cgit v0.10.2 From d5bf141893880f7283fe97e1812c58ff22c8f9a5 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:24 -0700 Subject: ocfs2: fix trans extend while free cached blocks The root cause of this issue is the same with the one fixed by the last patch, but this time credits for allocator inode and group descriptor may not be consumed before trans extend. The following error was caught: WARNING: CPU: 0 PID: 2037 at fs/jbd2/transaction.c:269 start_this_handle+0x4c3/0x510 [jbd2]() Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront fb_sys_fops sysimgblt sysfillrect syscopyarea xen_netfront parport_pc parport pcspkr i2c_piix4 i2c_core acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix dm_mirror dm_region_hash dm_log dm_mod CPU: 0 PID: 2037 Comm: rm Tainted: G W 4.1.12-37.6.3.el6uek.bug24573128v2.x86_64 #2 Hardware name: Xen HVM domU, BIOS 4.4.4OVM 02/11/2016 Call Trace: dump_stack+0x48/0x5c warn_slowpath_common+0x95/0xe0 warn_slowpath_null+0x1a/0x20 start_this_handle+0x4c3/0x510 [jbd2] jbd2__journal_restart+0x161/0x1b0 [jbd2] jbd2_journal_restart+0x13/0x20 [jbd2] ocfs2_extend_trans+0x74/0x220 [ocfs2] ocfs2_free_cached_blocks+0x16b/0x4e0 [ocfs2] ocfs2_run_deallocs+0x70/0x270 [ocfs2] ocfs2_commit_truncate+0x474/0x6f0 [ocfs2] ocfs2_truncate_for_delete+0xbd/0x380 [ocfs2] ocfs2_wipe_inode+0x136/0x6a0 [ocfs2] ocfs2_delete_inode+0x2a2/0x3e0 [ocfs2] ocfs2_evict_inode+0x28/0x60 [ocfs2] evict+0xab/0x1a0 iput_final+0xf6/0x190 iput+0xc8/0xe0 do_unlinkat+0x1b7/0x310 SyS_unlinkat+0x22/0x40 system_call_fastpath+0x12/0x71 ---[ end trace a62437cb060baa71 ]--- JBD2: rm wants too many credits (149 > 128) Link: http://lkml.kernel.org/r/1473674623-11810-2-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 5112878..f165f86 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6404,43 +6404,34 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb, goto out_mutex; } - handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - mlog_errno(ret); - goto out_unlock; - } - while (head) { if (head->free_bg) bg_blkno = head->free_bg; else bg_blkno = ocfs2_which_suballoc_group(head->free_blk, head->free_bit); + handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_unlock; + } + trace_ocfs2_free_cached_blocks( (unsigned long long)head->free_blk, head->free_bit); ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, head->free_bit, bg_blkno, 1); - if (ret) { + if (ret) mlog_errno(ret); - goto out_journal; - } - ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); - if (ret) { - mlog_errno(ret); - goto out_journal; - } + ocfs2_commit_trans(osb, handle); tmp = head; head = head->free_next; kfree(tmp); } -out_journal: - ocfs2_commit_trans(osb, handle); - out_unlock: ocfs2_inode_unlock(inode, 1); brelse(di_bh); -- cgit v0.10.2 From 12703dbfeb15402260e7554d32a34ac40c233990 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 14:44:27 -0700 Subject: fsnotify: add a way to stop queueing events on group shutdown Implement a function that can be called when a group is being shutdown to stop queueing new events to the group. Fanotify will use this. Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events") Link: http://lkml.kernel.org/r/1473797711-14111-2-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Miklos Szeredi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/notify/group.c b/fs/notify/group.c index 3e2dd85..b47f7cf 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) } /* + * Stop queueing new events for this group. Once this function returns + * fsnotify_add_event() will not add any new events to the group's queue. + */ +void fsnotify_group_stop_queueing(struct fsnotify_group *group) +{ + mutex_lock(&group->notification_mutex); + group->shutdown = true; + mutex_unlock(&group->notification_mutex); +} + +/* * Trying to get rid of a group. Remove all marks, flush all events and release * the group reference. * Note that another thread calling fsnotify_clear_marks_by_group() may still @@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group) */ void fsnotify_destroy_group(struct fsnotify_group *group) { + /* + * Stop queueing new events. The code below is careful enough to not + * require this but fanotify needs to stop queuing events even before + * fsnotify_destroy_group() is called and this makes the other callers + * of fsnotify_destroy_group() to see the same behavior. + */ + fsnotify_group_stop_queueing(group); + /* clear all inode marks for this group, attach them to destroy_list */ fsnotify_detach_group_marks(group); diff --git a/fs/notify/notification.c b/fs/notify/notification.c index a95d8e0..3d76e65 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group, * Add an event to the group notification queue. The group can later pull this * event off the queue to deal with. The function returns 0 if the event was * added to the queue, 1 if the event was merged with some other queued event, - * 2 if the queue of events has overflown. + * 2 if the event was not queued - either the queue of events has overflown + * or the group is shutting down. */ int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, @@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group, mutex_lock(&group->notification_mutex); + if (group->shutdown) { + mutex_unlock(&group->notification_mutex); + return 2; + } + if (group->q_len >= group->max_events) { ret = 2; /* Queue overflow event only if it isn't already queued */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 58205f3..40a9e99 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -148,6 +148,7 @@ struct fsnotify_group { #define FS_PRIO_1 1 /* fanotify content based access control */ #define FS_PRIO_2 2 /* fanotify pre-content access */ unsigned int priority; + bool shutdown; /* group is being shut down, don't queue more events */ /* stores all fastpath marks assoc with this group so they can be cleaned on unregister */ struct mutex mark_mutex; /* protect marks_list */ @@ -292,6 +293,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op extern void fsnotify_get_group(struct fsnotify_group *group); /* drop reference on a group from fsnotify_alloc_group */ extern void fsnotify_put_group(struct fsnotify_group *group); +/* group destruction begins, stop queuing new events */ +extern void fsnotify_group_stop_queueing(struct fsnotify_group *group); /* destroy group */ extern void fsnotify_destroy_group(struct fsnotify_group *group); /* fasync handler function */ -- cgit v0.10.2 From 96d41019e3ac55f6f0115b0ce97e4f24a3d636d2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 19 Sep 2016 14:44:30 -0700 Subject: fanotify: fix list corruption in fanotify_get_response() fanotify_get_response() calls fsnotify_remove_event() when it finds that group is being released from fanotify_release() (bypass_perm is set). However the event it removes need not be only in the group's notification queue but it can have already moved to access_list (userspace read the event before closing the fanotify instance fd) which is protected by a different lock. Thus when fsnotify_remove_event() races with fanotify_release() operating on access_list, the list can get corrupted. Fix the problem by moving all the logic removing permission events from the lists to one place - fanotify_release(). Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events") Link: http://lkml.kernel.org/r/1473797711-14111-3-git-send-email-jack@suse.cz Signed-off-by: Jan Kara Reported-by: Miklos Szeredi Tested-by: Miklos Szeredi Reviewed-by: Miklos Szeredi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index d2f97ec..e0e5f7c 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group, pr_debug("%s: group=%p event=%p\n", __func__, group, event); - wait_event(group->fanotify_data.access_waitq, event->response || - atomic_read(&group->fanotify_data.bypass_perm)); - - if (!event->response) { /* bypass_perm set */ - /* - * Event was canceled because group is being destroyed. Remove - * it from group's event list because we are responsible for - * freeing the permission event. - */ - fsnotify_remove_event(group, &event->fae.fse); - return 0; - } + wait_event(group->fanotify_data.access_waitq, event->response); /* userspace responded, convert to something usable */ switch (event->response) { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 8e8e6bc..a643138 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS struct fanotify_perm_event_info *event, *next; + struct fsnotify_event *fsn_event; /* - * There may be still new events arriving in the notification queue - * but since userspace cannot use fanotify fd anymore, no event can - * enter or leave access_list by now. + * Stop new events from arriving in the notification queue. since + * userspace cannot use fanotify fd anymore, no event can enter or + * leave access_list by now either. */ - spin_lock(&group->fanotify_data.access_lock); - - atomic_inc(&group->fanotify_data.bypass_perm); + fsnotify_group_stop_queueing(group); + /* + * Process all permission events on access_list and notification queue + * and simulate reply from userspace. + */ + spin_lock(&group->fanotify_data.access_lock); list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, fae.fse.list) { pr_debug("%s: found group=%p event=%p\n", __func__, group, @@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file) spin_unlock(&group->fanotify_data.access_lock); /* - * Since bypass_perm is set, newly queued events will not wait for - * access response. Wake up the already sleeping ones now. - * synchronize_srcu() in fsnotify_destroy_group() will wait for all - * processes sleeping in fanotify_handle_event() waiting for access - * response and thus also for all permission events to be freed. + * Destroy all non-permission events. For permission events just + * dequeue them and set the response. They will be freed once the + * response is consumed and fanotify_get_response() returns. */ + mutex_lock(&group->notification_mutex); + while (!fsnotify_notify_queue_is_empty(group)) { + fsn_event = fsnotify_remove_first_event(group); + if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) + fsnotify_destroy_event(group, fsn_event); + else + FANOTIFY_PE(fsn_event)->response = FAN_ALLOW; + } + mutex_unlock(&group->notification_mutex); + + /* Response for all permission events it set, wakeup waiters */ wake_up(&group->fanotify_data.access_waitq); #endif @@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) spin_lock_init(&group->fanotify_data.access_lock); init_waitqueue_head(&group->fanotify_data.access_waitq); INIT_LIST_HEAD(&group->fanotify_data.access_list); - atomic_set(&group->fanotify_data.bypass_perm, 0); #endif switch (flags & FAN_ALL_CLASS_BITS) { case FAN_CLASS_NOTIF: diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 3d76e65..e455e83 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -132,21 +132,6 @@ queue: } /* - * Remove @event from group's notification queue. It is the responsibility of - * the caller to destroy the event. - */ -void fsnotify_remove_event(struct fsnotify_group *group, - struct fsnotify_event *event) -{ - mutex_lock(&group->notification_mutex); - if (!list_empty(&event->list)) { - list_del_init(&event->list); - group->q_len--; - } - mutex_unlock(&group->notification_mutex); -} - -/* * Remove and return the first event from the notification list. It is the * responsibility of the caller to destroy the obtained event */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 40a9e99..7268ed0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -180,7 +180,6 @@ struct fsnotify_group { spinlock_t access_lock; struct list_head access_list; wait_queue_head_t access_waitq; - atomic_t bypass_perm; #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; @@ -307,8 +306,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group, struct fsnotify_event *event, int (*merge)(struct list_head *, struct fsnotify_event *)); -/* Remove passed event from groups notification queue */ -extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event); /* true if the group notification queue is empty */ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group); /* return, but do not dequeue the first event on the notification queue */ -- cgit v0.10.2 From 3bb8b653c86f6b1d2cc05aa1744fed4b18f99485 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 19 Sep 2016 14:44:33 -0700 Subject: ocfs2: fix double unlock in case retry after free truncate log If ocfs2_reserve_cluster_bitmap_bits() fails with ENOSPC, it will try to free truncate log and then retry. Since ocfs2_try_to_free_truncate_log will lock/unlock global bitmap inode, we have to unlock it before calling this function. But when retry reserve and it fails with no global bitmap inode lock taken, it will unlock again in error handling branch and BUG. This issue also exists if no need retry and then ocfs2_inode_lock fails. So fix it. Fixes: 2070ad1aebff ("ocfs2: retry on ENOSPC if sufficient space in truncate log") Link: http://lkml.kernel.org/r/57D91939.6030809@huawei.com Signed-off-by: Joseph Qi Signed-off-by: Jiufei Xue Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ea47120..6ad3533 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1199,14 +1199,24 @@ retry: inode_unlock((*ac)->ac_inode); ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted); - if (ret == 1) + if (ret == 1) { + iput((*ac)->ac_inode); + (*ac)->ac_inode = NULL; goto retry; + } if (ret < 0) mlog_errno(ret); inode_lock((*ac)->ac_inode); - ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); + ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1); + if (ret < 0) { + mlog_errno(ret); + inode_unlock((*ac)->ac_inode); + iput((*ac)->ac_inode); + (*ac)->ac_inode = NULL; + goto bail; + } } if (status < 0) { if (status != -ENOSPC) -- cgit v0.10.2 From db2ba40c277dc545bab531671c3f45ac0afea6f8 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 19 Sep 2016 14:44:36 -0700 Subject: mm: memcontrol: make per-cpu charge cache IRQ-safe for socket accounting During cgroup2 rollout into production, we started encountering css refcount underflows and css access crashes in the memory controller. Splitting the heavily shared css reference counter into logical users narrowed the imbalance down to the cgroup2 socket memory accounting. The problem turns out to be the per-cpu charge cache. Cgroup1 had a separate socket counter, but the new cgroup2 socket accounting goes through the common charge path that uses a shared per-cpu cache for all memory that is being tracked. Those caches are safe against scheduling preemption, but not against interrupts - such as the newly added packet receive path. When cache draining is interrupted by network RX taking pages out of the cache, the resuming drain operation will put references of in-use pages, thus causing the imbalance. Disable IRQs during all per-cpu charge cache operations. Fixes: f7e1cb6ec51b ("mm: memcontrol: account socket memory in unified hierarchy memory controller") Link: http://lkml.kernel.org/r/20160914194846.11153-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Tejun Heo Cc: "David S. Miller" Cc: Michal Hocko Cc: Vladimir Davydov Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9a6a51a..4be518d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1740,17 +1740,22 @@ static DEFINE_MUTEX(percpu_charge_mutex); static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { struct memcg_stock_pcp *stock; + unsigned long flags; bool ret = false; if (nr_pages > CHARGE_BATCH) return ret; - stock = &get_cpu_var(memcg_stock); + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); if (memcg == stock->cached && stock->nr_pages >= nr_pages) { stock->nr_pages -= nr_pages; ret = true; } - put_cpu_var(memcg_stock); + + local_irq_restore(flags); + return ret; } @@ -1771,15 +1776,18 @@ static void drain_stock(struct memcg_stock_pcp *stock) stock->cached = NULL; } -/* - * This must be called under preempt disabled or must be called by - * a thread which is pinned to local cpu. - */ static void drain_local_stock(struct work_struct *dummy) { - struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock); + struct memcg_stock_pcp *stock; + unsigned long flags; + + local_irq_save(flags); + + stock = this_cpu_ptr(&memcg_stock); drain_stock(stock); clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); + + local_irq_restore(flags); } /* @@ -1788,14 +1796,19 @@ static void drain_local_stock(struct work_struct *dummy) */ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); + struct memcg_stock_pcp *stock; + unsigned long flags; + + local_irq_save(flags); + stock = this_cpu_ptr(&memcg_stock); if (stock->cached != memcg) { /* reset if necessary */ drain_stock(stock); stock->cached = memcg; } stock->nr_pages += nr_pages; - put_cpu_var(memcg_stock); + + local_irq_restore(flags); } /* -- cgit v0.10.2 From d979a39d7242e0601bf9b60e89628fb8ac577179 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 19 Sep 2016 14:44:38 -0700 Subject: cgroup: duplicate cgroup reference when cloning sockets When a socket is cloned, the associated sock_cgroup_data is duplicated but not its reference on the cgroup. As a result, the cgroup reference count will underflow when both sockets are destroyed later on. Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup") Link: http://lkml.kernel.org/r/20160914194846.11153-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Tejun Heo Cc: Michal Hocko Cc: Vladimir Davydov Cc: [4.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d1c51b7..5e8dab5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -6270,6 +6270,12 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) if (cgroup_sk_alloc_disabled) return; + /* Socket clone path */ + if (skcd->val) { + cgroup_get(sock_cgroup_ptr(skcd)); + return; + } + rcu_read_lock(); while (true) { diff --git a/net/core/sock.c b/net/core/sock.c index 25dab8b..fd7b41e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1362,7 +1362,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!try_module_get(prot->owner)) goto out_free_sec; sk_tx_queue_clear(sk); - cgroup_sk_alloc(&sk->sk_cgrp_data); } return sk; @@ -1422,6 +1421,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, net); atomic_set(&sk->sk_wmem_alloc, 1); + cgroup_sk_alloc(&sk->sk_cgrp_data); sock_update_classid(&sk->sk_cgrp_data); sock_update_netprioidx(&sk->sk_cgrp_data); } @@ -1566,6 +1566,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + + cgroup_sk_alloc(&newsk->sk_cgrp_data); + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) -- cgit v0.10.2 From d21c353d5e99c56cdd5b5c1183ffbcaf23b8b960 Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Mon, 19 Sep 2016 14:44:42 -0700 Subject: ocfs2: fix start offset to ocfs2_zero_range_for_truncate() If we punch a hole on a reflink such that following conditions are met: 1. start offset is on a cluster boundary 2. end offset is not on a cluster boundary 3. (end offset is somewhere in another extent) or (hole range > MAX_CONTIG_BYTES(1MB)), we dont COW the first cluster starting at the start offset. But in this case, we were wrongly passing this cluster to ocfs2_zero_range_for_truncate() to zero out. This will modify the cluster in place and zero it in the source too. Fix this by skipping this cluster in such a scenario. To reproduce: 1. Create a random file of say 10 MB xfs_io -c 'pwrite -b 4k 0 10M' -f 10MBfile 2. Reflink it reflink -f 10MBfile reflnktest 3. Punch a hole at starting at cluster boundary with range greater that 1MB. You can also use a range that will put the end offset in another extent. fallocate -p -o 0 -l 1048615 reflnktest 4. sync 5. Check the first cluster in the source file. (It will be zeroed out). dd if=10MBfile iflag=direct bs= count=1 | hexdump -C Link: http://lkml.kernel.org/r/1470957147-14185-1-git-send-email-ashish.samant@oracle.com Signed-off-by: Ashish Samant Reported-by: Saar Maoz Reviewed-by: Srinivas Eeda Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Eric Ren Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4e7b0dc..0b055bf 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1506,7 +1506,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, u64 start, u64 len) { int ret = 0; - u64 tmpend, end = start + len; + u64 tmpend = 0; + u64 end = start + len; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); unsigned int csize = osb->s_clustersize; handle_t *handle; @@ -1538,18 +1539,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, } /* - * We want to get the byte offset of the end of the 1st cluster. + * If start is on a cluster boundary and end is somewhere in another + * cluster, we have not COWed the cluster starting at start, unless + * end is also within the same cluster. So, in this case, we skip this + * first call to ocfs2_zero_range_for_truncate() truncate and move on + * to the next one. */ - tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); - if (tmpend > end) - tmpend = end; + if ((start & (csize - 1)) != 0) { + /* + * We want to get the byte offset of the end of the 1st + * cluster. + */ + tmpend = (u64)osb->s_clustersize + + (start & ~(osb->s_clustersize - 1)); + if (tmpend > end) + tmpend = end; - trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start, - (unsigned long long)tmpend); + trace_ocfs2_zero_partial_clusters_range1( + (unsigned long long)start, + (unsigned long long)tmpend); - ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); - if (ret) - mlog_errno(ret); + ret = ocfs2_zero_range_for_truncate(inode, handle, start, + tmpend); + if (ret) + mlog_errno(ret); + } if (tmpend < end) { /* -- cgit v0.10.2 From 63b52c4936a2e679639c38ef51a50aa8ca1c5c07 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Mon, 19 Sep 2016 14:44:44 -0700 Subject: Revert "ocfs2: bump up o2cb network protocol version" This reverts commit 38b52efd218b ("ocfs2: bump up o2cb network protocol version"). This commit made rolling upgrade fail. When one node is upgraded to new version with this commit, the remaining nodes will fail to establish connections to it, then the application like VMs on the remaining nodes can't be live migrated to the upgraded one. This will cause an outage. Since negotiate hb timeout behavior didn't change without this commit, so revert it. Fixes: 38b52efd218bf ("ocfs2: bump up o2cb network protocol version") Link: http://lkml.kernel.org/r/1471396924-10375-1-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 94b1836..b95e7df 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -44,9 +44,6 @@ * version here in tcp_internal.h should not need to be bumped for * filesystem locking changes. * - * New in version 12 - * - Negotiate hb timeout when storage is down. - * * New in version 11 * - Negotiation of filesystem locking in the dlm join. * @@ -78,7 +75,7 @@ * - full 64 bit i_size in the metadata lock lvbs * - introduction of "rw" lock and pushing meta/data locking down */ -#define O2NET_PROTOCOL_VERSION 12ULL +#define O2NET_PROTOCOL_VERSION 11ULL struct o2net_handshake { __be64 protocol_version; __be64 connector_id; -- cgit v0.10.2 From b92ae139c308c5223521ed6ec022148b81312809 Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Mon, 19 Sep 2016 14:44:47 -0700 Subject: rapidio/rio_cm: avoid GFP_KERNEL in atomic context As reported by Alexey Khoroshilov (https://lkml.org/lkml/2016/9/9/737): riocm_send_close() is called from rio_cm_shutdown() under spin_lock_bh(idr_lock), but riocm_send_close() uses a GFP_KERNEL allocation. Fix by taking riocm_send_close() outside of spinlock protected code. [akpm@linux-foundation.org: remove unneeded `if (!list_empty())'] Link: http://lkml.kernel.org/r/20160915175402.10122-1-alexandre.bounine@idt.com Signed-off-by: Alexandre Bounine Reported-by: Alexey Khoroshilov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index 3fa17ac..cebc296 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -2247,17 +2247,30 @@ static int rio_cm_shutdown(struct notifier_block *nb, unsigned long code, { struct rio_channel *ch; unsigned int i; + LIST_HEAD(list); riocm_debug(EXIT, "."); + /* + * If there are any channels left in connected state send + * close notification to the connection partner. + * First build a list of channels that require a closing + * notification because function riocm_send_close() should + * be called outside of spinlock protected code. + */ spin_lock_bh(&idr_lock); idr_for_each_entry(&ch_idr, ch, i) { - riocm_debug(EXIT, "close ch %d", ch->id); - if (ch->state == RIO_CM_CONNECTED) - riocm_send_close(ch); + if (ch->state == RIO_CM_CONNECTED) { + riocm_debug(EXIT, "close ch %d", ch->id); + idr_remove(&ch_idr, ch->id); + list_add(&ch->ch_node, &list); + } } spin_unlock_bh(&idr_lock); + list_for_each_entry(ch, &list, ch_node) + riocm_send_close(ch); + return NOTIFY_DONE; } -- cgit v0.10.2 From babd6134a54d70efe875fa5661a20eaecb63f278 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 18 Sep 2016 18:20:27 +0300 Subject: net/mlx5: Fix flow counter bulk command out mailbox allocation The FW command output length should be only the length of struct mlx5_cmd_fc_bulk out field. Failing to do so will cause the memcpy call which is invoked later in the driver to write over wrong memory address and corrupt kernel memory which results in random crashes. This bug was found using the kernel address sanitizer (kasan). Fixes: a351a1b03bf1 ('net/mlx5: Introduce bulk reading of flow counters') Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 9134010..287ade1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -425,11 +425,11 @@ struct mlx5_cmd_fc_bulk * mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) { struct mlx5_cmd_fc_bulk *b; - int outlen = sizeof(*b) + + int outlen = MLX5_ST_SZ_BYTES(query_flow_counter_out) + MLX5_ST_SZ_BYTES(traffic_counter) * num; - b = kzalloc(outlen, GFP_KERNEL); + b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL); if (!b) return NULL; -- cgit v0.10.2 From 4eea37d7b92076fdeac2a21e5f4dbd92d286719d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 18 Sep 2016 18:20:28 +0300 Subject: net/mlx5: E-Switch, Fix error flow in the SRIOV e-switch init code When enablement of the SRIOV e-switch in certain mode (switchdev or legacy) fails, we must set the mode to none. Otherwise, we'll run into double free based crashes when further attempting to deal with the e-switch (such as when disabling sriov or unloading the driver). Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8b78f15..b247949 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1554,6 +1554,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) abort: esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + esw->mode = SRIOV_NONE; return err; } -- cgit v0.10.2 From 6c419ba8e2580ab17c164db6e918e163d3537ec1 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 18 Sep 2016 18:20:29 +0300 Subject: net/mlx5: E-Switch, Handle mode change failures E-switch mode changes involve creating HW tables, potentially allocating netdevices, etc, and things can fail. Add an attempt to rollback to the existing mode when changing to the new mode fails. Only if rollback fails, getting proper SRIOV functionality requires module unload or sriov disablement/enablement. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3dc83a9..7de40e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -446,7 +446,7 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; if (esw->mode != SRIOV_LEGACY) { esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); @@ -455,8 +455,12 @@ static int esw_offloads_start(struct mlx5_eswitch *esw) mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); - if (err) - esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err); + } return err; } @@ -508,12 +512,16 @@ create_ft_err: static int esw_offloads_stop(struct mlx5_eswitch *esw) { - int err, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); - if (err) - esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err); + if (err) { + esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err); + err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err1) + esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); + } return err; } -- cgit v0.10.2 From a435a07f9164dda7c0c26e8ad758881f4bafc127 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Sun, 18 Sep 2016 17:46:07 +0200 Subject: net: ipv6: fallback to full lookup if table lookup is unsuitable Commit 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") introduced a regression: insertion of an IPv6 route in a table not containing the appropriate connected route for the gateway but which contained a non-connected route (like a default gateway) fails while it was previously working: $ ip link add eth0 type dummy $ ip link set up dev eth0 $ ip addr add 2001:db8::1/64 dev eth0 $ ip route add ::/0 via 2001:db8::5 dev eth0 table 20 $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20 RTNETLINK answers: No route to host $ ip -6 route show table 20 default via 2001:db8::5 dev eth0 metric 1024 pref medium After this patch, we get: $ ip route add 2001:db8:cafe::1/128 via 2001:db8::6 dev eth0 table 20 $ ip -6 route show table 20 2001:db8:cafe::1 via 2001:db8::6 dev eth0 metric 1024 pref medium default via 2001:db8::5 dev eth0 metric 1024 pref medium Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Signed-off-by: Vincent Bernat Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4981755..e3a224b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1986,9 +1986,18 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; - if (cfg->fc_table) + if (cfg->fc_table) { grt = ip6_nh_lookup_table(net, cfg, gw_addr); + if (grt) { + if (grt->rt6i_flags & RTF_GATEWAY || + (dev && dev != grt->dst.dev)) { + ip6_rt_put(grt); + grt = NULL; + } + } + } + if (!grt) grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); -- cgit v0.10.2 From 3ed6e498b91a4dc5d0e8b6270a6c144061db2455 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 18 Sep 2016 21:17:19 +0200 Subject: MAINTAINERS: Add an entry for the core network DSA code The core distributed switch architecture code currently does not have a MAINTAINERS entry, which results in some contributions not landing in the right peoples inbox. Signed-off-by: Andrew Lunn Acked-by: Florian Fainelli Acked-by: Vivien Didelot Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index a5e1270..247b418 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8160,6 +8160,15 @@ S: Maintained W: https://fedorahosted.org/dropwatch/ F: net/core/drop_monitor.c +NETWORKING [DSA] +M: Andrew Lunn +M: Vivien Didelot +M: Florian Fainelli +S: Maintained +F: net/dsa/ +F: include/net/dsa.h +F: drivers/net/dsa/ + NETWORKING [GENERAL] M: "David S. Miller" L: netdev@vger.kernel.org -- cgit v0.10.2 From 67a99b7061c07b190ac6c39f136afedbb7aa86e9 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 19 Sep 2016 17:47:41 +0300 Subject: qed: Fix stack corruption on probe Commit fe56b9e6a8d95 ("qed: Add module with basic common support") has introduced a stack corruption during probe, where filling a local struct with data to be sent to management firmware is incorrectly filled; The data is written outside of the struct and corrupts the stack. Changes from v1: ---------------- - Correct the value written [Caught by David Laight] Fixes: fe56b9e6a8d95 ("qed: Add module with basic common support") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index a240f26..f776a77 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1153,8 +1153,8 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, p_drv_version = &union_data.drv_version; p_drv_version->version = p_ver->version; - for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) { - val = cpu_to_be32(p_ver->name[i]); + for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) { + val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)])); *(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val; } -- cgit v0.10.2 From f5beeb1851ea6f8cfcf2657f26cb24c0582b4945 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 8 Sep 2016 09:57:07 +0200 Subject: fs/proc/kcore.c: Make bounce buffer global for read Next patch adds bounce buffer for ktext area, so it's convenient to have single bounce buffer for both vmalloc/module and ktext cases. Suggested-by: Linus Torvalds Signed-off-by: Jiri Olsa Acked-by: Kees Cook Signed-off-by: Linus Torvalds diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index a939f5e..bd3ac9d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -430,6 +430,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) static ssize_t read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) { + char *buf = file->private_data; ssize_t acc = 0; size_t size, tsz; size_t elf_buflen; @@ -500,18 +501,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (clear_user(buffer, tsz)) return -EFAULT; } else if (is_vmalloc_or_module_addr((void *)start)) { - char * elf_buf; - - elf_buf = kzalloc(tsz, GFP_KERNEL); - if (!elf_buf) - return -ENOMEM; - vread(elf_buf, (char *)start, tsz); + vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ - if (copy_to_user(buffer, elf_buf, tsz)) { - kfree(elf_buf); + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; - } - kfree(elf_buf); } else { if (kern_addr_valid(start)) { unsigned long n; @@ -549,6 +542,11 @@ static int open_kcore(struct inode *inode, struct file *filp) { if (!capable(CAP_SYS_RAWIO)) return -EPERM; + + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + if (kcore_need_update) kcore_update_ram(); if (i_size_read(inode) != proc_root_kcore->size) { @@ -559,10 +557,16 @@ static int open_kcore(struct inode *inode, struct file *filp) return 0; } +static int release_kcore(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} static const struct file_operations proc_kcore_operations = { .read = read_kcore, .open = open_kcore, + .release = release_kcore, .llseek = default_llseek, }; -- cgit v0.10.2 From df04abfd181acc276ba6762c8206891ae10ae00d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 8 Sep 2016 09:57:08 +0200 Subject: fs/proc/kcore.c: Add bounce buffer for ktext data We hit hardened usercopy feature check for kernel text access by reading kcore file: usercopy: kernel memory exposure attempt detected from ffffffff8179a01f () (4065 bytes) kernel BUG at mm/usercopy.c:75! Bypassing this check for kcore by adding bounce buffer for ktext data. Reported-by: Steve Best Fixes: f5509cc18daa ("mm: Hardened usercopy") Suggested-by: Kees Cook Signed-off-by: Jiri Olsa Acked-by: Kees Cook Signed-off-by: Linus Torvalds diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index bd3ac9d..5c89a07 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -509,7 +509,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) if (kern_addr_valid(start)) { unsigned long n; - n = copy_to_user(buffer, (char *)start, tsz); + /* + * Using bounce buffer to bypass the + * hardened user copy kernel text checks. + */ + memcpy(buf, (char *) start, tsz); + n = copy_to_user(buffer, buf, tsz); /* * We cannot distinguish between fault on source * and fault on destination. When this happens -- cgit v0.10.2 From aa4f0601115319a52c80f468c8f007e5aa9277cb Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 20 Sep 2016 08:56:36 -0700 Subject: mm: usercopy: Check for module addresses While running a compile on arm64, I hit a memory exposure usercopy: kernel memory exposure attempt detected from fffffc0000f3b1a8 (buffer_head) (1 bytes) ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:75! Internal error: Oops - BUG: 0 [#1] SMP Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_broute bridge stp llc ebtable_nat ip6table_security ip6table_raw ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle iptable_security iptable_raw iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle ebtable_filter ebtables ip6table_filter ip6_tables vfat fat xgene_edac xgene_enet edac_core i2c_xgene_slimpro i2c_core at803x realtek xgene_dma mdio_xgene gpio_dwapb gpio_xgene_sb xgene_rng mailbox_xgene_slimpro nfsd auth_rpcgss nfs_acl lockd grace sunrpc xfs libcrc32c sdhci_of_arasan sdhci_pltfm sdhci mmc_core xhci_plat_hcd gpio_keys CPU: 0 PID: 19744 Comm: updatedb Tainted: G W 4.8.0-rc3-threadinfo+ #1 Hardware name: AppliedMicro X-Gene Mustang Board/X-Gene Mustang Board, BIOS 3.06.12 Aug 12 2016 task: fffffe03df944c00 task.stack: fffffe00d128c000 PC is at __check_object_size+0x70/0x3f0 LR is at __check_object_size+0x70/0x3f0 ... [] __check_object_size+0x70/0x3f0 [] filldir64+0x158/0x1a0 [] __fat_readdir+0x4a0/0x558 [fat] [] fat_readdir+0x34/0x40 [fat] [] iterate_dir+0x190/0x1e0 [] SyS_getdents64+0x88/0x120 [] el0_svc_naked+0x24/0x28 fffffc0000f3b1a8 is a module address. Modules may have compiled in strings which could get copied to userspace. In this instance, it looks like "." which matches with a size of 1 byte. Extend the is_vmalloc_addr check to be is_vmalloc_or_module_addr to cover all possible cases. Signed-off-by: Laura Abbott Signed-off-by: Kees Cook diff --git a/mm/usercopy.c b/mm/usercopy.c index 089328f..3c8da0a 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -207,8 +207,11 @@ static inline const char *check_heap_object(const void *ptr, unsigned long n, * Some architectures (arm64) return true for virt_addr_valid() on * vmalloced addresses. Work around this by checking for vmalloc * first. + * + * We also need to check for module addresses explicitly since we + * may copy static data from modules to userspace */ - if (is_vmalloc_addr(ptr)) + if (is_vmalloc_or_module_addr(ptr)) return NULL; if (!virt_addr_valid(ptr)) -- cgit v0.10.2 From e23d4159b109167126e5bcd7f3775c95de7fee47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Sep 2016 20:07:42 +0100 Subject: fix fault_in_multipages_...() on architectures with no-op access_ok() Switching iov_iter fault-in to multipages variants has exposed an old bug in underlying fault_in_multipages_...(); they break if the range passed to them wraps around. Normally access_ok() done by callers will prevent such (and it's a guaranteed EFAULT - ERR_PTR() values fall into such a range and they should not point to any valid objects). However, on architectures where userland and kernel live in different MMU contexts (e.g. s390) access_ok() is a no-op and on those a range with a wraparound can reach fault_in_multipages_...(). Since any wraparound means EFAULT there, the fix is trivial - turn those while (uaddr <= end) ... into if (unlikely(uaddr > end)) return -EFAULT; do ... while (uaddr <= end); Reported-by: Jan Stancek Tested-by: Jan Stancek Cc: stable@vger.kernel.org # v3.5+ Signed-off-by: Al Viro Signed-off-by: Linus Torvalds diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 66a1260..7e3d537 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -571,56 +571,56 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) */ static inline int fault_in_multipages_writeable(char __user *uaddr, int size) { - int ret = 0; char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return ret; + return 0; + if (unlikely(uaddr > end)) + return -EFAULT; /* * Writing zeroes into userspace here is OK, because we know that if * the zero gets there, we'll be overwriting it. */ - while (uaddr <= end) { - ret = __put_user(0, uaddr); - if (ret != 0) - return ret; + do { + if (unlikely(__put_user(0, uaddr) != 0)) + return -EFAULT; uaddr += PAGE_SIZE; - } + } while (uaddr <= end); /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & PAGE_MASK) == ((unsigned long)end & PAGE_MASK)) - ret = __put_user(0, end); + return __put_user(0, end); - return ret; + return 0; } static inline int fault_in_multipages_readable(const char __user *uaddr, int size) { volatile char c; - int ret = 0; const char __user *end = uaddr + size - 1; if (unlikely(size == 0)) - return ret; + return 0; - while (uaddr <= end) { - ret = __get_user(c, uaddr); - if (ret != 0) - return ret; + if (unlikely(uaddr > end)) + return -EFAULT; + + do { + if (unlikely(__get_user(c, uaddr) != 0)) + return -EFAULT; uaddr += PAGE_SIZE; - } + } while (uaddr <= end); /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & PAGE_MASK) == ((unsigned long)end & PAGE_MASK)) { - ret = __get_user(c, end); - (void)c; + return __get_user(c, end); } - return ret; + return 0; } int add_to_page_cache_locked(struct page *page, struct address_space *mapping, -- cgit v0.10.2 From 9b86a8d19bd6406a10de5f924bf2a003a502d427 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Tue, 20 Sep 2016 12:00:52 +0530 Subject: cxgb4/cxgb4vf: Allocate more queues for 25G and 100G adapter We were missing check for 25G and 100G while checking port speed, which lead to less number of queues getting allocated for 25G & 100G adapters and leading to low throughput. Adding the missing check for both NIC and vNIC driver. Also fixes port advertisement for 25G and 100G in ethtool output. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 2e2aa9f..edd2338 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -419,8 +419,8 @@ struct link_config { unsigned short supported; /* link capabilities */ unsigned short advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c762a8c..3ceafb55 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4305,10 +4305,17 @@ static const struct pci_error_handlers cxgb4_eeh = { .resume = eeh_resume, }; +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s). + */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, @@ -4756,8 +4763,12 @@ static void print_port_info(const struct net_device *dev) bufp += sprintf(bufp, "1000/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G) bufp += sprintf(bufp, "10G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G) + bufp += sprintf(bufp, "25G/"); if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G) bufp += sprintf(bufp, "40G/"); + if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G) + bufp += sprintf(bufp, "100G/"); if (bufp != buf) --bufp; sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type)); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index dc92c80..660204b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3627,7 +3627,8 @@ void t4_ulprx_read_la(struct adapter *adap, u32 *la_buf) } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ - FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ + FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \ + FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ FW_PORT_CAP_ANEG) /** @@ -7196,8 +7197,12 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) speed = 1000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G)) + speed = 25000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) speed = 40000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G)) + speed = 100000; lc = &pi->link_cfg; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index a89b307..30507d4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -2265,6 +2265,12 @@ enum fw_port_cap { FW_PORT_CAP_802_3_ASM_DIR = 0x8000, }; +#define FW_PORT_CAP_SPEED_S 0 +#define FW_PORT_CAP_SPEED_M 0x3f +#define FW_PORT_CAP_SPEED_V(x) ((x) << FW_PORT_CAP_SPEED_S) +#define FW_PORT_CAP_SPEED_G(x) \ + (((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M) + enum fw_port_mdi { FW_PORT_CAP_MDI_UNCHANGED, FW_PORT_CAP_MDI_AUTO, diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 8ee5414..17a2bbc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -108,8 +108,8 @@ struct link_config { unsigned int supported; /* link capabilities */ unsigned int advertising; /* advertised capabilities */ unsigned short lp_advertising; /* peer advertised capabilities */ - unsigned short requested_speed; /* speed user has requested */ - unsigned short speed; /* actual link speed */ + unsigned int requested_speed; /* speed user has requested */ + unsigned int speed; /* actual link speed */ unsigned char requested_fc; /* flow control user has requested */ unsigned char fc; /* actual link flow control */ unsigned char autoneg; /* autonegotiating? */ @@ -271,10 +271,17 @@ static inline bool is_10g_port(const struct link_config *lc) return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0; } +/* Return true if the Link Configuration supports "High Speeds" (those greater + * than 1Gb/s). + */ static inline bool is_x_10g_port(const struct link_config *lc) { - return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 || - (lc->supported & FW_PORT_CAP_SPEED_40G) != 0; + unsigned int speeds, high_speeds; + + speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported)); + high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G); + + return high_speeds != 0; } static inline unsigned int core_ticks_per_usec(const struct adapter *adapter) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 427bfa7..b5622b1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -314,8 +314,9 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size, } #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\ - FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \ - FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG) + FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \ + FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \ + FW_PORT_CAP_ANEG) /** * init_link_config - initialize a link's SW state @@ -1712,8 +1713,12 @@ int t4vf_handle_fw_rpl(struct adapter *adapter, const __be64 *rpl) speed = 1000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G)) speed = 10000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G)) + speed = 25000; else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G)) speed = 40000; + else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G)) + speed = 100000; /* * Scan all of our "ports" (Virtual Interfaces) looking for -- cgit v0.10.2 From e6449539828ac3b7c74b648793291640bcca8259 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:05 +0800 Subject: r8152: move some functions Move the following functions forward. r8152_mmd_indirect() r8152_mmd_read() r8152_mmd_write() r8152_eee_en() r8152b_enable_eee() r8153_eee_en() r8153_enable_eee() r8152b_enable_fc() r8153_aldps_en() Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f41a8ad..ae7db46 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2552,6 +2552,77 @@ static void r8152_aldps_en(struct r8152 *tp, bool enable) } } +static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg) +{ + ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev); + ocp_reg_write(tp, OCP_EEE_DATA, reg); + ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev); +} + +static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg) +{ + u16 data; + + r8152_mmd_indirect(tp, dev, reg); + data = ocp_reg_read(tp, OCP_EEE_DATA); + ocp_reg_write(tp, OCP_EEE_AR, 0x0000); + + return data; +} + +static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) +{ + r8152_mmd_indirect(tp, dev, reg); + ocp_reg_write(tp, OCP_EEE_DATA, data); + ocp_reg_write(tp, OCP_EEE_AR, 0x0000); +} + +static void r8152_eee_en(struct r8152 *tp, bool enable) +{ + u16 config1, config2, config3; + u32 ocp_data; + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); + config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; + config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); + config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; + + if (enable) { + ocp_data |= EEE_RX_EN | EEE_TX_EN; + config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; + config1 |= sd_rise_time(1); + config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; + config3 |= fast_snr(42); + } else { + ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | + RX_QUIET_EN); + config1 |= sd_rise_time(7); + config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN); + config3 |= fast_snr(511); + } + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); + ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); + ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); + ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); +} + +static void r8152b_enable_eee(struct r8152 *tp) +{ + r8152_eee_en(tp, true); + r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX); +} + +static void r8152b_enable_fc(struct r8152 *tp) +{ + u16 anar; + + anar = r8152_mdio_read(tp, MII_ADVERTISE); + anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; + r8152_mdio_write(tp, MII_ADVERTISE, anar); +} + static void rtl8152_disable(struct r8152 *tp) { r8152_aldps_en(tp, false); @@ -2701,6 +2772,47 @@ static void r8152b_enter_oob(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); } +static void r8153_aldps_en(struct r8152 *tp, bool enable) +{ + u16 data; + + data = ocp_reg_read(tp, OCP_POWER_CFG); + if (enable) { + data |= EN_ALDPS; + ocp_reg_write(tp, OCP_POWER_CFG, data); + } else { + data &= ~EN_ALDPS; + ocp_reg_write(tp, OCP_POWER_CFG, data); + msleep(20); + } +} + +static void r8153_eee_en(struct r8152 *tp, bool enable) +{ + u32 ocp_data; + u16 config; + + ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); + config = ocp_reg_read(tp, OCP_EEE_CFG); + + if (enable) { + ocp_data |= EEE_RX_EN | EEE_TX_EN; + config |= EEE10_EN; + } else { + ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); + config &= ~EEE10_EN; + } + + ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); + ocp_reg_write(tp, OCP_EEE_CFG, config); +} + +static void r8153_enable_eee(struct r8152 *tp) +{ + r8153_eee_en(tp, true); + ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); +} + static void r8153_hw_phy_cfg(struct r8152 *tp) { u32 ocp_data; @@ -2866,21 +2978,6 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data); } -static void r8153_aldps_en(struct r8152 *tp, bool enable) -{ - u16 data; - - data = ocp_reg_read(tp, OCP_POWER_CFG); - if (enable) { - data |= EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); - } else { - data &= ~EN_ALDPS; - ocp_reg_write(tp, OCP_POWER_CFG, data); - msleep(20); - } -} - static void rtl8153_disable(struct r8152 *tp) { r8153_aldps_en(tp, false); @@ -3246,103 +3343,6 @@ static int rtl8152_close(struct net_device *netdev) return res; } -static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg) -{ - ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev); - ocp_reg_write(tp, OCP_EEE_DATA, reg); - ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev); -} - -static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg) -{ - u16 data; - - r8152_mmd_indirect(tp, dev, reg); - data = ocp_reg_read(tp, OCP_EEE_DATA); - ocp_reg_write(tp, OCP_EEE_AR, 0x0000); - - return data; -} - -static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data) -{ - r8152_mmd_indirect(tp, dev, reg); - ocp_reg_write(tp, OCP_EEE_DATA, data); - ocp_reg_write(tp, OCP_EEE_AR, 0x0000); -} - -static void r8152_eee_en(struct r8152 *tp, bool enable) -{ - u16 config1, config2, config3; - u32 ocp_data; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); - config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask; - config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2); - config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask; - - if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; - config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN; - config1 |= sd_rise_time(1); - config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN; - config3 |= fast_snr(42); - } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); - config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | - RX_QUIET_EN); - config1 |= sd_rise_time(7); - config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN); - config3 |= fast_snr(511); - } - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); - ocp_reg_write(tp, OCP_EEE_CONFIG1, config1); - ocp_reg_write(tp, OCP_EEE_CONFIG2, config2); - ocp_reg_write(tp, OCP_EEE_CONFIG3, config3); -} - -static void r8152b_enable_eee(struct r8152 *tp) -{ - r8152_eee_en(tp, true); - r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX); -} - -static void r8153_eee_en(struct r8152 *tp, bool enable) -{ - u32 ocp_data; - u16 config; - - ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR); - config = ocp_reg_read(tp, OCP_EEE_CFG); - - if (enable) { - ocp_data |= EEE_RX_EN | EEE_TX_EN; - config |= EEE10_EN; - } else { - ocp_data &= ~(EEE_RX_EN | EEE_TX_EN); - config &= ~EEE10_EN; - } - - ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data); - ocp_reg_write(tp, OCP_EEE_CFG, config); -} - -static void r8153_enable_eee(struct r8152 *tp) -{ - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); -} - -static void r8152b_enable_fc(struct r8152 *tp) -{ - u16 anar; - - anar = r8152_mdio_read(tp, MII_ADVERTISE); - anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; - r8152_mdio_write(tp, MII_ADVERTISE, anar); -} - static void rtl_tally_reset(struct r8152 *tp) { u32 ocp_data; -- cgit v0.10.2 From 2dd436daac7848dbf3fe799cf59c1408871a14e3 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:06 +0800 Subject: r8152: move enabling PHY Move enabling PHY to init(), otherwise some other settings may fail. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index ae7db46..dbf11ba 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,14 +2632,6 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { - u16 data; - - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } - set_bit(PHY_RESET, &tp->flags); } @@ -2818,16 +2810,6 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; - if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || - tp->version == RTL_VER_05) - ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); - - data = r8152_mdio_read(tp, MII_BMCR); - if (data & BMCR_PDOWN) { - data &= ~BMCR_PDOWN; - r8152_mdio_write(tp, MII_BMCR, data); - } - if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3355,10 +3337,17 @@ static void rtl_tally_reset(struct r8152 *tp) static void r8152b_init(struct r8152 *tp) { u32 ocp_data; + u16 data; if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } + r8152_aldps_en(tp, false); if (tp->version == RTL_VER_01) { @@ -3394,6 +3383,7 @@ static void r8152b_init(struct r8152 *tp) static void r8153_init(struct r8152 *tp) { u32 ocp_data; + u16 data; int i; if (test_bit(RTL8152_UNPLUG, &tp->flags)) @@ -3416,6 +3406,23 @@ static void r8153_init(struct r8152 *tp) msleep(20); } + if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 || + tp->version == RTL_VER_05) + ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L); + + data = r8152_mdio_read(tp, MII_BMCR); + if (data & BMCR_PDOWN) { + data &= ~BMCR_PDOWN; + r8152_mdio_write(tp, MII_BMCR, data); + } + + for (i = 0; i < 500; i++) { + ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK; + if (ocp_data == PHY_STAT_LAN_ON) + break; + msleep(20); + } + usb_disable_lpm(tp->udev); r8153_u2p3en(tp, false); -- cgit v0.10.2 From ef39df8eaba48c0de779440f41a648b17a560953 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:07 +0800 Subject: r8152: move PHY settings to hw_phy_cfg Move the PHY relative settings together to hw_phy_cfg(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index dbf11ba..9ce5bd5 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2632,6 +2632,10 @@ static void rtl8152_disable(struct r8152 *tp) static void r8152b_hw_phy_cfg(struct r8152 *tp) { + r8152b_enable_eee(tp); + r8152_aldps_en(tp, true); + r8152b_enable_fc(tp); + set_bit(PHY_RESET, &tp->flags); } @@ -2839,6 +2843,10 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) sram_write(tp, SRAM_10M_AMP1, 0x00af); sram_write(tp, SRAM_10M_AMP2, 0x0208); + r8153_enable_eee(tp); + r8153_aldps_en(tp, true); + r8152b_enable_fc(tp); + set_bit(PHY_RESET, &tp->flags); } @@ -3369,9 +3377,6 @@ static void r8152b_init(struct r8152 *tp) SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK; ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data); - r8152b_enable_eee(tp); - r8152_aldps_en(tp, true); - r8152b_enable_fc(tp); rtl_tally_reset(tp); /* enable rx aggregation */ @@ -3490,9 +3495,6 @@ static void r8153_init(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); - r8153_enable_eee(tp); - r8153_aldps_en(tp, true); - r8152b_enable_fc(tp); rtl_tally_reset(tp); r8153_u2p3en(tp, true); } -- cgit v0.10.2 From af0287ec10c62c84cc5cd1bad4fd37644a1ac41d Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:08 +0800 Subject: r8152: remove r8153_enable_eee Remove r8153_enable_eee(). Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 9ce5bd5..e7a05dd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2803,12 +2803,6 @@ static void r8153_eee_en(struct r8152 *tp, bool enable) ocp_reg_write(tp, OCP_EEE_CFG, config); } -static void r8153_enable_eee(struct r8152 *tp) -{ - r8153_eee_en(tp, true); - ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); -} - static void r8153_hw_phy_cfg(struct r8152 *tp) { u32 ocp_data; @@ -2843,7 +2837,9 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) sram_write(tp, SRAM_10M_AMP1, 0x00af); sram_write(tp, SRAM_10M_AMP2, 0x0208); - r8153_enable_eee(tp); + r8153_eee_en(tp, true); + ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX); + r8153_aldps_en(tp, true); r8152b_enable_fc(tp); -- cgit v0.10.2 From d768c61bc353a0e0de3f839e1de99eee7d4eca10 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Tue, 20 Sep 2016 16:22:09 +0800 Subject: r8152: disable ALDPS and EEE before setting PHY Disable ALDPS and EEE to avoid the possible failure when setting the PHY. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e7a05dd..c254248 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -32,7 +32,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "5" +#define NET_VERSION "6" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -2808,6 +2808,13 @@ static void r8153_hw_phy_cfg(struct r8152 *tp) u32 ocp_data; u16 data; + /* disable ALDPS before updating the PHY parameters */ + r8153_aldps_en(tp, false); + + /* disable EEE before updating the PHY parameters */ + r8153_eee_en(tp, false); + ocp_reg_write(tp, OCP_EEE_ADV, 0); + if (tp->version == RTL_VER_03) { data = ocp_reg_read(tp, OCP_EEE_CFG); data &= ~CTAP_SHORT_EN; @@ -3390,7 +3397,6 @@ static void r8153_init(struct r8152 *tp) if (test_bit(RTL8152_UNPLUG, &tp->flags)) return; - r8153_aldps_en(tp, false); r8153_u1u2en(tp, false); for (i = 0; i < 500; i++) { -- cgit v0.10.2 From b5036cd4ed3173ab8cdbc85e2ba74acf46bafb51 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 20 Sep 2016 16:17:22 +0200 Subject: ipmr, ip6mr: return lastuse relative to now When I introduced the lastuse member I made a subtle error because it was returned as an absolute value but that is meaningless to user-space as it doesn't allow to see how old exactly an entry is. Let's make it similar to how the bridge returns such values and make it relative to "now" (jiffies). This allows us to show the actual age of the entries and is much more useful (e.g. user-space daemons can age out entries, iproute2 can display the lastuse properly). Fixes: 43b9e1274060 ("net: ipmr/ip6mr: add support for keeping an entry age") Reported-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2625332..a87bcd2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2076,6 +2076,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; + unsigned long lastuse; int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ @@ -2105,12 +2106,14 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; + mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, - jiffies_to_clock_t(c->mfc_un.res.lastuse), + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), RTA_PAD)) return -EMSGSIZE; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6122f9c..fccb5dd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2239,6 +2239,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; + unsigned long lastuse; int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ @@ -2269,12 +2270,14 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + lastuse = READ_ONCE(c->mfc_un.res.lastuse); + lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0; + mfcs.mfcs_packets = c->mfc_un.res.pkt; mfcs.mfcs_bytes = c->mfc_un.res.bytes; mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) || - nla_put_u64_64bit(skb, RTA_EXPIRES, - jiffies_to_clock_t(c->mfc_un.res.lastuse), + nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse), RTA_PAD)) return -EMSGSIZE; -- cgit v0.10.2 From 63c43787d35e45562a6b5927e2edc8f4783d95b8 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 19 Sep 2016 16:17:57 +0200 Subject: vti6: fix input path Since commit 1625f4529957, vti6 is broken, all input packets are dropped (LINUX_MIB_XFRMINNOSTATES is incremented). XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 is set by vti6_rcv() before calling xfrm6_rcv()/xfrm6_rcv_spi(), thus we cannot set to NULL that value in xfrm6_rcv_spi(). A new function xfrm6_rcv_tnl() that enables to pass a value to xfrm6_rcv_spi() is added, so that xfrm6_rcv() is not touched (this function is used in several handlers). CC: Alexey Kodanev Fixes: 1625f4529957 ("net/xfrm_input: fix possible NULL deref of tunnel.ip6->parms.i_key") Signed-off-by: Nicolas Dichtel Signed-off-by: Steffen Klassert diff --git a/include/net/xfrm.h b/include/net/xfrm.h index adfebd6..1793431 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1540,8 +1540,10 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); void xfrm4_local_error(struct sk_buff *skb, u32 mtu); int xfrm6_extract_header(struct sk_buff *skb); int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi); +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t); int xfrm6_transport_finish(struct sk_buff *skb, int async); +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t); int xfrm6_rcv(struct sk_buff *skb); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 52a2f73..5bd3afd 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -321,11 +321,9 @@ static int vti6_rcv(struct sk_buff *skb) goto discard; } - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; - rcu_read_unlock(); - return xfrm6_rcv(skb); + return xfrm6_rcv_tnl(skb, t); } rcu_read_unlock(); return -EINVAL; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 00a2d40..b578956 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -21,9 +21,10 @@ int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm6_extract_header(skb); } -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi, + struct ip6_tnl *t) { - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; XFRM_SPI_SKB_CB(skb)->family = AF_INET6; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); return xfrm_input(skb, nexthdr, spi, 0); @@ -49,13 +50,18 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return -1; } -int xfrm6_rcv(struct sk_buff *skb) +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t) { return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], - 0); + 0, t); } -EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_rcv_tnl); +int xfrm6_rcv(struct sk_buff *skb) +{ + return xfrm6_rcv_tnl(skb, NULL); +} +EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 5743044..e1c0bbe 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -236,7 +236,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) __be32 spi; spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr); - return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi); + return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL); } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -- cgit v0.10.2 From 8d58790b832e13d6006d842037732304af357c3c Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 19 Sep 2016 21:34:01 +0200 Subject: net: can: ifi: Configure transmitter delay Configure the transmitter delay register at +0x1c to correctly handle the CAN FD bitrate switch (BRS). This moves the SSP (secondary sample point) to a proper offset, so that the TDC mechanism works and won't generate error frames on the CAN link. Signed-off-by: Marek Vasut Cc: Marc Kleine-Budde Cc: Mark Rutland Cc: Oliver Hartkopp Cc: Wolfgang Grandegger Cc: linux-stable Signed-off-by: Marc Kleine-Budde diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c index 2d1d22e..368bb07 100644 --- a/drivers/net/can/ifi_canfd/ifi_canfd.c +++ b/drivers/net/can/ifi_canfd/ifi_canfd.c @@ -81,6 +81,10 @@ #define IFI_CANFD_TIME_SET_TIMEA_4_12_6_6 BIT(15) #define IFI_CANFD_TDELAY 0x1c +#define IFI_CANFD_TDELAY_DEFAULT 0xb +#define IFI_CANFD_TDELAY_MASK 0x3fff +#define IFI_CANFD_TDELAY_ABS BIT(14) +#define IFI_CANFD_TDELAY_EN BIT(15) #define IFI_CANFD_ERROR 0x20 #define IFI_CANFD_ERROR_TX_OFFSET 0 @@ -641,7 +645,7 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) struct ifi_canfd_priv *priv = netdev_priv(ndev); const struct can_bittiming *bt = &priv->can.bittiming; const struct can_bittiming *dbt = &priv->can.data_bittiming; - u16 brp, sjw, tseg1, tseg2; + u16 brp, sjw, tseg1, tseg2, tdc; /* Configure bit timing */ brp = bt->brp - 2; @@ -664,6 +668,11 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev) (brp << IFI_CANFD_TIME_PRESCALE_OFF) | (sjw << IFI_CANFD_TIME_SJW_OFF_7_9_8_8), priv->base + IFI_CANFD_FTIME); + + /* Configure transmitter delay */ + tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK; + writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc, + priv->base + IFI_CANFD_TDELAY); } static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id, -- cgit v0.10.2 From fba1296624bf95fc07057da1e26beee8a733180c Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 20 Sep 2016 14:55:31 +0300 Subject: net/mlx4_core: Fix to clean devlink resources This patch cleans devlink resources by calling devlink_port_unregister() to avoid the following issues: - Kernel panic when triggering reset flow. - Memory leak due to unfreed resources in mlx4_init_port_info(). Fixes: 09d4d087cd48 ("mlx4: Implement devlink interface") Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 75dd2e3..7183ac4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2970,6 +2970,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) mlx4_err(dev, "Failed to create mtu file for port %d\n", port); device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + devlink_port_unregister(&info->devlink_port); info->port = -1; } @@ -2984,6 +2985,8 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); device_remove_file(&info->dev->persist->pdev->dev, &info->port_mtu_attr); + devlink_port_unregister(&info->devlink_port); + #ifdef CONFIG_RFS_ACCEL free_irq_cpu_rmap(info->rmap); info->rmap = NULL; -- cgit v0.10.2 From adb03115f4590baa280ddc440a8eff08a6be0cb7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Sep 2016 18:06:17 -0700 Subject: net: get rid of an signed integer overflow in ip_idents_reserve() Jiri Pirko reported an UBSAN warning happening in ip_idents_reserve() [] UBSAN: Undefined behaviour in ./arch/x86/include/asm/atomic.h:156:11 [] signed integer overflow: [] -2117905507 + -695755206 cannot be represented in type 'int' Since we do not have uatomic_add_return() yet, use atomic_cmpxchg() so that the arithmetics can be done using unsigned int. Fixes: 04ca6973f7c1 ("ip: make IP identifiers less predictable") Signed-off-by: Eric Dumazet Reported-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a1f2830..b5b47a2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -476,12 +476,18 @@ u32 ip_idents_reserve(u32 hash, int segs) atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; u32 old = ACCESS_ONCE(*p_tstamp); u32 now = (u32)jiffies; - u32 delta = 0; + u32 new, delta = 0; if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - return atomic_add_return(segs + delta, p_id) - segs; + /* Do not use atomic_add_return() as it makes UBSAN unhappy */ + do { + old = (u32)atomic_read(p_id); + new = old + delta + segs; + } while (atomic_cmpxchg(p_id, old, new) != old); + + return new - segs; } EXPORT_SYMBOL(ip_idents_reserve); -- cgit v0.10.2 From 75c9510b8f745f75280029a8a9f96567f55f401e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 20 Sep 2016 23:33:15 -0400 Subject: MAINTAINERS: Update b44 maintainer. Taking over as maintainer since Gary Zambrano is no longer working for Broadcom. Signed-off-by: Michael Chan Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 247b418..3df4be3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2500,7 +2500,7 @@ S: Supported F: kernel/bpf/ BROADCOM B44 10/100 ETHERNET DRIVER -M: Gary Zambrano +M: Michael Chan L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/b44.* -- cgit v0.10.2 From de1d657816c6fbb70f07b01d50ec669dff0d4e60 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 21 Sep 2016 16:16:14 -0700 Subject: tcp: fix under-accounting retransmit SNMP counters This patch fixes these under-accounting SNMP rtx stats LINUX_MIB_TCPFORWARDRETRANS LINUX_MIB_TCPFASTRETRANS LINUX_MIB_TCPSLOWSTARTRETRANS when retransmitting TSO packets Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Yuchung Cheng Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f53d0cc..e15ec82 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2831,7 +2831,7 @@ begin_fwd: if (tcp_retransmit_skb(sk, skb, segs)) return; - NET_INC_STATS(sock_net(sk), mib_idx); + NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb)); if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); -- cgit v0.10.2 From 7e32b44361abc77fbc01f2b97b045c405b2583e5 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 21 Sep 2016 16:16:15 -0700 Subject: tcp: properly account Fast Open SYN-ACK retrans Since the TFO socket is accepted right off SYN-data, the socket owner can call getsockopt(TCP_INFO) to collect ongoing SYN-ACK retransmission or timeout stats (i.e., tcpi_total_retrans, tcpi_retransmits). Currently those stats are only updated upon handshake completes. This patch fixes it. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Neal Cardwell Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3ebf45b..08323bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5885,7 +5885,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) * so release it. */ if (req) { - tp->total_retrans = req->num_retrans; + inet_csk(sk)->icsk_retransmits = 0; reqsk_fastopen_remove(sk, req, false); } else { /* Make sure socket is routed, for correct metrics. */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e15ec82..5288cec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3568,6 +3568,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) if (!res) { __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + if (unlikely(tcp_passive_fastopen(sk))) + tcp_sk(sk)->total_retrans++; } return res; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index d84930b..f712b41 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -384,6 +384,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk) */ inet_rtx_syn_ack(sk, req); req->num_timeout++; + icsk->icsk_retransmits++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); } -- cgit v0.10.2