From a8a572a6b5f2a79280d6e302cb3c1cb1fbaeb3e8 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Thu, 29 Oct 2015 09:51:16 -0400 Subject: xfrm: dst_entries_init() per-net dst_ops Remove the dst_entries_init/destroy calls for xfrm4 and xfrm6 dst_ops templates; their dst_entries counters will never be used. Move the xfrm dst_ops initialization from the common xfrm/xfrm_policy.c to xfrm4/xfrm4_policy.c and xfrm6/xfrm6_policy.c, and call dst_entries_init and dst_entries_destroy for each net namespace. The ipv4 and ipv6 xfrms each create dst_ops template, and perform dst_entries_init on the templates. The template values are copied to each net namespace's xfrm.xfrm*_dst_ops. The problem there is the dst_ops pcpuc_entries field is a percpu counter and cannot be used correctly by simply copying it to another object. The result of this is a very subtle bug; changes to the dst entries counter from one net namespace may sometimes get applied to a different net namespace dst entries counter. This is because of how the percpu counter works; it has a main count field as well as a pointer to the percpu variables. Each net namespace maintains its own main count variable, but all point to one set of percpu variables. When any net namespace happens to change one of the percpu variables to outside its small batch range, its count is moved to the net namespace's main count variable. So with multiple net namespaces operating concurrently, the dst_ops entries counter can stray from the actual value that it should be; if counts are consistently moved from one net namespace to another (which my testing showed is likely), then one net namespace winds up with a negative dst_ops count while another winds up with a continually increasing count, eventually reaching its gc_thresh limit, which causes all new traffic on the net namespace to fail with -ENOBUFS. Signed-off-by: Dan Streetman Signed-off-by: Dan Streetman Signed-off-by: Steffen Klassert diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c10a9ee..126ff90 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -236,7 +236,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xfrm_dst_ifdown(dst, dev); } -static struct dst_ops xfrm4_dst_ops = { +static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, @@ -250,7 +250,7 @@ static struct dst_ops xfrm4_dst_ops = { static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, - .dst_ops = &xfrm4_dst_ops, + .dst_ops = &xfrm4_dst_ops_template, .dst_lookup = xfrm4_dst_lookup, .get_saddr = xfrm4_get_saddr, .decode_session = _decode_session4, @@ -272,7 +272,7 @@ static struct ctl_table xfrm4_policy_table[] = { { } }; -static int __net_init xfrm4_net_init(struct net *net) +static int __net_init xfrm4_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -300,7 +300,7 @@ err_alloc: return -ENOMEM; } -static void __net_exit xfrm4_net_exit(struct net *net) +static void __net_exit xfrm4_net_sysctl_exit(struct net *net) { struct ctl_table *table; @@ -312,12 +312,44 @@ static void __net_exit xfrm4_net_exit(struct net *net) if (!net_eq(net, &init_net)) kfree(table); } +#else /* CONFIG_SYSCTL */ +static int inline xfrm4_net_sysctl_init(struct net *net) +{ + return 0; +} + +static void inline xfrm4_net_sysctl_exit(struct net *net) +{ +} +#endif + +static int __net_init xfrm4_net_init(struct net *net) +{ + int ret; + + memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template, + sizeof(xfrm4_dst_ops_template)); + ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops); + if (ret) + return ret; + + ret = xfrm4_net_sysctl_init(net); + if (ret) + dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); + + return ret; +} + +static void __net_exit xfrm4_net_exit(struct net *net) +{ + xfrm4_net_sysctl_exit(net); + dst_entries_destroy(&net->xfrm.xfrm4_dst_ops); +} static struct pernet_operations __net_initdata xfrm4_net_ops = { .init = xfrm4_net_init, .exit = xfrm4_net_exit, }; -#endif static void __init xfrm4_policy_init(void) { @@ -326,13 +358,9 @@ static void __init xfrm4_policy_init(void) void __init xfrm4_init(void) { - dst_entries_init(&xfrm4_dst_ops); - xfrm4_state_init(); xfrm4_policy_init(); xfrm4_protocol_init(); -#ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm4_net_ops); -#endif } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index da55e0c..d51a18d 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -281,7 +281,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, xfrm_dst_ifdown(dst, dev); } -static struct dst_ops xfrm6_dst_ops = { +static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, @@ -295,7 +295,7 @@ static struct dst_ops xfrm6_dst_ops = { static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, - .dst_ops = &xfrm6_dst_ops, + .dst_ops = &xfrm6_dst_ops_template, .dst_lookup = xfrm6_dst_lookup, .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, @@ -327,7 +327,7 @@ static struct ctl_table xfrm6_policy_table[] = { { } }; -static int __net_init xfrm6_net_init(struct net *net) +static int __net_init xfrm6_net_sysctl_init(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -355,7 +355,7 @@ err_alloc: return -ENOMEM; } -static void __net_exit xfrm6_net_exit(struct net *net) +static void __net_exit xfrm6_net_sysctl_exit(struct net *net) { struct ctl_table *table; @@ -367,24 +367,52 @@ static void __net_exit xfrm6_net_exit(struct net *net) if (!net_eq(net, &init_net)) kfree(table); } +#else /* CONFIG_SYSCTL */ +static int inline xfrm6_net_sysctl_init(struct net *net) +{ + return 0; +} + +static void inline xfrm6_net_sysctl_exit(struct net *net) +{ +} +#endif + +static int __net_init xfrm6_net_init(struct net *net) +{ + int ret; + + memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template, + sizeof(xfrm6_dst_ops_template)); + ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops); + if (ret) + return ret; + + ret = xfrm6_net_sysctl_init(net); + if (ret) + dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); + + return ret; +} + +static void __net_exit xfrm6_net_exit(struct net *net) +{ + xfrm6_net_sysctl_exit(net); + dst_entries_destroy(&net->xfrm.xfrm6_dst_ops); +} static struct pernet_operations xfrm6_net_ops = { .init = xfrm6_net_init, .exit = xfrm6_net_exit, }; -#endif int __init xfrm6_init(void) { int ret; - dst_entries_init(&xfrm6_dst_ops); - ret = xfrm6_policy_init(); - if (ret) { - dst_entries_destroy(&xfrm6_dst_ops); + if (ret) goto out; - } ret = xfrm6_state_init(); if (ret) goto out_policy; @@ -393,9 +421,7 @@ int __init xfrm6_init(void) if (ret) goto out_state; -#ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm6_net_ops); -#endif out: return ret; out_state: @@ -407,11 +433,8 @@ out_policy: void xfrm6_fini(void) { -#ifdef CONFIG_SYSCTL unregister_pernet_subsys(&xfrm6_net_ops); -#endif xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); - dst_entries_destroy(&xfrm6_dst_ops); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 94af3d0..bacd30b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2807,7 +2807,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { - struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2838,26 +2837,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) } spin_unlock(&xfrm_policy_afinfo_lock); - rtnl_lock(); - for_each_net(net) { - struct dst_ops *xfrm_dst_ops; - - switch (afinfo->family) { - case AF_INET: - xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; - break; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; - break; -#endif - default: - BUG(); - } - *xfrm_dst_ops = *afinfo->dst_ops; - } - rtnl_unlock(); - return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2893,22 +2872,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); -static void __net_init xfrm_dst_ops_init(struct net *net) -{ - struct xfrm_policy_afinfo *afinfo; - - rcu_read_lock(); - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); - if (afinfo) - net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; -#if IS_ENABLED(CONFIG_IPV6) - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); - if (afinfo) - net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; -#endif - rcu_read_unlock(); -} - static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); @@ -3057,7 +3020,6 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; - xfrm_dst_ops_init(net); rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; -- cgit v0.10.2 From 1dbe162d53e11665b48a1c122899ffc2c068bef4 Mon Sep 17 00:00:00 2001 From: Dongdong Liu Date: Fri, 4 Dec 2015 16:32:25 -0600 Subject: PCI: hisi: Fix hisi_pcie_cfg_read() 32-bit reads For 32-bit config reads (size == 4), hisi_pcie_cfg_read() returned success but never filled in the data we read. Return the register data for 32-bit config reads. Without this fix, PCI doesn't work at all because enumeration depends on 32-bit config reads. The driver was tested internally, but got broken in the process of upstreaming, so this fixes the breakage. Fixes: 500a1d9a43e0 ("PCI: hisi: Add HiSilicon SoC Hip05 PCIe driver") Signed-off-by: Dongdong Liu Signed-off-by: Bjorn Helgaas Reviewed-by: Zhou Wang diff --git a/drivers/pci/host/pcie-hisi.c b/drivers/pci/host/pcie-hisi.c index 163671a..77f7c66 100644 --- a/drivers/pci/host/pcie-hisi.c +++ b/drivers/pci/host/pcie-hisi.c @@ -61,7 +61,9 @@ static int hisi_pcie_cfg_read(struct pcie_port *pp, int where, int size, *val = *(u8 __force *) walker; else if (size == 2) *val = *(u16 __force *) walker; - else if (size != 4) + else if (size == 4) + *val = reg_val; + else return PCIBIOS_BAD_REGISTER_NUMBER; return PCIBIOS_SUCCESSFUL; -- cgit v0.10.2 From 628a2918afe42fae2f90749ad3721853fd06b262 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 3 Dec 2015 17:26:59 +0100 Subject: iwlwifi: separate firmware version for 7260 devices The 7260 devices aren't going to be updated for completely new firmware versions any more (only bugfixes), and haven't been since API version 17. Encode that in the data structures to avoid trying to load FW images that will never exist. Signed-off-by: Johannes Berg Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/iwl-7000.c b/drivers/net/wireless/iwlwifi/iwl-7000.c index bf88ec3..d9a4aee 100644 --- a/drivers/net/wireless/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/iwlwifi/iwl-7000.c @@ -69,13 +69,19 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL7260_UCODE_API_MAX 19 +#define IWL7260_UCODE_API_MAX 17 +#define IWL7265_UCODE_API_MAX 19 +#define IWL7265D_UCODE_API_MAX 19 /* Oldest version we won't warn about */ #define IWL7260_UCODE_API_OK 13 +#define IWL7265_UCODE_API_OK 13 +#define IWL7265D_UCODE_API_OK 13 /* Lowest firmware API version supported */ #define IWL7260_UCODE_API_MIN 13 +#define IWL7265_UCODE_API_MIN 13 +#define IWL7265D_UCODE_API_MIN 13 /* NVM versions */ #define IWL7260_NVM_VERSION 0x0a1d @@ -149,10 +155,7 @@ static const struct iwl_ht_params iwl7000_ht_params = { .ht40_bands = BIT(IEEE80211_BAND_2GHZ) | BIT(IEEE80211_BAND_5GHZ), }; -#define IWL_DEVICE_7000 \ - .ucode_api_max = IWL7260_UCODE_API_MAX, \ - .ucode_api_ok = IWL7260_UCODE_API_OK, \ - .ucode_api_min = IWL7260_UCODE_API_MIN, \ +#define IWL_DEVICE_7000_COMMON \ .device_family = IWL_DEVICE_FAMILY_7000, \ .max_inst_size = IWL60_RTC_INST_SIZE, \ .max_data_size = IWL60_RTC_DATA_SIZE, \ @@ -163,6 +166,24 @@ static const struct iwl_ht_params iwl7000_ht_params = { .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, \ .dccm_offset = IWL7000_DCCM_OFFSET +#define IWL_DEVICE_7000 \ + IWL_DEVICE_7000_COMMON, \ + .ucode_api_max = IWL7260_UCODE_API_MAX, \ + .ucode_api_ok = IWL7260_UCODE_API_OK, \ + .ucode_api_min = IWL7260_UCODE_API_MIN + +#define IWL_DEVICE_7005 \ + IWL_DEVICE_7000_COMMON, \ + .ucode_api_max = IWL7265_UCODE_API_MAX, \ + .ucode_api_ok = IWL7265_UCODE_API_OK, \ + .ucode_api_min = IWL7265_UCODE_API_MIN + +#define IWL_DEVICE_7005D \ + IWL_DEVICE_7000_COMMON, \ + .ucode_api_max = IWL7265D_UCODE_API_MAX, \ + .ucode_api_ok = IWL7265D_UCODE_API_OK, \ + .ucode_api_min = IWL7265D_UCODE_API_MIN + const struct iwl_cfg iwl7260_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 7260", .fw_name_pre = IWL7260_FW_PRE, @@ -266,7 +287,7 @@ static const struct iwl_ht_params iwl7265_ht_params = { const struct iwl_cfg iwl3165_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 3165", .fw_name_pre = IWL7265D_FW_PRE, - IWL_DEVICE_7000, + IWL_DEVICE_7005D, .ht_params = &iwl7000_ht_params, .nvm_ver = IWL3165_NVM_VERSION, .nvm_calib_ver = IWL3165_TX_POWER_VERSION, @@ -277,7 +298,7 @@ const struct iwl_cfg iwl3165_2ac_cfg = { const struct iwl_cfg iwl7265_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 7265", .fw_name_pre = IWL7265_FW_PRE, - IWL_DEVICE_7000, + IWL_DEVICE_7005, .ht_params = &iwl7265_ht_params, .nvm_ver = IWL7265_NVM_VERSION, .nvm_calib_ver = IWL7265_TX_POWER_VERSION, @@ -288,7 +309,7 @@ const struct iwl_cfg iwl7265_2ac_cfg = { const struct iwl_cfg iwl7265_2n_cfg = { .name = "Intel(R) Dual Band Wireless N 7265", .fw_name_pre = IWL7265_FW_PRE, - IWL_DEVICE_7000, + IWL_DEVICE_7005, .ht_params = &iwl7265_ht_params, .nvm_ver = IWL7265_NVM_VERSION, .nvm_calib_ver = IWL7265_TX_POWER_VERSION, @@ -299,7 +320,7 @@ const struct iwl_cfg iwl7265_2n_cfg = { const struct iwl_cfg iwl7265_n_cfg = { .name = "Intel(R) Wireless N 7265", .fw_name_pre = IWL7265_FW_PRE, - IWL_DEVICE_7000, + IWL_DEVICE_7005, .ht_params = &iwl7265_ht_params, .nvm_ver = IWL7265_NVM_VERSION, .nvm_calib_ver = IWL7265_TX_POWER_VERSION, @@ -310,7 +331,7 @@ const struct iwl_cfg iwl7265_n_cfg = { const struct iwl_cfg iwl7265d_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 7265", .fw_name_pre = IWL7265D_FW_PRE, - IWL_DEVICE_7000, + IWL_DEVICE_7005D, .ht_params = &iwl7265_ht_params, .nvm_ver = IWL7265D_NVM_VERSION, .nvm_calib_ver = IWL7265_TX_POWER_VERSION, @@ -321,7 +342,7 @@ const struct iwl_cfg iwl7265d_2ac_cfg = { const struct iwl_cfg iwl7265d_2n_cfg = { .name = "Intel(R) Dual Band Wireless N 7265", .fw_name_pre = IWL7265D_FW_PRE, - IWL_DEVICE_7000, + IWL_DEVICE_7005D, .ht_params = &iwl7265_ht_params, .nvm_ver = IWL7265D_NVM_VERSION, .nvm_calib_ver = IWL7265_TX_POWER_VERSION, @@ -332,7 +353,7 @@ const struct iwl_cfg iwl7265d_2n_cfg = { const struct iwl_cfg iwl7265d_n_cfg = { .name = "Intel(R) Wireless N 7265", .fw_name_pre = IWL7265D_FW_PRE, - IWL_DEVICE_7000, + IWL_DEVICE_7005D, .ht_params = &iwl7265_ht_params, .nvm_ver = IWL7265D_NVM_VERSION, .nvm_calib_ver = IWL7265_TX_POWER_VERSION, @@ -342,5 +363,5 @@ const struct iwl_cfg iwl7265d_n_cfg = { MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); -MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); -MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK)); +MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7265_UCODE_API_OK)); +MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7265D_UCODE_API_OK)); -- cgit v0.10.2 From 4585436091cd812b1165aab71bd4847ea1cb08ec Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 11 Dec 2015 09:06:25 +0100 Subject: iwlwifi: mvm: protect RCU dereference in iwl_mvm_get_key_sta_id Properly protect the RCU dereference in iwl_mvm_get_key_sta_id() when coming from iwl_mvm_update_tkip_key() which cannot hold the mvm->mutex by moving the call into the RCU critical section. Modify the check to use rcu_dereference_check() to permit this. Fixes: 9513c5e18a0d ("iwlwifi: mvm: Avoid dereferencing sta if it was already flushed") Reported-by: Laura Abbott Signed-off-by: Johannes Berg Signed-off-by: Emmanuel Grumbach diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c index 354acbd..2b976b1 100644 --- a/drivers/net/wireless/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/iwlwifi/mvm/sta.c @@ -1222,8 +1222,8 @@ static u8 iwl_mvm_get_key_sta_id(struct iwl_mvm *mvm, mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) { u8 sta_id = mvmvif->ap_sta_id; - sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], - lockdep_is_held(&mvm->mutex)); + sta = rcu_dereference_check(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); /* * It is possible that the 'sta' parameter is NULL, * for example when a GTK is removed - the sta_id will then @@ -1590,14 +1590,15 @@ void iwl_mvm_update_tkip_key(struct iwl_mvm *mvm, u16 *phase1key) { struct iwl_mvm_sta *mvm_sta; - u8 sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta); + u8 sta_id; bool mcast = !(keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE); - if (WARN_ON_ONCE(sta_id == IWL_MVM_STATION_COUNT)) - return; - rcu_read_lock(); + sta_id = iwl_mvm_get_key_sta_id(mvm, vif, sta); + if (WARN_ON_ONCE(sta_id == IWL_MVM_STATION_COUNT)) + goto unlock; + if (!sta) { sta = rcu_dereference(mvm->fw_id_to_mac_id[sta_id]); if (WARN_ON(IS_ERR_OR_NULL(sta))) { @@ -1609,6 +1610,8 @@ void iwl_mvm_update_tkip_key(struct iwl_mvm *mvm, mvm_sta = iwl_mvm_sta_from_mac80211(sta); iwl_mvm_send_sta_key(mvm, mvm_sta, keyconf, mcast, iv32, phase1key, CMD_ASYNC, keyconf->hw_key_idx); + + unlock: rcu_read_unlock(); } -- cgit v0.10.2 From aa47e42c60dfa31f81a3fe357451acfe1a12ca1e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 15 Dec 2015 21:29:16 +0100 Subject: netfilter: nf_tables: use skb->protocol instead of assuming ethernet header Otherwise we may end up with incorrect network and transport header for other protocols. Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 7b9c053..edb3502f 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -94,7 +94,7 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, { struct nft_pktinfo pkt; - switch (eth_hdr(skb)->h_proto) { + switch (skb->protocol) { case htons(ETH_P_IP): nft_netdev_set_pktinfo_ipv4(&pkt, skb, state); break; -- cgit v0.10.2 From d5f79b6e4d169039903cc869e16e59ad861dd479 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 18 Dec 2015 14:32:07 +0100 Subject: netfilter: nft_ct: include direction when dumping NFT_CT_L3PROTOCOL key one nft userspace test case fails with 'ct l3proto original ipv4' mismatches 'ct l3proto ipv4' ... because NFTA_CT_DIRECTION attr is missing. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 8cbca34..9399215 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -366,6 +366,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; switch (priv->key) { + case NFT_CT_L3PROTOCOL: case NFT_CT_PROTOCOL: case NFT_CT_SRC: case NFT_CT_DST: -- cgit v0.10.2 From 1873c58d4a45bd4d7104ba1482fcd9c3bd094cd1 Mon Sep 17 00:00:00 2001 From: "Pascal Speck (Iktek)" Date: Fri, 4 Dec 2015 16:55:17 +0100 Subject: ethernet:ti:cpsw: fix phy identification with multiple slaves on fixed-phy When using more than one slave with ti cpsw and fixed phy the pd->phy_id will be always zero, but slave_data->phy_id must be unique. pd->phy_id means a "phy hardware id" whereas slave_data->phy_id means an "unique id", so we should use pd->addr which has the same unique meaning. Fixes: 1f71e8c96fc6 ("drivers: net: cpsw: Add support for fixed-link PHY") Signed-off-by: Pascal Speck Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 48b92c9..e3b220d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2047,7 +2047,7 @@ static int cpsw_probe_dt(struct cpsw_priv *priv, if (!pd) return -ENODEV; snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, pd->bus->id, pd->phy_id); + PHY_ID_FMT, pd->bus->id, pd->addr); goto no_phy_slave; } parp = of_get_property(slave_node, "phy_id", &lenp); -- cgit v0.10.2 From f1eea5c15ae799a1291f0f481fa3ea09be913fa9 Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 16 Dec 2015 23:02:10 -0500 Subject: drivers: net: cpsw: fix RMII/RGMII mode when used with fixed-link PHY Commit 1f71e8c96fc654724723ce987e0a8b2aeb81746d ("drivers: net: cpsw: Add support for fixed-link PHY") did not parse the "phy-mode" property in the case of a fixed-link PHY, leaving slave_data->phy_if with its default of PHY_INTERFACE_MODE_NA(0). This later gets passed to phy_connect() in cpsw_slave_open(), and eventually to cpsw_phy_sel() where it hits a default case that configures the MAC for MII mode. The user visible symptom is that while kernel log messages seem to indicate that the interface is set up, there is no network communication. Eventually a watchdog error occurs: NETDEV WATCHDOG: eth0 (cpsw): transmit queue 0 timed out Fixes: 1f71e8c96fc6 ("drivers: net: cpsw: Add support for fixed-link PHY") Signed-off-by: David Rivshin Signed-off-by: David S. Miller diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 9853f8e..28a4781 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -40,18 +40,18 @@ Optional properties: Slave Properties: Required properties: -- phy_id : Specifies slave phy id - phy-mode : See ethernet.txt file in the same directory Optional properties: - dual_emac_res_vlan : Specifies VID to be used to segregate the ports - mac-address : See ethernet.txt file in the same directory +- phy_id : Specifies slave phy id - phy-handle : See ethernet.txt file in the same directory Slave sub-nodes: - fixed-link : See fixed-link.txt file in the same directory - Either the properties phy_id and phy-mode, - or the sub-node fixed-link can be specified + Either the property phy_id, or the sub-node + fixed-link can be specified Note: "ti,hwmods" field is used to fetch the base address and irq resources from TI, omap hwmod data base during device registration. diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index e3b220d..bc6d20d 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2026,17 +2026,15 @@ static int cpsw_probe_dt(struct cpsw_priv *priv, for_each_child_of_node(node, slave_node) { struct cpsw_slave_data *slave_data = data->slave_data + i; const void *mac_addr = NULL; - u32 phyid; int lenp; const __be32 *parp; - struct device_node *mdio_node; - struct platform_device *mdio; /* This is no slave child node, continue */ if (strcmp(slave_node->name, "slave")) continue; priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); + parp = of_get_property(slave_node, "phy_id", &lenp); if (of_phy_is_fixed_link(slave_node)) { struct phy_device *pd; @@ -2048,23 +2046,29 @@ static int cpsw_probe_dt(struct cpsw_priv *priv, return -ENODEV; snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, pd->bus->id, pd->addr); + } else if (parp) { + u32 phyid; + struct device_node *mdio_node; + struct platform_device *mdio; + + if (lenp != (sizeof(__be32) * 2)) { + dev_err(&pdev->dev, "Invalid slave[%d] phy_id property\n", i); + goto no_phy_slave; + } + mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); + phyid = be32_to_cpup(parp+1); + mdio = of_find_device_by_node(mdio_node); + of_node_put(mdio_node); + if (!mdio) { + dev_err(&pdev->dev, "Missing mdio platform device\n"); + return -EINVAL; + } + snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), + PHY_ID_FMT, mdio->name, phyid); + } else { + dev_err(&pdev->dev, "No slave[%d] phy_id or fixed-link property\n", i); goto no_phy_slave; } - parp = of_get_property(slave_node, "phy_id", &lenp); - if ((parp == NULL) || (lenp != (sizeof(void *) * 2))) { - dev_err(&pdev->dev, "Missing slave[%d] phy_id property\n", i); - goto no_phy_slave; - } - mdio_node = of_find_node_by_phandle(be32_to_cpup(parp)); - phyid = be32_to_cpup(parp+1); - mdio = of_find_device_by_node(mdio_node); - of_node_put(mdio_node); - if (!mdio) { - dev_err(&pdev->dev, "Missing mdio platform device\n"); - return -EINVAL; - } - snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, mdio->name, phyid); slave_data->phy_if = of_get_phy_mode(slave_node); if (slave_data->phy_if < 0) { dev_err(&pdev->dev, "Missing or malformed slave[%d] phy-mode property\n", -- cgit v0.10.2 From dfc0a6d39aad6d633141726eb2e37e15bda1fccd Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 16 Dec 2015 23:02:11 -0500 Subject: drivers: net: cpsw: increment reference count on fixed-link PHY node When a fixed-link sub-node exists in a slave node, the slave node is also the PHY node. Since this is a separate use of the slave node, of_node_get() should be used to increment the reference count. Fixes: 1f71e8c96fc6 ("drivers: net: cpsw: Add support for fixed-link PHY") Signed-off-by: David Rivshin Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index bc6d20d..3b489ca 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2036,16 +2036,21 @@ static int cpsw_probe_dt(struct cpsw_priv *priv, priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", &lenp); if (of_phy_is_fixed_link(slave_node)) { - struct phy_device *pd; + struct device_node *phy_node; + struct phy_device *phy_dev; + /* In the case of a fixed PHY, the DT node associated + * to the PHY is the Ethernet MAC DT node. + */ ret = of_phy_register_fixed_link(slave_node); if (ret) return ret; - pd = of_phy_find_device(slave_node); - if (!pd) + phy_node = of_node_get(slave_node); + phy_dev = of_phy_find_device(phy_node); + if (!phy_dev) return -ENODEV; snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, pd->bus->id, pd->addr); + PHY_ID_FMT, phy_dev->bus->id, phy_dev->addr); } else if (parp) { u32 phyid; struct device_node *mdio_node; -- cgit v0.10.2 From fc9f5ea9b4ecbe9b7839c92f0a54261809c723d3 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 17 Dec 2015 15:35:37 +0200 Subject: net/mlx4_en: Remove dependency between timestamping capability and service_task Service task is responsible for other tasks in addition to timestamping overflow check. Launch it even if timestamping is not supported by device. Fixes: 07841f9d94c1 ('net/mlx4_en: Schedule napi when RX buffers allocation fails') Signed-off-by: Eugenia Emantayev Signed-off-by: Eran Ben Elisha Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 886e1bc..4eef316 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3058,9 +3058,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); - if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) - queue_delayed_work(mdev->workqueue, &priv->service_task, - SERVICE_TASK_DELAY); + queue_delayed_work(mdev->workqueue, &priv->service_task, + SERVICE_TASK_DELAY); mlx4_en_set_stats_bitmap(mdev->dev, &priv->stats_bitmap, mdev->profile.prof[priv->port].rx_ppp, -- cgit v0.10.2 From 90683061dd50b0d70f01466c2d694f4e928a86f3 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 17 Dec 2015 15:35:38 +0200 Subject: net/mlx4_en: Fix HW timestamp init issue upon system startup mlx4_en_init_timestamp was called before creation of netdev and port init, thus used uninitialized values. Specifically - NIC frequency was incorrect causing wrong calculations and later wrong HW timestamps. Fixes: 1ec4864b1017 ('net/mlx4_en: Fixed crash when port type is changed') Signed-off-by: Eugenia Emantayev Signed-off-by: Marina Varshaver Signed-off-by: Eran Ben Elisha Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 8a083d7..038f9ce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -242,6 +242,13 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) unsigned long flags; u64 ns, zero = 0; + /* mlx4_en_init_timestamp is called for each netdev. + * mdev->ptp_clock is common for all ports, skip initialization if + * was done for other port. + */ + if (mdev->ptp_clock) + return; + rwlock_init(&mdev->clock_lock); memset(&mdev->cycles, 0, sizeof(mdev->cycles)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 005f910..e0ec280 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -232,9 +232,6 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) if (mdev->pndev[i]) mlx4_en_destroy_netdev(mdev->pndev[i]); - if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) - mlx4_en_remove_timestamp(mdev); - flush_workqueue(mdev->workqueue); destroy_workqueue(mdev->workqueue); (void) mlx4_mr_free(dev, &mdev->mr); @@ -320,10 +317,6 @@ static void *mlx4_en_add(struct mlx4_dev *dev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) mdev->port_cnt++; - /* Initialize time stamp mechanism */ - if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) - mlx4_en_init_timestamp(mdev); - /* Set default number of RX rings*/ mlx4_en_set_num_rx_rings(mdev); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 4eef316..7869f97 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2072,6 +2072,9 @@ void mlx4_en_destroy_netdev(struct net_device *dev) /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + mlx4_en_remove_timestamp(mdev); + /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; @@ -3058,6 +3061,10 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); + /* Initialize time stamp mechanism */ + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) + mlx4_en_init_timestamp(mdev); + queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); -- cgit v0.10.2 From 6e3cd5fa65318f35ec9c9f61bc5cdb55d4783cb9 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Fri, 18 Dec 2015 01:40:50 +0530 Subject: be2net: Avoid accessing eq object in be_msix_register routine, when i < 0. When the first request_irq fails in be_msix_register, i value would be zero. The current code decrements the i value and accesses the eq object without validating the decremented "i" value. This can cause an "invalid memory address access" violation. This patch fixes the problem by accessing the eq object after validating the "i" value. Signed-off-by: Venkat Duvvuru Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b6ad029..6598820 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3299,8 +3299,10 @@ static int be_msix_register(struct be_adapter *adapter) return 0; err_msix: - for (i--, eqo = &adapter->eq_obj[i]; i >= 0; i--, eqo--) + for (i--; i >= 0; i--) { + eqo = &adapter->eq_obj[i]; free_irq(be_msix_vec_get(adapter, eqo), eqo); + } dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n", status); be_msix_disable(adapter); -- cgit v0.10.2 From acf673a3187edf72068ee2f92f4dc47d66baed47 Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 17 Dec 2015 16:05:32 -0500 Subject: 6pack: Fix use after free in sixpack_close(). Need to do the unregister_device() after all references to the driver private have been done. Also we need to use del_timer_sync() for the timers so that we don't have any asynchronous references after the unregister. Signed-off-by: David S. Miller diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 7c4a415..9f0b1c3 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -683,14 +683,14 @@ static void sixpack_close(struct tty_struct *tty) if (!atomic_dec_and_test(&sp->refcnt)) down(&sp->dead_sem); - unregister_netdev(sp->dev); - - del_timer(&sp->tx_t); - del_timer(&sp->resync_t); + del_timer_sync(&sp->tx_t); + del_timer_sync(&sp->resync_t); /* Free all 6pack frame buffers. */ kfree(sp->rbuff); kfree(sp->xbuff); + + unregister_netdev(sp->dev); } /* Perform I/O control on an active 6pack channel. */ -- cgit v0.10.2 From d79f16c046086f4fe0d42184a458e187464eb83e Mon Sep 17 00:00:00 2001 From: David Miller Date: Thu, 17 Dec 2015 16:05:49 -0500 Subject: mkiss: Fix use after free in mkiss_close(). Need to do the unregister_device() after all references to the driver private have been done. Signed-off-by: David S. Miller diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 216bfd3..0b72b9d 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -798,13 +798,13 @@ static void mkiss_close(struct tty_struct *tty) if (!atomic_dec_and_test(&ax->refcnt)) down(&ax->dead_sem); - unregister_netdev(ax->dev); - /* Free all AX25 frame buffers. */ kfree(ax->rbuff); kfree(ax->xbuff); ax->tty = NULL; + + unregister_netdev(ax->dev); } /* Perform I/O control on an active ax25 channel. */ -- cgit v0.10.2 From 6d3c348a63685410b12bf961b97063efeef2f901 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 17 Dec 2015 16:46:39 -0800 Subject: ipip: ioctl: Remove superfluous IP-TTL handling. IP-TTL case is already handled in ip_tunnel_ioctl() API. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index f34c31d..a09fb0d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -253,9 +253,6 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) p.i_key = p.o_key = 0; p.i_flags = p.o_flags = 0; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - err = ip_tunnel_ioctl(dev, &p, cmd); if (err) return err; -- cgit v0.10.2 From ea2465af3bbfa7994d134a401503966ee98710b6 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Fri, 18 Dec 2015 10:42:12 +0200 Subject: bnx2x: Prevent FW assertion when using Vxlan FW has a rare corner case in which a fragmented packet using lots of frags would not be linearized, causing the FW to assert while trying to transmit the packet. To prevent this, we need to make sure the window of fragements containing MSS worth of data contains 1 BD less than for regular packets due to the additional parsing BD. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index f8d7a2f..c82ab87 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3430,25 +3430,29 @@ static u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb) return rc; } -#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - 3) +/* VXLAN: 4 = 1 (for linear data BD) + 3 (2 for PBD and last BD) */ +#define BNX2X_NUM_VXLAN_TSO_WIN_SUB_BDS 4 + +/* Regular: 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */ +#define BNX2X_NUM_TSO_WIN_SUB_BDS 3 + +#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - BDS_PER_TX_PKT) /* check if packet requires linearization (packet is too fragmented) no need to check fragmentation if page size > 8K (there will be no violation to FW restrictions) */ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb, u32 xmit_type) { - int to_copy = 0; - int hlen = 0; - int first_bd_sz = 0; + int first_bd_sz = 0, num_tso_win_sub = BNX2X_NUM_TSO_WIN_SUB_BDS; + int to_copy = 0, hlen = 0; - /* 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */ - if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - 3)) { + if (xmit_type & XMIT_GSO_ENC) + num_tso_win_sub = BNX2X_NUM_VXLAN_TSO_WIN_SUB_BDS; + if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - num_tso_win_sub)) { if (xmit_type & XMIT_GSO) { unsigned short lso_mss = skb_shinfo(skb)->gso_size; - /* Check if LSO packet needs to be copied: - 3 = 1 (for headers BD) + 2 (for PBD and last BD) */ - int wnd_size = MAX_FETCH_BD - 3; + int wnd_size = MAX_FETCH_BD - num_tso_win_sub; /* Number of windows to check */ int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size; int wnd_idx = 0; -- cgit v0.10.2 From e905eabc90a5b787d8708df164543ee295bea5f2 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Fri, 18 Dec 2015 19:43:15 +0900 Subject: openvswitch: correct encoding of set tunnel action attributes In a set action tunnel attributes should be encoded in a nested action. I noticed this because ovs-dpctl was reporting an error when dumping flows due to the incorrect encoding of tunnel attributes in a set action. Fixes: fc4099f17240 ("openvswitch: Fix egress tunnel info.") Signed-off-by: Simon Horman Signed-off-by: David S. Miller diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 907d6fd..d1bd4a4 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2434,7 +2434,10 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) if (!start) return -EMSGSIZE; - err = ovs_nla_put_tunnel_info(skb, tun_info); + err = ip_tun_to_nlattr(skb, &tun_info->key, + ip_tunnel_info_opts(tun_info), + tun_info->options_len, + ip_tunnel_info_af(tun_info)); if (err) return err; nla_nest_end(skb, start); -- cgit v0.10.2 From b4a1b4f5047e4f54e194681125c74c0aa64d637d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 18 Dec 2015 01:34:26 +0000 Subject: KEYS: Fix race between read and revoke This fixes CVE-2015-7550. There's a race between keyctl_read() and keyctl_revoke(). If the revoke happens between keyctl_read() checking the validity of a key and the key's semaphore being taken, then the key type read method will see a revoked key. This causes a problem for the user-defined key type because it assumes in its read method that there will always be a payload in a non-revoked key and doesn't check for a NULL pointer. Fix this by making keyctl_read() check the validity of a key after taking semaphore instead of before. I think the bug was introduced with the original keyrings code. This was discovered by a multithreaded test program generated by syzkaller (http://github.com/google/syzkaller). Here's a cleaned up version: #include #include #include void *thr0(void *arg) { key_serial_t key = (unsigned long)arg; keyctl_revoke(key); return 0; } void *thr1(void *arg) { key_serial_t key = (unsigned long)arg; char buffer[16]; keyctl_read(key, buffer, 16); return 0; } int main() { key_serial_t key = add_key("user", "%", "foo", 3, KEY_SPEC_USER_KEYRING); pthread_t th[5]; pthread_create(&th[0], 0, thr0, (void *)(unsigned long)key); pthread_create(&th[1], 0, thr1, (void *)(unsigned long)key); pthread_create(&th[2], 0, thr0, (void *)(unsigned long)key); pthread_create(&th[3], 0, thr1, (void *)(unsigned long)key); pthread_join(th[0], 0); pthread_join(th[1], 0); pthread_join(th[2], 0); pthread_join(th[3], 0); return 0; } Build as: cc -o keyctl-race keyctl-race.c -lkeyutils -lpthread Run as: while keyctl-race; do :; done as it may need several iterations to crash the kernel. The crash can be summarised as: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] user_read+0x56/0xa3 ... Call Trace: [] keyctl_read_key+0xb6/0xd7 [] SyS_keyctl+0x83/0xe0 [] entry_SYSCALL_64_fastpath+0x12/0x6f Reported-by: Dmitry Vyukov Signed-off-by: David Howells Tested-by: Dmitry Vyukov Cc: stable@vger.kernel.org Signed-off-by: James Morris diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index fb111ea..1c3872a 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -751,16 +751,16 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) /* the key is probably readable - now try to read it */ can_read_key: - ret = key_validate(key); - if (ret == 0) { - ret = -EOPNOTSUPP; - if (key->type->read) { - /* read the data with the semaphore held (since we - * might sleep) */ - down_read(&key->sem); + ret = -EOPNOTSUPP; + if (key->type->read) { + /* Read the data with the semaphore held (since we might sleep) + * to protect against the key being updated or revoked. + */ + down_read(&key->sem); + ret = key_validate(key); + if (ret == 0) ret = key->type->read(key, buffer, buflen); - up_read(&key->sem); - } + up_read(&key->sem); } error2: -- cgit v0.10.2 From 179ccc0a73641ffd24e44ff10a7bd494efe98d8d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 19 Dec 2015 10:45:28 +0800 Subject: rhashtable: Kill harmless RCU warning in rhashtable_walk_init The commit c6ff5268293ef98e48a99597e765ffc417e39fa5 ("rhashtable: Fix walker list corruption") causes a suspicious RCU usage warning because we no longer hold ht->mutex when we dereference ht->tbl. However, this is a false positive because we now hold ht->lock which also guarantees that ht->tbl won't disppear from under us. This patch kills the warning by using rcu_dereference_protected. Reported-by: kernel test robot Signed-off-by: Herbert Xu Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index eb9240c..51282f5 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -519,7 +519,8 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) return -ENOMEM; spin_lock(&ht->lock); - iter->walker->tbl = rht_dereference(ht->tbl, ht); + iter->walker->tbl = + rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); list_add(&iter->walker->list, &iter->walker->tbl->walkers); spin_unlock(&ht->lock); -- cgit v0.10.2 From 45af55006c2c8f49bddc6296224e70d752a1372c Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 19 Dec 2015 15:13:49 +0300 Subject: natsemi: add checks for dma mapping errors refill_rx() and start_tx() do not check if mapping dma memory succeed. The patch adds the checks and failure handling. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index b83f7c0..122c2ee 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -1937,6 +1937,12 @@ static void refill_rx(struct net_device *dev) break; /* Better luck next round. */ np->rx_dma[entry] = pci_map_single(np->pci_dev, skb->data, buflen, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(np->pci_dev, + np->rx_dma[entry])) { + dev_kfree_skb_any(skb); + np->rx_skbuff[entry] = NULL; + break; /* Better luck next round. */ + } np->rx_ring[entry].addr = cpu_to_le32(np->rx_dma[entry]); } np->rx_ring[entry].cmd_status = cpu_to_le32(np->rx_buf_sz); @@ -2093,6 +2099,12 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) np->tx_skbuff[entry] = skb; np->tx_dma[entry] = pci_map_single(np->pci_dev, skb->data,skb->len, PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(np->pci_dev, np->tx_dma[entry])) { + np->tx_skbuff[entry] = NULL; + dev_kfree_skb_irq(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; + } np->tx_ring[entry].addr = cpu_to_le32(np->tx_dma[entry]); -- cgit v0.10.2 From 670c0d62ea5d16026adde5f3538b1caaa904a909 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 18 Dec 2015 14:43:33 +0100 Subject: net: usb: cdc_ncm: Adding Dell DW5812 LTE Verizon Mobile Broadband Card Unlike DW5550, Dell DW5812 is a mobile broadband card with no ARP capabilities: the patch makes this device to use wwan_noarp_info struct Signed-off-by: Daniele Palmas Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 1e9843a..3a71e60 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1558,6 +1558,15 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long) &wwan_info, }, + /* DW5812 LTE Verizon Mobile Broadband Card + * Unlike DW5550 this device requires FLAG_NOARP + */ + { USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x81bb, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_noarp_info, + }, + /* Dell branded MBM devices like DW5550 */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, -- cgit v0.10.2 From fb83d5f283dc699c891b30c341e758d9a060a7c6 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Fri, 18 Dec 2015 14:43:34 +0100 Subject: net: usb: cdc_ncm: Adding Dell DW5813 LTE AT&T Mobile Broadband Card Unlike DW5550, Dell DW5813 is a mobile broadband card with no ARP capabilities: the patch makes this device to use wwan_noarp_info struct Signed-off-by: Daniele Palmas Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 3a71e60..3694052 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -1567,6 +1567,15 @@ static const struct usb_device_id cdc_devs[] = { .driver_info = (unsigned long)&wwan_noarp_info, }, + /* DW5813 LTE AT&T Mobile Broadband Card + * Unlike DW5550 this device requires FLAG_NOARP + */ + { USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x81bc, + USB_CLASS_COMM, + USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&wwan_noarp_info, + }, + /* Dell branded MBM devices like DW5550 */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, -- cgit v0.10.2 From 0d96e4bab2855a030077cc695a3563fd7cb0e7d8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 18 Dec 2015 19:16:57 +0800 Subject: crypto: algif_skcipher - Use new skcipher interface This patch replaces uses of ablkcipher with the new skcipher interface. Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu Tested-by: diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index af31a0e..973fe45 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -47,7 +47,7 @@ struct skcipher_ctx { bool merge; bool enc; - struct ablkcipher_request req; + struct skcipher_request req; }; struct skcipher_async_rsgl { @@ -64,13 +64,13 @@ struct skcipher_async_req { }; #define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq + \ - crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req))) + crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req))) #define GET_REQ_SIZE(ctx) \ - crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req)) + crypto_skcipher_reqsize(crypto_skcipher_reqtfm(&ctx->req)) #define GET_IV_SIZE(ctx) \ - crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(&ctx->req)) + crypto_skcipher_ivsize(crypto_skcipher_reqtfm(&ctx->req)) #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \ sizeof(struct scatterlist) - 1) @@ -302,8 +302,8 @@ static int skcipher_sendmsg(struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct skcipher_ctx *ctx = ask->private; - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req); - unsigned ivsize = crypto_ablkcipher_ivsize(tfm); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req); + unsigned ivsize = crypto_skcipher_ivsize(tfm); struct skcipher_sg_list *sgl; struct af_alg_control con = {}; long copied = 0; @@ -507,7 +507,7 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg, struct skcipher_sg_list *sgl; struct scatterlist *sg; struct skcipher_async_req *sreq; - struct ablkcipher_request *req; + struct skcipher_request *req; struct skcipher_async_rsgl *last_rsgl = NULL; unsigned int txbufs = 0, len = 0, tx_nents = skcipher_all_sg_nents(ctx); unsigned int reqlen = sizeof(struct skcipher_async_req) + @@ -531,9 +531,9 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg, } sg_init_table(sreq->tsg, tx_nents); memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx)); - ablkcipher_request_set_tfm(req, crypto_ablkcipher_reqtfm(&ctx->req)); - ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - skcipher_async_cb, sk); + skcipher_request_set_tfm(req, crypto_skcipher_reqtfm(&ctx->req)); + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + skcipher_async_cb, sk); while (iov_iter_count(&msg->msg_iter)) { struct skcipher_async_rsgl *rsgl; @@ -608,10 +608,10 @@ static int skcipher_recvmsg_async(struct socket *sock, struct msghdr *msg, if (mark) sg_mark_end(sreq->tsg + txbufs - 1); - ablkcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg, - len, sreq->iv); - err = ctx->enc ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); + skcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg, + len, sreq->iv); + err = ctx->enc ? crypto_skcipher_encrypt(req) : + crypto_skcipher_decrypt(req); if (err == -EINPROGRESS) { atomic_inc(&ctx->inflight); err = -EIOCBQUEUED; @@ -632,7 +632,7 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg, struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct skcipher_ctx *ctx = ask->private; - unsigned bs = crypto_ablkcipher_blocksize(crypto_ablkcipher_reqtfm( + unsigned bs = crypto_skcipher_blocksize(crypto_skcipher_reqtfm( &ctx->req)); struct skcipher_sg_list *sgl; struct scatterlist *sg; @@ -669,14 +669,13 @@ static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg, if (!used) goto free; - ablkcipher_request_set_crypt(&ctx->req, sg, - ctx->rsgl.sg, used, - ctx->iv); + skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used, + ctx->iv); err = af_alg_wait_for_completion( ctx->enc ? - crypto_ablkcipher_encrypt(&ctx->req) : - crypto_ablkcipher_decrypt(&ctx->req), + crypto_skcipher_encrypt(&ctx->req) : + crypto_skcipher_decrypt(&ctx->req), &ctx->completion); free: @@ -751,17 +750,17 @@ static struct proto_ops algif_skcipher_ops = { static void *skcipher_bind(const char *name, u32 type, u32 mask) { - return crypto_alloc_ablkcipher(name, type, mask); + return crypto_alloc_skcipher(name, type, mask); } static void skcipher_release(void *private) { - crypto_free_ablkcipher(private); + crypto_free_skcipher(private); } static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen) { - return crypto_ablkcipher_setkey(private, key, keylen); + return crypto_skcipher_setkey(private, key, keylen); } static void skcipher_wait(struct sock *sk) @@ -778,13 +777,13 @@ static void skcipher_sock_destruct(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); struct skcipher_ctx *ctx = ask->private; - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(&ctx->req); if (atomic_read(&ctx->inflight)) skcipher_wait(sk); skcipher_free_sgl(sk); - sock_kzfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm)); + sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm)); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk); } @@ -793,20 +792,20 @@ static int skcipher_accept_parent(void *private, struct sock *sk) { struct skcipher_ctx *ctx; struct alg_sock *ask = alg_sk(sk); - unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private); + unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private); ctx = sock_kmalloc(sk, len, GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private), + ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private), GFP_KERNEL); if (!ctx->iv) { sock_kfree_s(sk, ctx, len); return -ENOMEM; } - memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private)); + memset(ctx->iv, 0, crypto_skcipher_ivsize(private)); INIT_LIST_HEAD(&ctx->tsgl); ctx->len = len; @@ -819,9 +818,9 @@ static int skcipher_accept_parent(void *private, struct sock *sk) ask->private = ctx; - ablkcipher_request_set_tfm(&ctx->req, private); - ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, - af_alg_complete, &ctx->completion); + skcipher_request_set_tfm(&ctx->req, private); + skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, + af_alg_complete, &ctx->completion); sk->sk_destruct = skcipher_sock_destruct; -- cgit v0.10.2 From ce8c839b74e3017996fad4e1b7ba2e2625ede82f Mon Sep 17 00:00:00 2001 From: Vijay Pandurangan Date: Fri, 18 Dec 2015 14:34:59 -0500 Subject: =?UTF-8?q?veth:=20don=E2=80=99t=20modify=20ip=5Fsummed;=20doing?= =?UTF-8?q?=20so=20treats=20packets=20with=20bad=20checksums=20as=20good.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Packets that arrive from real hardware devices have ip_summed == CHECKSUM_UNNECESSARY if the hardware verified the checksums, or CHECKSUM_NONE if the packet is bad or it was unable to verify it. The current version of veth will replace CHECKSUM_NONE with CHECKSUM_UNNECESSARY, which causes corrupt packets routed from hardware to a veth device to be delivered to the application. This caused applications at Twitter to receive corrupt data when network hardware was corrupting packets. We believe this was added as an optimization to skip computing and verifying checksums for communication between containers. However, locally generated packets have ip_summed == CHECKSUM_PARTIAL, so the code as written does nothing for them. As far as we can tell, after removing this code, these packets are transmitted from one stack to another unmodified (tcpdump shows invalid checksums on both sides, as expected), and they are delivered correctly to applications. We didn’t test every possible network configuration, but we tried a few common ones such as bridging containers, using NAT between the host and a container, and routing from hardware devices to containers. We have effectively deployed this in production at Twitter (by disabling RX checksum offloading on veth devices). This code dates back to the first version of the driver, commit ("[NET]: Virtual ethernet device driver"), so I suspect this bug occurred mostly because the driver API has evolved significantly since then. Commit <0b7967503dc97864f283a> ("net/veth: Fix packet checksumming") (in December 2010) fixed this for packets that get created locally and sent to hardware devices, by not changing CHECKSUM_PARTIAL. However, the same issue still occurs for packets coming in from hardware devices. Co-authored-by: Evan Jones Signed-off-by: Evan Jones Cc: Nicolas Dichtel Cc: Phil Sutter Cc: Toshiaki Makita Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Vijay Pandurangan Acked-by: Cong Wang Signed-off-by: David S. Miller diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 0ef4a5a..ba21d07 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -117,12 +117,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) kfree_skb(skb); goto drop; } - /* don't change ip_summed == CHECKSUM_PARTIAL, as that - * will cause bad checksum on forwarded packets - */ - if (skb->ip_summed == CHECKSUM_NONE && - rcv->features & NETIF_F_RXCSUM) - skb->ip_summed = CHECKSUM_UNNECESSARY; if (likely(dev_forward_skb(rcv, skb) == NET_RX_SUCCESS)) { struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); -- cgit v0.10.2 From 5cbf20c747987346f8352b156e3f05d3b84ac4ac Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Sun, 20 Dec 2015 01:48:04 +0300 Subject: sh_eth: fix 16-bit descriptor field access endianness too Commit 1299653affa4 ("sh_eth: fix descriptor access endianness") only addressed the 32-bit buffer address field byte-swapping but the driver still accesses 16-bit frame/buffer length descriptor fields without the necessary byte-swapping -- which should affect the big-endian kernels. In order to be able to use {cpu|edmac}_to_{edmac|cpu}(), we need to declare the RX/TX descriptor word 1 as a 32-bit field and use shifts/masking to access the 16-bit subfields (which gets rid of the ugly #ifdef'ery too)... Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index a0eaf50..6a8fc0f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1167,6 +1167,7 @@ static void sh_eth_ring_format(struct net_device *ndev) int tx_ringsize = sizeof(*txdesc) * mdp->num_tx_ring; int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN + 32 - 1; dma_addr_t dma_addr; + u32 buf_len; mdp->cur_rx = 0; mdp->cur_tx = 0; @@ -1187,9 +1188,9 @@ static void sh_eth_ring_format(struct net_device *ndev) /* RX descriptor */ rxdesc = &mdp->rx_ring[i]; /* The size of the buffer is a multiple of 32 bytes. */ - rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 32); - dma_addr = dma_map_single(&ndev->dev, skb->data, - rxdesc->buffer_length, + buf_len = ALIGN(mdp->rx_buf_sz, 32); + rxdesc->len = cpu_to_edmac(mdp, buf_len << 16); + dma_addr = dma_map_single(&ndev->dev, skb->data, buf_len, DMA_FROM_DEVICE); if (dma_mapping_error(&ndev->dev, dma_addr)) { kfree_skb(skb); @@ -1220,7 +1221,7 @@ static void sh_eth_ring_format(struct net_device *ndev) mdp->tx_skbuff[i] = NULL; txdesc = &mdp->tx_ring[i]; txdesc->status = cpu_to_edmac(mdp, TD_TFP); - txdesc->buffer_length = 0; + txdesc->len = cpu_to_edmac(mdp, 0); if (i == 0) { /* Tx descriptor address set */ sh_eth_write(ndev, mdp->tx_desc_dma, TDLAR); @@ -1429,7 +1430,8 @@ static int sh_eth_txfree(struct net_device *ndev) if (mdp->tx_skbuff[entry]) { dma_unmap_single(&ndev->dev, edmac_to_cpu(mdp, txdesc->addr), - txdesc->buffer_length, DMA_TO_DEVICE); + edmac_to_cpu(mdp, txdesc->len) >> 16, + DMA_TO_DEVICE); dev_kfree_skb_irq(mdp->tx_skbuff[entry]); mdp->tx_skbuff[entry] = NULL; free_num++; @@ -1439,7 +1441,7 @@ static int sh_eth_txfree(struct net_device *ndev) txdesc->status |= cpu_to_edmac(mdp, TD_TDLE); ndev->stats.tx_packets++; - ndev->stats.tx_bytes += txdesc->buffer_length; + ndev->stats.tx_bytes += edmac_to_cpu(mdp, txdesc->len) >> 16; } return free_num; } @@ -1458,6 +1460,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) u32 desc_status; int skbuff_size = mdp->rx_buf_sz + SH_ETH_RX_ALIGN + 32 - 1; dma_addr_t dma_addr; + u32 buf_len; boguscnt = min(boguscnt, *quota); limit = boguscnt; @@ -1466,7 +1469,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) /* RACT bit must be checked before all the following reads */ dma_rmb(); desc_status = edmac_to_cpu(mdp, rxdesc->status); - pkt_len = rxdesc->frame_length; + pkt_len = edmac_to_cpu(mdp, rxdesc->len) & RD_RFL; if (--boguscnt < 0) break; @@ -1532,7 +1535,8 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) entry = mdp->dirty_rx % mdp->num_rx_ring; rxdesc = &mdp->rx_ring[entry]; /* The size of the buffer is 32 byte boundary. */ - rxdesc->buffer_length = ALIGN(mdp->rx_buf_sz, 32); + buf_len = ALIGN(mdp->rx_buf_sz, 32); + rxdesc->len = cpu_to_edmac(mdp, buf_len << 16); if (mdp->rx_skbuff[entry] == NULL) { skb = netdev_alloc_skb(ndev, skbuff_size); @@ -1540,8 +1544,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) break; /* Better luck next round. */ sh_eth_set_receive_align(skb); dma_addr = dma_map_single(&ndev->dev, skb->data, - rxdesc->buffer_length, - DMA_FROM_DEVICE); + buf_len, DMA_FROM_DEVICE); if (dma_mapping_error(&ndev->dev, dma_addr)) { kfree_skb(skb); break; @@ -2407,7 +2410,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_OK; } txdesc->addr = cpu_to_edmac(mdp, dma_addr); - txdesc->buffer_length = skb->len; + txdesc->len = cpu_to_edmac(mdp, skb->len << 16); dma_wmb(); /* TACT bit must be set after all the above writes */ if (entry >= mdp->num_tx_ring - 1) diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index 26ad1cf..72fcfc9 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -283,7 +283,7 @@ enum DMAC_IM_BIT { DMAC_M_RINT1 = 0x00000001, }; -/* Receive descriptor bit */ +/* Receive descriptor 0 bits */ enum RD_STS_BIT { RD_RACT = 0x80000000, RD_RDLE = 0x40000000, RD_RFP1 = 0x20000000, RD_RFP0 = 0x10000000, @@ -298,6 +298,12 @@ enum RD_STS_BIT { #define RDFEND RD_RFP0 #define RD_RFP (RD_RFP1|RD_RFP0) +/* Receive descriptor 1 bits */ +enum RD_LEN_BIT { + RD_RFL = 0x0000ffff, /* receive frame length */ + RD_RBL = 0xffff0000, /* receive buffer length */ +}; + /* FCFTR */ enum FCFTR_BIT { FCFTR_RFF2 = 0x00040000, FCFTR_RFF1 = 0x00020000, @@ -307,7 +313,7 @@ enum FCFTR_BIT { #define DEFAULT_FIFO_F_D_RFF (FCFTR_RFF2 | FCFTR_RFF1 | FCFTR_RFF0) #define DEFAULT_FIFO_F_D_RFD (FCFTR_RFD2 | FCFTR_RFD1 | FCFTR_RFD0) -/* Transmit descriptor bit */ +/* Transmit descriptor 0 bits */ enum TD_STS_BIT { TD_TACT = 0x80000000, TD_TDLE = 0x40000000, TD_TFP1 = 0x20000000, TD_TFP0 = 0x10000000, @@ -317,6 +323,11 @@ enum TD_STS_BIT { #define TDFEND TD_TFP0 #define TD_TFP (TD_TFP1|TD_TFP0) +/* Transmit descriptor 1 bits */ +enum TD_LEN_BIT { + TD_TBL = 0xffff0000, /* transmit buffer length */ +}; + /* RMCR */ enum RMCR_BIT { RMCR_RNC = 0x00000001, @@ -425,15 +436,9 @@ enum TSU_FWSLC_BIT { */ struct sh_eth_txdesc { u32 status; /* TD0 */ -#if defined(__LITTLE_ENDIAN) - u16 pad0; /* TD1 */ - u16 buffer_length; /* TD1 */ -#else - u16 buffer_length; /* TD1 */ - u16 pad0; /* TD1 */ -#endif + u32 len; /* TD1 */ u32 addr; /* TD2 */ - u32 pad1; /* padding data */ + u32 pad0; /* padding data */ } __aligned(2) __packed; /* The sh ether Rx buffer descriptors. @@ -441,13 +446,7 @@ struct sh_eth_txdesc { */ struct sh_eth_rxdesc { u32 status; /* RD0 */ -#if defined(__LITTLE_ENDIAN) - u16 frame_length; /* RD1 */ - u16 buffer_length; /* RD1 */ -#else - u16 buffer_length; /* RD1 */ - u16 frame_length; /* RD1 */ -#endif + u32 len; /* RD1 */ u32 addr; /* RD2 */ u32 pad0; /* padding data */ } __aligned(2) __packed; -- cgit v0.10.2 From ef9cdd0fed3875b1ae9cc85987d8143354b2d4c8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 21 Dec 2015 09:56:01 +0100 Subject: switchdev: bridge: Pass ageing time as clock_t instead of jiffies The bridge's ageing time is offloaded to hardware when: 1) A port joins a bridge 2) The ageing time of the bridge is changed In the first case the ageing time is offloaded as jiffies, but in the second case it's offloaded as clock_t, which is what existing switchdev drivers expect to receive. Fixes: 6ac311ae8bfb ("Adding switchdev ageing notification on port bridged") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 5396ff08..12045de 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -39,7 +39,7 @@ void br_init_port(struct net_bridge_port *p) struct switchdev_attr attr = { .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, - .u.ageing_time = p->br->ageing_time, + .u.ageing_time = jiffies_to_clock_t(p->br->ageing_time), }; int err; -- cgit v0.10.2 From e459dfeeb64008b2d23bdf600f03b3605dbb8152 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 21 Dec 2015 12:54:45 +0300 Subject: ipv6/addrlabel: fix ip6addrlbl_get() ip6addrlbl_get() has never worked. If ip6addrlbl_hold() succeeded, ip6addrlbl_get() will exit with '-ESRCH'. If ip6addrlbl_hold() failed, ip6addrlbl_get() will use about to be free ip6addrlbl_entry pointer. Fix this by inverting ip6addrlbl_hold() check. Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") Signed-off-by: Andrey Ryabinin Reviewed-by: Cong Wang Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 882124e..a8f6986 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -552,7 +552,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) rcu_read_lock(); p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); - if (p && ip6addrlbl_hold(p)) + if (p && !ip6addrlbl_hold(p)) p = NULL; lseq = ip6addrlbl_table.seq; rcu_read_unlock(); -- cgit v0.10.2 From 5449a5ca9bc27dd51a462de7ca0b1cd861cd2bd0 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 21 Dec 2015 10:55:45 -0800 Subject: addrconf: always initialize sysctl table data When sysctl performs restrict writes, it allows to write from a middle position of a sysctl file, which requires us to initialize the table data before calling proc_dostring() for the write case. Fixes: 3d1bec99320d ("ipv6: introduce secret_stable to ipv6_devconf") Reported-by: Sasha Levin Acked-by: Hannes Frederic Sowa Tested-by: Sasha Levin Signed-off-by: Cong Wang Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 17f8e7e..1f21087 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5369,13 +5369,10 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, goto out; } - if (!write) { - err = snprintf(str, sizeof(str), "%pI6", - &secret->secret); - if (err >= sizeof(str)) { - err = -EIO; - goto out; - } + err = snprintf(str, sizeof(str), "%pI6", &secret->secret); + if (err >= sizeof(str)) { + err = -EIO; + goto out; } err = proc_dostring(&lctl, write, buffer, lenp, ppos); -- cgit v0.10.2 From d3805611130af9b911e908af9f67a3f64f4f0914 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 22 Dec 2015 15:48:44 -0700 Subject: block: Split bios on chunk boundaries For h/w that advertise their block storage's underlying chunk size, it's a big performance win to not submit commands that cross them. This patch uses that criteria if it is provided. If it is not provided, this patch uses the max sectors as before. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe diff --git a/block/blk-merge.c b/block/blk-merge.c index e01405a..e73846a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -81,7 +81,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, struct bio *new = NULL; bio_for_each_segment(bv, bio, iter) { - if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q)) + if (sectors + (bv.bv_len >> 9) > blk_max_size_offset(q, bio->bi_iter.bi_sector)) goto split; /* -- cgit v0.10.2 From fac51590c1a077809984139e9bb9e06ed366f219 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Mon, 21 Dec 2015 17:01:24 +0200 Subject: IB/cma: cma_match_net_dev needs to take into account port_num Previously, cma_match_net_dev called cma_protocol_roce which tried to verify that the IB device uses RoCE protocol. However, if rdma_id wasn't bound to a port, then the check would occur against the first port of the device without regard to whether that port was even of the same type as the type of port the incoming packet was received on. Fix this by passing the port of the request and only checking against the same port of the device. Reported-by: Or Gerlitz Fixes: b8cab5dab15f ('IB/cma: Accept connection without a valid netdev on RoCE') Signed-off-by: Matan Barak Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d2d5d00..2d762a2 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1265,15 +1265,17 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id) return cma_protocol_roce_dev_port(device, port_num); } -static bool cma_match_net_dev(const struct rdma_id_private *id_priv, - const struct net_device *net_dev) +static bool cma_match_net_dev(const struct rdma_cm_id *id, + const struct net_device *net_dev, + u8 port_num) { - const struct rdma_addr *addr = &id_priv->id.route.addr; + const struct rdma_addr *addr = &id->route.addr; if (!net_dev) /* This request is an AF_IB request or a RoCE request */ - return addr->src_addr.ss_family == AF_IB || - cma_protocol_roce(&id_priv->id); + return (!id->port_num || id->port_num == port_num) && + (addr->src_addr.ss_family == AF_IB || + cma_protocol_roce_dev_port(id->device, port_num)); return !addr->dev_addr.bound_dev_if || (net_eq(dev_net(net_dev), addr->dev_addr.net) && @@ -1295,13 +1297,13 @@ static struct rdma_id_private *cma_find_listener( hlist_for_each_entry(id_priv, &bind_list->owners, node) { if (cma_match_private_data(id_priv, ib_event->private_data)) { if (id_priv->id.device == cm_id->device && - cma_match_net_dev(id_priv, net_dev)) + cma_match_net_dev(&id_priv->id, net_dev, req->port)) return id_priv; list_for_each_entry(id_priv_dev, &id_priv->listen_list, listen_list) { if (id_priv_dev->id.device == cm_id->device && - cma_match_net_dev(id_priv_dev, net_dev)) + cma_match_net_dev(&id_priv_dev->id, net_dev, req->port)) return id_priv_dev; } } -- cgit v0.10.2 From df4176677fdf7ac0c5748083eb7a5b269fb1e156 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Thu, 17 Dec 2015 10:54:15 +0800 Subject: IB/mlx4: Replace kfree with kvfree in mlx4_ib_destroy_srq Commit 0ef2f05c7e02ff99c0b5b583d7dee2cd12b053f2 uses vmalloc for WR buffers when needed and uses kvfree to free the buffers. It missed changing kfree to kvfree in mlx4_ib_destroy_srq(). Reported-by: Matthew Finaly Signed-off-by: Wengang Wang Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 8d133c4..c394376 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -286,7 +286,7 @@ int mlx4_ib_destroy_srq(struct ib_srq *srq) mlx4_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); ib_umem_release(msrq->umem); } else { - kfree(msrq->wrid); + kvfree(msrq->wrid); mlx4_buf_free(dev->dev, msrq->msrq.max << msrq->msrq.wqe_shift, &msrq->buf); mlx4_db_free(dev->dev, &msrq->db); -- cgit v0.10.2 From ae35b56e367b9fef7f5de701cf8c1c3dd954dded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 10 Dec 2015 18:22:31 +0200 Subject: drm/i915: Unbreak check_digital_port_conflicts() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atomic changes broke check_digital_port_conflicts(). It needs to look at the global situation instead of just trying to find a conflict within the current atomic state. This bug made my HSW explode spectacularly after I had split the DDI encoders into separate DP and HDMI encoders. With the fix, things seem much more solid. I hope holding the connection_mutex is enough protection that we can actually walk the connectors even if they're not part of the current atomic state... v2: Regenerate the patch so that it actually applies (Jani) Cc: stable@vger.kernel.org Cc: Ander Conselvan de Oliveira Fixes: 5448a00d3f06 ("drm/i915: Don't use staged config in check_digital_port_conflicts()") Signed-off-by: Ville Syrjälä Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1449764551-12466-1-git-send-email-ville.syrjala@linux.intel.com (cherry picked from commit 0bff4858653312a10c83709e0009c3adb87e6f1e) Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index beb0374..32cf973 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12123,18 +12123,22 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, static bool check_digital_port_conflicts(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; - struct intel_encoder *encoder; struct drm_connector *connector; - struct drm_connector_state *connector_state; unsigned int used_ports = 0; - int i; /* * Walk the connector list instead of the encoder * list to detect the problem on ddi platforms * where there's just one encoder per digital port. */ - for_each_connector_in_state(state, connector, connector_state, i) { + drm_for_each_connector(connector, dev) { + struct drm_connector_state *connector_state; + struct intel_encoder *encoder; + + connector_state = drm_atomic_get_existing_connector_state(state, connector); + if (!connector_state) + connector_state = connector->state; + if (!connector_state->best_encoder) continue; -- cgit v0.10.2 From c1a9a291cee0890eb0f435243f3fb84fefb04348 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 23 Dec 2015 22:44:37 +0100 Subject: ipv6: honor ifindex in case we receive ll addresses in router advertisements Marc Haber reported we don't honor interface indexes when we receive link local router addresses in router advertisements. Luckily the non-strict version of ipv6_chk_addr already does the correct job here, so we can simply use it to lighten the checks and use those addresses by default without any configuration change. Link: Reported-by: Marc Haber Cc: Marc Haber Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d6161e1..84afb9a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1183,7 +1183,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) */ if (!in6_dev->cnf.accept_ra_from_local && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - NULL, 0)) { + in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", skb->dev->name); @@ -1337,7 +1337,7 @@ skip_linkparms: #ifdef CONFIG_IPV6_ROUTE_INFO if (!in6_dev->cnf.accept_ra_from_local && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - NULL, 0)) { + in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: router info ignored.\n", skb->dev->name); -- cgit v0.10.2 From 184fc8b5ee601cd83dbbdf3e6cfec5f5b8d3b41a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 23 Dec 2015 16:54:27 +0100 Subject: geneve: initialize needed_headroom Currently the needed_headroom field for the geneve device is left to the default value. This patch set it to space required for basic geneve encapsulation, so that we can avoid the skb head re-allocation on xmit. This give a 6% speedup for unsegment traffic on geneve tunnel. v1 -> v2: - add ETH_HLEN for the lower device to the needed headroom Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Acked-by: John W. Linville Signed-off-by: David S. Miller diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index c2b79f5..58efdec 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1155,7 +1155,7 @@ static int geneve_configure(struct net *net, struct net_device *dev, struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *t, *geneve = netdev_priv(dev); bool tun_collect_md, tun_on_same_port; - int err; + int err, encap_len; if (!remote) return -EINVAL; @@ -1187,6 +1187,14 @@ static int geneve_configure(struct net *net, struct net_device *dev, if (t) return -EBUSY; + /* make enough headroom for basic scenario */ + encap_len = GENEVE_BASE_HLEN + ETH_HLEN; + if (remote->sa.sa_family == AF_INET) + encap_len += sizeof(struct iphdr); + else + encap_len += sizeof(struct ipv6hdr); + dev->needed_headroom = encap_len + ETH_HLEN; + if (metadata) { if (tun_on_same_port) return -EPERM; -- cgit v0.10.2 From 1dfddff5fcd869fcab0c52fafae099dfa435a935 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 23 Dec 2015 13:42:43 +0100 Subject: net: cdc_ncm: avoid changing RX/TX buffers on MTU changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NCM buffer sizes are negotiated with the device independently of the network device MTU. The RX buffers are allocated by the usbnet framework based on the rx_urb_size value set by cdc_ncm. A single RX buffer can hold a number of MTU sized packets. The default usbnet change_mtu ndo only modifies rx_urb_size if it is equal to hard_mtu. And the cdc_ncm driver will set rx_urb_size and hard_mtu independently of each other, based on dwNtbInMaxSize and dwNtbOutMaxSize respectively. It was therefore assumed that usbnet_change_mtu() would never touch rx_urb_size. This failed to consider the case where dwNtbInMaxSize and dwNtbOutMaxSize happens to be equal. Fix by implementing an NCM specific change_mtu ndo, modifying the netdev MTU without touching the buffer size settings. Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 8973abd..bdd83d9 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -100,7 +100,7 @@ static const struct net_device_ops cdc_mbim_netdev_ops = { .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, - .ndo_change_mtu = usbnet_change_mtu, + .ndo_change_mtu = cdc_ncm_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_vlan_rx_add_vid = cdc_mbim_rx_add_vid, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 3694052..e8a1144 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -689,6 +690,33 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) kfree(ctx); } +/* we need to override the usbnet change_mtu ndo for two reasons: + * - respect the negotiated maximum datagram size + * - avoid unwanted changes to rx and tx buffers + */ +int cdc_ncm_change_mtu(struct net_device *net, int new_mtu) +{ + struct usbnet *dev = netdev_priv(net); + struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; + int maxmtu = ctx->max_datagram_size - cdc_ncm_eth_hlen(dev); + + if (new_mtu <= 0 || new_mtu > maxmtu) + return -EINVAL; + net->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(cdc_ncm_change_mtu); + +static const struct net_device_ops cdc_ncm_netdev_ops = { + .ndo_open = usbnet_open, + .ndo_stop = usbnet_stop, + .ndo_start_xmit = usbnet_start_xmit, + .ndo_tx_timeout = usbnet_tx_timeout, + .ndo_change_mtu = cdc_ncm_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags) { struct cdc_ncm_ctx *ctx; @@ -823,6 +851,9 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_ /* add our sysfs attrs */ dev->net->sysfs_groups[0] = &cdc_ncm_sysfs_attr_group; + /* must handle MTU changes */ + dev->net->netdev_ops = &cdc_ncm_netdev_ops; + return 0; error2: diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 1f6526c..3a375d0 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -138,6 +138,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); +int cdc_ncm_change_mtu(struct net_device *net, int new_mtu); int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); -- cgit v0.10.2 From 3358a5c0c1578fa215f90a0e750579cd6258ddd9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 24 Dec 2015 12:21:22 +0300 Subject: qlcnic: fix a loop exit condition better In the original code, if we succeeded on the last iteration through the loop then we still returned failure. Fixes: 389e4e04ad2d ('qlcnic: fix a timeout loop') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c index b1a452f..3490675 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_vnic.c @@ -252,7 +252,7 @@ int qlcnic_83xx_check_vnic_state(struct qlcnic_adapter *adapter) state = QLCRDX(ahw, QLC_83XX_VNIC_STATE); } - if (!idc->vnic_wait_limit) { + if (state != QLCNIC_DEV_NPAR_OPER) { dev_err(&adapter->pdev->dev, "vNIC mode not operational, state check timed out.\n"); return -EIO; -- cgit v0.10.2 From 9ba0b9636dc07a328ad3bffe9b22edb4cbb2901b Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 23 Dec 2015 16:28:40 -0200 Subject: sctp: use GFP_USER for user-controlled kmalloc Commit cacc06215271 ("sctp: use GFP_USER for user-controlled kmalloc") missed two other spots. For connectx, as it's more likely to be used by kernel users of the API, it detects if GFP_USER should be used or not. Fixes: cacc06215271 ("sctp: use GFP_USER for user-controlled kmalloc") Reported-by: Dmitry Vyukov Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 9b6cc6d..570f96a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1301,8 +1301,9 @@ static int __sctp_setsockopt_connectx(struct sock *sk, int addrs_size, sctp_assoc_t *assoc_id) { - int err = 0; struct sockaddr *kaddrs; + gfp_t gfp = GFP_KERNEL; + int err = 0; pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, addrs, addrs_size); @@ -1315,7 +1316,9 @@ static int __sctp_setsockopt_connectx(struct sock *sk, return -EFAULT; /* Alloc space for the address array in kernel memory. */ - kaddrs = kmalloc(addrs_size, GFP_KERNEL); + if (sk->sk_socket->file) + gfp = GFP_USER | __GFP_NOWARN; + kaddrs = kmalloc(addrs_size, gfp); if (unlikely(!kaddrs)) return -ENOMEM; @@ -5773,7 +5776,7 @@ static int sctp_getsockopt_assoc_ids(struct sock *sk, int len, len = sizeof(struct sctp_assoc_ids) + sizeof(sctp_assoc_t) * num; - ids = kmalloc(len, GFP_KERNEL); + ids = kmalloc(len, GFP_USER | __GFP_NOWARN); if (unlikely(!ids)) return -ENOMEM; -- cgit v0.10.2 From 3538a5c8ffa37c715029af4a2e384c077558eb18 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 23 Dec 2015 16:44:09 -0200 Subject: sctp: label accepted/peeled off sockets Accepted or peeled off sockets were missing a security label (e.g. SELinux) which means that socket was in "unlabeled" state. This patch clones the sock's label from the parent sock and resolves the issue (similar to AF_BLUETOOTH protocol family). Cc: Paul Moore Cc: David Teigland Signed-off-by: Marcelo Ricardo Leitner Acked-by: Paul Moore Signed-off-by: David S. Miller diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 570f96a..529ed35 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7202,6 +7202,8 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); + + security_sk_clone(sk, newsk); } static inline void sctp_copy_descendant(struct sock *sk_to, -- cgit v0.10.2 From c6002d5602ab36c19ef4fe0e20ecfa28aaabf028 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 24 Dec 2015 13:14:05 -0500 Subject: RDMA/ocrdma: Fix vlan-id assignment in qp parameters vlan-id is wrongly getting as 0 when PFC is enabled. Set vlan-id configured by user in QP parameters. In case vlan interface is not used, flash a warning to user to configure vlan and assign vlan-id as 0 in qp params. Fixes: dbf727de7440 ('IB/core: Use GID table in AH creation and dmac resolution') Cc: Matan Barak Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 30f67be..4fc2bb4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -2515,9 +2515,10 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid)); cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8); - if (vlan_id < 0x1000) { - if (dev->pfc_state) { - vlan_id = 0; + if (vlan_id == 0xFFFF) + vlan_id = 0; + if (vlan_id || dev->pfc_state) { + if (!vlan_id) { pr_err("ocrdma%d:Using VLAN with PFC is recommended\n", dev->id); pr_err("ocrdma%d:Using VLAN 0 for this connection\n", -- cgit v0.10.2 From 36ac0db0dbf7081afe4137d444ef85614213b8eb Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 24 Dec 2015 13:14:06 -0500 Subject: RDMA/ocrdma: Dispatch only port event when port state changes Dispatch only port event to IB stack when port state changes. Don't explicitly modify qps to error. Let application listen to port events on async event queue or let QP fail with retry-exceeded completion error. Signed-off-by: Padmanabh Ratnakar Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 62b7009..ebe40b4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -386,30 +386,7 @@ static int ocrdma_open(struct ocrdma_dev *dev) static int ocrdma_close(struct ocrdma_dev *dev) { - int i; - struct ocrdma_qp *qp, **cur_qp; struct ib_event err_event; - struct ib_qp_attr attrs; - int attr_mask = IB_QP_STATE; - - attrs.qp_state = IB_QPS_ERR; - mutex_lock(&dev->dev_lock); - if (dev->qp_tbl) { - cur_qp = dev->qp_tbl; - for (i = 0; i < OCRDMA_MAX_QP; i++) { - qp = cur_qp[i]; - if (qp && qp->ibqp.qp_type != IB_QPT_GSI) { - /* change the QP state to ERROR */ - _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask); - - err_event.event = IB_EVENT_QP_FATAL; - err_event.element.qp = &qp->ibqp; - err_event.device = &dev->ibdev; - ib_dispatch_event(&err_event); - } - } - } - mutex_unlock(&dev->dev_lock); err_event.event = IB_EVENT_PORT_ERR; err_event.element.port_num = 1; -- cgit v0.10.2 From 10a214dc996236e6547b84fb5ca007316b30c2e6 Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 24 Dec 2015 13:14:07 -0500 Subject: RDMA/ocrdma: Depend on async link events from CNA Recently Dough Ledford reported a deadlock happening between ocrdma-load sequence and NetworkManager service issuing "open" on be2net interface. The deadlock happens when any be2net hook (e.g. open/close) is called in parallel to insmod ocrdma.ko. A. be2net is sending administrative open/close event to ocrdma holding device_list_mutex. It does this from ndo_open/ndo_stop hooks of be2net. So sequence of locks is rtnl_lock---> device_list lock B. When new ocrdma roce device gets registered, infiniband stack now takes rtnl_lock in ib_register_device() in GID initialization routines. So sequence of locks in this path is device_list lock ---> rtnl_lock. This improper locking sequence causes deadlock. With this patch we stop using administrative open and close events injected by be2net driver. These events were used to dispatch PORT_ACTIVE and PORT_ERROR events to the IB-stack. This patch implements a logic to receive async-link-events generated from CNA whenever link-state-change is detected. Now on, these async-events will be used to dispatch PORT_ACTIVE and PORT_ERROR events to IB-stack. Depending on async-events from CNA removes the need to hold device-list-mutex and thus breaks the busy-wait scenario. Reported-by: Doug Ledford CC: Sathya Perla Signed-off-by: Padmanabh Ratnakar Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index ae80590..040bb8b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -232,6 +232,10 @@ struct phy_info { u16 interface_type; }; +enum ocrdma_flags { + OCRDMA_FLAGS_LINK_STATUS_INIT = 0x01 +}; + struct ocrdma_dev { struct ib_device ibdev; struct ocrdma_dev_attr attr; @@ -287,6 +291,7 @@ struct ocrdma_dev { atomic_t update_sl; u16 pvid; u32 asic_id; + u32 flags; ulong last_stats_time; struct mutex stats_lock; /* provide synch for debugfs operations */ @@ -591,4 +596,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state) (state & OCRDMA_STATE_FLAG_SYNC); } +static inline u8 ocrdma_get_ae_link_state(u32 ae_state) +{ + return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT); +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 4fc2bb4..283ca84 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -579,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev, cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE); cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE); + /* Request link events on this MQ. */ + cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE); cmd->async_cqid_ringsize = cq->id; cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) << @@ -819,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, } } +static void ocrdma_process_link_state(struct ocrdma_dev *dev, + struct ocrdma_ae_mcqe *cqe) +{ + struct ocrdma_ae_lnkst_mcqe *evt; + u8 lstate; + + evt = (struct ocrdma_ae_lnkst_mcqe *)cqe; + lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn); + + if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK)) + return; + + if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT) + ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK)); +} + static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) { /* async CQE processing */ struct ocrdma_ae_mcqe *cqe = ae_cqe; u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >> OCRDMA_AE_MCQE_EVENT_CODE_SHIFT; - - if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE) + switch (evt_code) { + case OCRDMA_ASYNC_LINK_EVE_CODE: + ocrdma_process_link_state(dev, cqe); + break; + case OCRDMA_ASYNC_RDMA_EVE_CODE: ocrdma_dispatch_ibevent(dev, cqe); - else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE) + break; + case OCRDMA_ASYNC_GRP5_EVE_CODE: ocrdma_process_grp5_aync(dev, cqe); - else + break; + default: pr_err("%s(%d) invalid evt code=0x%x\n", __func__, dev->id, evt_code); + } } static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe) @@ -1363,7 +1387,8 @@ mbx_err: return status; } -int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed, + u8 *lnk_state) { int status = -ENOMEM; struct ocrdma_get_link_speed_rsp *rsp; @@ -1384,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) goto mbx_err; rsp = (struct ocrdma_get_link_speed_rsp *)cmd; - *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) - >> OCRDMA_PHY_PS_SHIFT; + if (lnk_speed) + *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) + >> OCRDMA_PHY_PS_SHIFT; + if (lnk_state) + *lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK); mbx_err: kfree(cmd); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 7ed885c..ebc1f44 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -106,7 +106,8 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed, bool solicited, u16 cqe_popped); /* verbs specific mailbox commands */ -int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed); +int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed, + u8 *lnk_st); int ocrdma_query_config(struct ocrdma_dev *, struct ocrdma_mbx_query_config *config); @@ -153,5 +154,6 @@ char *port_speed_string(struct ocrdma_dev *dev); void ocrdma_init_service_level(struct ocrdma_dev *); void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev); void ocrdma_free_pd_range(struct ocrdma_dev *dev); +void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate); #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index ebe40b4..3afb40b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -290,6 +290,7 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { int status = 0, i; + u8 lstate = 0; struct ocrdma_dev *dev; dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); @@ -319,6 +320,11 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) if (status) goto alloc_err; + /* Query Link state and update */ + status = ocrdma_mbx_get_link_speed(dev, NULL, &lstate); + if (!status) + ocrdma_update_link_state(dev, lstate); + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) goto sysfs_err; @@ -373,7 +379,7 @@ static void ocrdma_remove(struct ocrdma_dev *dev) ocrdma_remove_free(dev); } -static int ocrdma_open(struct ocrdma_dev *dev) +static int ocrdma_dispatch_port_active(struct ocrdma_dev *dev) { struct ib_event port_event; @@ -384,7 +390,7 @@ static int ocrdma_open(struct ocrdma_dev *dev) return 0; } -static int ocrdma_close(struct ocrdma_dev *dev) +static int ocrdma_dispatch_port_error(struct ocrdma_dev *dev) { struct ib_event err_event; @@ -397,7 +403,7 @@ static int ocrdma_close(struct ocrdma_dev *dev) static void ocrdma_shutdown(struct ocrdma_dev *dev) { - ocrdma_close(dev); + ocrdma_dispatch_port_error(dev); ocrdma_remove(dev); } @@ -408,18 +414,28 @@ static void ocrdma_shutdown(struct ocrdma_dev *dev) static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event) { switch (event) { - case BE_DEV_UP: - ocrdma_open(dev); - break; - case BE_DEV_DOWN: - ocrdma_close(dev); - break; case BE_DEV_SHUTDOWN: ocrdma_shutdown(dev); break; + default: + break; } } +void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate) +{ + if (!(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)) { + dev->flags |= OCRDMA_FLAGS_LINK_STATUS_INIT; + if (!lstate) + return; + } + + if (!lstate) + ocrdma_dispatch_port_error(dev); + else + ocrdma_dispatch_port_active(dev); +} + static struct ocrdma_driver ocrdma_drv = { .name = "ocrdma_driver", .add = ocrdma_add, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 6a38268..99dd6fd 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -465,8 +465,11 @@ struct ocrdma_ae_qp_mcqe { u32 valid_ae_event; }; -#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 -#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 +enum ocrdma_async_event_code { + OCRDMA_ASYNC_LINK_EVE_CODE = 0x01, + OCRDMA_ASYNC_GRP5_EVE_CODE = 0x05, + OCRDMA_ASYNC_RDMA_EVE_CODE = 0x14 +}; enum ocrdma_async_grp5_events { OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01, @@ -489,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE { OCRDMA_MAX_ASYNC_ERRORS }; +struct ocrdma_ae_lnkst_mcqe { + u32 speed_state_ptn; + u32 qos_reason_falut; + u32 evt_tag; + u32 valid_ae_event; +}; + +enum { + OCRDMA_AE_LSC_PORT_NUM_MASK = 0x3F, + OCRDMA_AE_LSC_PT_SHIFT = 0x06, + OCRDMA_AE_LSC_PT_MASK = (0x03 << + OCRDMA_AE_LSC_PT_SHIFT), + OCRDMA_AE_LSC_LS_SHIFT = 0x08, + OCRDMA_AE_LSC_LS_MASK = (0xFF << + OCRDMA_AE_LSC_LS_SHIFT), + OCRDMA_AE_LSC_LD_SHIFT = 0x10, + OCRDMA_AE_LSC_LD_MASK = (0xFF << + OCRDMA_AE_LSC_LD_SHIFT), + OCRDMA_AE_LSC_PPS_SHIFT = 0x18, + OCRDMA_AE_LSC_PPS_MASK = (0xFF << + OCRDMA_AE_LSC_PPS_SHIFT), + OCRDMA_AE_LSC_PPF_MASK = 0xFF, + OCRDMA_AE_LSC_ER_SHIFT = 0x08, + OCRDMA_AE_LSC_ER_MASK = (0xFF << + OCRDMA_AE_LSC_ER_SHIFT), + OCRDMA_AE_LSC_QOS_SHIFT = 0x10, + OCRDMA_AE_LSC_QOS_MASK = (0xFFFF << + OCRDMA_AE_LSC_QOS_SHIFT) +}; + +enum { + OCRDMA_AE_LSC_PLINK_DOWN = 0x00, + OCRDMA_AE_LSC_PLINK_UP = 0x01, + OCRDMA_AE_LSC_LLINK_DOWN = 0x02, + OCRDMA_AE_LSC_LLINK_MASK = 0x02, + OCRDMA_AE_LSC_LLINK_UP = 0x03 +}; + /* mailbox command request and responses */ enum { OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2, @@ -676,7 +717,7 @@ enum { OCRDMA_PHY_PFLT_SHIFT = 0x18, OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000, OCRDMA_QOS_LNKSP_SHIFT = 0x10, - OCRDMA_LLST_MASK = 0xFF, + OCRDMA_LINK_ST_MASK = 0x01, OCRDMA_PLFC_MASK = 0x00000400, OCRDMA_PLFC_SHIFT = 0x8, OCRDMA_PLRFC_MASK = 0x00000200, @@ -691,7 +732,7 @@ struct ocrdma_get_link_speed_rsp { u32 pflt_pps_ld_pnum; u32 qos_lsp; - u32 res_lls; + u32 res_lnk_st; }; enum { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 583001b..76e96f9 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -171,7 +171,7 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev, int status; u8 speed; - status = ocrdma_mbx_get_link_speed(dev, &speed); + status = ocrdma_mbx_get_link_speed(dev, &speed, NULL); if (status) speed = OCRDMA_PHYS_LINK_SPEED_ZERO; -- cgit v0.10.2 From f41647ef06536199d3366530da050b411546979d Mon Sep 17 00:00:00 2001 From: Devesh Sharma Date: Thu, 24 Dec 2015 13:14:08 -0500 Subject: RDMA/be2net: Remove open and close entry points Recently Dough Ledford reported a deadlock happening between ocrdma-load sequence and NetworkManager service issueing "open" on be2net interface. The deadlock happens when any be2net hook (e.g. open/close) is called in parallel to insmod ocrdma.ko. A. be2net is sending administrative open/close event to ocrdma holding device_list_mutex. It does this from ndo_open/ndo_stop hooks of be2net. So sequence of locks is rtnl_lock---> device_list lock B. When new ocrdma roce device gets registered, infiniband stack now takes rtnl_lock in ib_register_device() in GID initialization routines. So sequence of locks in this path is device_list lock ---> rtnl_lock. This improper locking sequence causes deadlock. In order to resolve the above deadlock condition, ocrdma intorduced a patch to stop listening to administrative open/close events generated from be2net driver. It now depends on link-state-change async-event generated from CNA. This change leaves behind dead code which used to generate administrative open/close events. This patch cleans-up all that dead code from be2net. Reported-by: Doug Ledford CC: Sathya Perla Signed-off-by: Padmanabh Ratnakar Signed-off-by: Selvin Xavier Signed-off-by: Devesh Sharma Signed-off-by: Doug Ledford diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index d463563..6ee78c2 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -848,8 +848,6 @@ void be_roce_dev_remove(struct be_adapter *); /* * internal function to open-close roce device during ifup-ifdown. */ -void be_roce_dev_open(struct be_adapter *); -void be_roce_dev_close(struct be_adapter *); void be_roce_dev_shutdown(struct be_adapter *); #endif /* BE_H */ diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index b6ad029..ff2ff89 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -3432,8 +3432,6 @@ static int be_close(struct net_device *netdev) be_disable_if_filters(adapter); - be_roce_dev_close(adapter); - if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { for_all_evt_queues(adapter, eqo, i) { napi_disable(&eqo->napi); @@ -3601,8 +3599,6 @@ static int be_open(struct net_device *netdev) be_link_status_update(adapter, link_status); netif_tx_start_all_queues(netdev); - be_roce_dev_open(adapter); - #ifdef CONFIG_BE2NET_VXLAN if (skyhawk_chip(adapter)) vxlan_get_rx_port(netdev); diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index 6036820..4089156 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -116,40 +116,6 @@ void be_roce_dev_remove(struct be_adapter *adapter) } } -static void _be_roce_dev_open(struct be_adapter *adapter) -{ - if (ocrdma_drv && adapter->ocrdma_dev && - ocrdma_drv->state_change_handler) - ocrdma_drv->state_change_handler(adapter->ocrdma_dev, - BE_DEV_UP); -} - -void be_roce_dev_open(struct be_adapter *adapter) -{ - if (be_roce_supported(adapter)) { - mutex_lock(&be_adapter_list_lock); - _be_roce_dev_open(adapter); - mutex_unlock(&be_adapter_list_lock); - } -} - -static void _be_roce_dev_close(struct be_adapter *adapter) -{ - if (ocrdma_drv && adapter->ocrdma_dev && - ocrdma_drv->state_change_handler) - ocrdma_drv->state_change_handler(adapter->ocrdma_dev, - BE_DEV_DOWN); -} - -void be_roce_dev_close(struct be_adapter *adapter) -{ - if (be_roce_supported(adapter)) { - mutex_lock(&be_adapter_list_lock); - _be_roce_dev_close(adapter); - mutex_unlock(&be_adapter_list_lock); - } -} - void be_roce_dev_shutdown(struct be_adapter *adapter) { if (be_roce_supported(adapter)) { @@ -177,8 +143,6 @@ int be_roce_register_driver(struct ocrdma_driver *drv) _be_roce_dev_add(dev); netdev = dev->netdev; - if (netif_running(netdev) && netif_oper_up(netdev)) - _be_roce_dev_open(dev); } mutex_unlock(&be_adapter_list_lock); return 0; diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h index cde6ef9..fde6097 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.h +++ b/drivers/net/ethernet/emulex/benet/be_roce.h @@ -60,9 +60,7 @@ struct ocrdma_driver { void (*state_change_handler) (struct ocrdma_dev *, u32 new_state); }; -enum { - BE_DEV_UP = 0, - BE_DEV_DOWN = 1, +enum be_roce_event { BE_DEV_SHUTDOWN = 2 }; -- cgit v0.10.2 From 21491412f2ec6f13d4104de734dec0ba659d092e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Dec 2015 13:01:22 -0700 Subject: block: add blk_start_queue_async() We currently only have an inline/sync helper to restart a stopped queue. If drivers need an async version, they have to roll their own. Add a generic helper instead. Signed-off-by: Jens Axboe diff --git a/block/blk-core.c b/block/blk-core.c index c487b94..33e2f62 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -207,6 +207,22 @@ void blk_delay_queue(struct request_queue *q, unsigned long msecs) EXPORT_SYMBOL(blk_delay_queue); /** + * blk_start_queue_async - asynchronously restart a previously stopped queue + * @q: The &struct request_queue in question + * + * Description: + * blk_start_queue_async() will clear the stop flag on the queue, and + * ensure that the request_fn for the queue is run from an async + * context. + **/ +void blk_start_queue_async(struct request_queue *q) +{ + queue_flag_clear(QUEUE_FLAG_STOPPED, q); + blk_run_queue_async(q); +} +EXPORT_SYMBOL(blk_start_queue_async); + +/** * blk_start_queue - restart a previously stopped queue * @q: The &struct request_queue in question * diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0169ba2..c70e358 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -797,6 +797,7 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int blk_queue_enter(struct request_queue *q, gfp_t gfp); extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); +extern void blk_start_queue_async(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); -- cgit v0.10.2 From 48cc661e7f4cec80b6aa894cc6902c292f201ea8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Dec 2015 13:02:47 -0700 Subject: null_blk: use async queue restart helper If null_blk is run in NULL_IRQ_TIMER mode and with queue_mode NULL_Q_RQ, we need to restart the queue from the hrtimer interrupt. We can't directly invoke the request_fn from that context, so punt the queue run to async kblockd context. Tested-by: Rabin Vincent Signed-off-by: Jens Axboe diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index a428e4e..09e3c0d 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -232,20 +232,19 @@ static void end_cmd(struct nullb_cmd *cmd) break; case NULL_Q_BIO: bio_endio(cmd->bio); - goto free_cmd; + break; } + free_cmd(cmd); + /* Restart queue if needed, as we are freeing a tag */ - if (q && !q->mq_ops && blk_queue_stopped(q)) { + if (queue_mode == NULL_Q_RQ && blk_queue_stopped(q)) { unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - if (blk_queue_stopped(q)) - blk_start_queue(q); + blk_start_queue_async(q); spin_unlock_irqrestore(q->queue_lock, flags); } -free_cmd: - free_cmd(cmd); } static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) -- cgit v0.10.2 From c3293a9ac2a4f9160b85b5e986a8e0c54986e7f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Tue, 29 Dec 2015 14:37:56 +0100 Subject: lightnvm: wrong offset in bad blk lun calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dev->nr_luns reports the total number of luns available in a device while dev->luns_per_chnl is the number of luns per channel. When multiple channels are available, the offset is calculated from a channel and lun id into a linear array. As it multiplies with the total number of luns, we go out of bound when channel id > 0 and causes the kernel to panic when we read a protected kernel memory area. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c index f434e89..a54b339 100644 --- a/drivers/lightnvm/gennvm.c +++ b/drivers/lightnvm/gennvm.c @@ -75,7 +75,7 @@ static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks, struct nvm_block *blk; int i; - lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun]; + lun = &gn->luns[(dev->luns_per_chnl * ppa.g.ch) + ppa.g.lun]; for (i = 0; i < nr_blocks; i++) { if (blks[i] == 0) -- cgit v0.10.2 From 76cc404bfdc0d419c720de4daaf2584542734f42 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 28 Dec 2015 20:47:08 -0500 Subject: [PATCH] arm: fix handling of F_OFD_... in oabi_fcntl64() Cc: stable@vger.kernel.org # 3.15+ Reviewed-by: Jeff Layton Signed-off-by: Al Viro diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index b83f3b7..087acb5 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -193,15 +193,44 @@ struct oabi_flock64 { pid_t l_pid; } __attribute__ ((packed,aligned(4))); -asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, +static long do_locks(unsigned int fd, unsigned int cmd, unsigned long arg) { - struct oabi_flock64 user; struct flock64 kernel; - mm_segment_t fs = USER_DS; /* initialized to kill a warning */ - unsigned long local_arg = arg; - int ret; + struct oabi_flock64 user; + mm_segment_t fs; + long ret; + + if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, + sizeof(user))) + return -EFAULT; + kernel.l_type = user.l_type; + kernel.l_whence = user.l_whence; + kernel.l_start = user.l_start; + kernel.l_len = user.l_len; + kernel.l_pid = user.l_pid; + + fs = get_fs(); + set_fs(KERNEL_DS); + ret = sys_fcntl64(fd, cmd, (unsigned long)&kernel); + set_fs(fs); + + if (!ret && (cmd == F_GETLK64 || cmd == F_OFD_GETLK)) { + user.l_type = kernel.l_type; + user.l_whence = kernel.l_whence; + user.l_start = kernel.l_start; + user.l_len = kernel.l_len; + user.l_pid = kernel.l_pid; + if (copy_to_user((struct oabi_flock64 __user *)arg, + &user, sizeof(user))) + ret = -EFAULT; + } + return ret; +} +asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, + unsigned long arg) +{ switch (cmd) { case F_OFD_GETLK: case F_OFD_SETLK: @@ -209,39 +238,11 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: - if (copy_from_user(&user, (struct oabi_flock64 __user *)arg, - sizeof(user))) - return -EFAULT; - kernel.l_type = user.l_type; - kernel.l_whence = user.l_whence; - kernel.l_start = user.l_start; - kernel.l_len = user.l_len; - kernel.l_pid = user.l_pid; - local_arg = (unsigned long)&kernel; - fs = get_fs(); - set_fs(KERNEL_DS); - } - - ret = sys_fcntl64(fd, cmd, local_arg); + return do_locks(fd, cmd, arg); - switch (cmd) { - case F_GETLK64: - if (!ret) { - user.l_type = kernel.l_type; - user.l_whence = kernel.l_whence; - user.l_start = kernel.l_start; - user.l_len = kernel.l_len; - user.l_pid = kernel.l_pid; - if (copy_to_user((struct oabi_flock64 __user *)arg, - &user, sizeof(user))) - ret = -EFAULT; - } - case F_SETLK64: - case F_SETLKW64: - set_fs(fs); + default: + return sys_fcntl64(fd, cmd, arg); } - - return ret; } struct oabi_epoll_event { -- cgit v0.10.2 From 90c7afc96cbbd77f44094b5b651261968e97de67 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 23 Dec 2015 14:39:27 -0800 Subject: openvswitch: Fix template leak in error cases. Commit 5b48bb8506c5 ("openvswitch: Fix helper reference leak") fixed a reference leak on helper objects, but inadvertently introduced a leak on the ct template. Previously, ct_info.ct->general.use was initialized to 0 by nf_ct_tmpl_alloc() and only incremented when ovs_ct_copy_action() returned successful. If an error occurred while adding the helper or adding the action to the actions buffer, the __ovs_ct_free_action() cleanup would use nf_ct_put() to free the entry; However, this relies on atomic_dec_and_test(ct_info.ct->general.use). This reference must be incremented first, or nf_ct_put() will never free it. Fix the issue by acquiring a reference to the template immediately after allocation. Fixes: cae3a2627520 ("openvswitch: Allow attaching helpers to ct action") Fixes: 5b48bb8506c5 ("openvswitch: Fix helper reference leak") Signed-off-by: Joe Stringer Signed-off-by: David S. Miller diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3e88922..e004067 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -698,6 +698,10 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, OVS_NLERR(log, "Failed to allocate conntrack template"); return -ENOMEM; } + + __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); + nf_conntrack_get(&ct_info.ct->ct_general); + if (helper) { err = ovs_ct_add_helper(&ct_info, helper, key, log); if (err) @@ -709,8 +713,6 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, if (err) goto err_free_ct; - __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status); - nf_conntrack_get(&ct_info.ct->ct_general); return 0; err_free_ct: __ovs_ct_free_action(&ct_info); -- cgit v0.10.2 From c1e3334fa4b2891752f1367b47a60209353ba2f5 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 26 Dec 2015 20:12:13 +0100 Subject: drivers: net: cpsw: fix error return code Propagate the return value of platform_get_irq on failure. A simplified version of the semantic match that finds the two cases where no error code is returned at all is as follows: (http://coccinelle.lip6.fr/) // @@ identifier ret; expression e1,e2; @@ ( if (\(ret < 0\|ret != 0\)) { ... return ret; } | ret = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 3b489ca..fc95806 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2427,7 +2427,7 @@ static int cpsw_probe(struct platform_device *pdev) ndev->irq = platform_get_irq(pdev, 1); if (ndev->irq < 0) { dev_err(priv->dev, "error getting irq resource\n"); - ret = -ENOENT; + ret = ndev->irq; goto clean_ale_ret; } @@ -2448,8 +2448,10 @@ static int cpsw_probe(struct platform_device *pdev) /* RX IRQ */ irq = platform_get_irq(pdev, 1); - if (irq < 0) + if (irq < 0) { + ret = irq; goto clean_ale_ret; + } priv->irqs_table[0] = irq; ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt, @@ -2461,8 +2463,10 @@ static int cpsw_probe(struct platform_device *pdev) /* TX IRQ */ irq = platform_get_irq(pdev, 2); - if (irq < 0) + if (irq < 0) { + ret = irq; goto clean_ale_ret; + } priv->irqs_table[1] = irq; ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt, -- cgit v0.10.2 From 398c7500a1f5f74e207bd2edca1b1721b3cc1f1e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Wed, 23 Dec 2015 21:04:31 -0800 Subject: MIPS: VDSO: Fix build error with binutils 2.24 and earlier Commit 2a037f310bab ("MIPS: VDSO: Fix build error") tries to fix a build error seen with binutils 2.24 and earlier. However, the fix does not work, and again results in the already known build errors if the kernel is built with an earlier version of binutils. CC arch/mips/vdso/gettimeofday.o /tmp/ccnOVbHT.s: Assembler messages: /tmp/ccnOVbHT.s:50: Error: can't resolve `_start' {*UND* section} - `L0 {.text section} /tmp/ccnOVbHT.s:374: Error: can't resolve `_start' {*UND* section} - `L0 {.text section} scripts/Makefile.build:258: recipe for target 'arch/mips/vdso/gettimeofday.o' failed make[2]: *** [arch/mips/vdso/gettimeofday.o] Error 1 Fixes: 2a037f310bab ("MIPS: VDSO: Fix build error") Cc: Qais Yousef Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11926/ Signed-off-by: Guenter Roeck Signed-off-by: Ralf Baechle diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 018f8c7..1456890 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -26,7 +26,7 @@ aflags-vdso := $(ccflags-vdso) \ # the comments on that file. # ifndef CONFIG_CPU_MIPSR6 - ifeq ($(call ld-ifversion, -lt, 22500000, y),) + ifeq ($(call ld-ifversion, -lt, 22500000, y),y) $(warning MIPS VDSO requires binutils >= 2.25) obj-vdso-y := $(filter-out gettimeofday.o, $(obj-vdso-y)) ccflags-vdso += -DDISABLE_MIPS_VDSO -- cgit v0.10.2 From 5c9ee4cbf2a945271f25b89b137f2c03bbc3be33 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Tue, 29 Dec 2015 14:54:06 -0800 Subject: ocfs2: fix BUG when calculate new backup super When resizing, it firstly extends the last gd. Once it should backup super in the gd, it calculates new backup super and update the corresponding value. But it currently doesn't consider the situation that the backup super is already done. And in this case, it still sets the bit in gd bitmap and then decrease from bg_free_bits_count, which leads to a corrupted gd and trigger the BUG in ocfs2_block_group_set_bits: BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); So check whether the backup super is done and then do the updates. Signed-off-by: Joseph Qi Reviewed-by: Jiufei Xue Reviewed-by: Yiwen Jiang Cc: Mark Fasheh Cc: Joel Becker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index d5da6f6..79b8021 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -54,11 +54,12 @@ static u16 ocfs2_calc_new_backup_super(struct inode *inode, struct ocfs2_group_desc *gd, u16 cl_cpg, + u16 old_bg_clusters, int set) { int i; u16 backups = 0; - u32 cluster; + u32 cluster, lgd_cluster; u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno); for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { @@ -71,6 +72,12 @@ static u16 ocfs2_calc_new_backup_super(struct inode *inode, else if (gd_blkno > lgd_blkno) break; + /* check if already done backup super */ + lgd_cluster = ocfs2_blocks_to_clusters(inode->i_sb, lgd_blkno); + lgd_cluster += old_bg_clusters; + if (lgd_cluster >= cluster) + continue; + if (set) ocfs2_set_bit(cluster % cl_cpg, (unsigned long *)gd->bg_bitmap); @@ -99,6 +106,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, u16 chain, num_bits, backups = 0; u16 cl_bpc = le16_to_cpu(cl->cl_bpc); u16 cl_cpg = le16_to_cpu(cl->cl_cpg); + u16 old_bg_clusters; trace_ocfs2_update_last_group_and_inode(new_clusters, first_new_cluster); @@ -112,6 +120,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, group = (struct ocfs2_group_desc *)group_bh->b_data; + old_bg_clusters = le16_to_cpu(group->bg_bits) / cl_bpc; /* update the group first. */ num_bits = new_clusters * cl_bpc; le16_add_cpu(&group->bg_bits, num_bits); @@ -125,7 +134,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, OCFS2_FEATURE_COMPAT_BACKUP_SB)) { backups = ocfs2_calc_new_backup_super(bm_inode, group, - cl_cpg, 1); + cl_cpg, old_bg_clusters, 1); le16_add_cpu(&group->bg_free_bits_count, -1 * backups); } @@ -163,7 +172,7 @@ out_rollback: if (ret < 0) { ocfs2_calc_new_backup_super(bm_inode, group, - cl_cpg, 0); + cl_cpg, old_bg_clusters, 0); le16_add_cpu(&group->bg_free_bits_count, backups); le16_add_cpu(&group->bg_bits, -1 * num_bits); le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); -- cgit v0.10.2 From 6df38689e0e9a07ff4f42c06b302e203b33667e9 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 29 Dec 2015 14:54:10 -0800 Subject: mm: memcontrol: fix possible memcg leak due to interrupted reclaim Memory cgroup reclaim can be interrupted with mem_cgroup_iter_break() once enough pages have been reclaimed, in which case, in contrast to a full round-trip over a cgroup sub-tree, the current position stored in mem_cgroup_reclaim_iter of the target cgroup does not get invalidated and so is left holding the reference to the last scanned cgroup. If the target cgroup does not get scanned again (we might have just reclaimed the last page or all processes might exit and free their memory voluntary), we will leak it, because there is nobody to put the reference held by the iterator. The problem is easy to reproduce by running the following command sequence in a loop: mkdir /sys/fs/cgroup/memory/test echo 100M > /sys/fs/cgroup/memory/test/memory.limit_in_bytes echo $$ > /sys/fs/cgroup/memory/test/cgroup.procs memhog 150M echo $$ > /sys/fs/cgroup/memory/cgroup.procs rmdir test The cgroups generated by it will never get freed. This patch fixes this issue by making mem_cgroup_iter avoid taking reference to the current position. In order not to hit use-after-free bug while running reclaim in parallel with cgroup deletion, we make use of ->css_released cgroup callback to clear references to the dying cgroup in all reclaim iterators that might refer to it. This callback is called right before scheduling rcu work which will free css, so if we access iter->position from rcu read section, we might be sure it won't go away under us. [hannes@cmpxchg.org: clean up css ref handling] Fixes: 5ac8fb31ad2e ("mm: memcontrol: convert reclaim iterator to simple css refcounting") Signed-off-by: Vladimir Davydov Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: [3.19+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e234c21..fc10620 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -903,14 +903,20 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, if (prev && reclaim->generation != iter->generation) goto out_unlock; - do { + while (1) { pos = READ_ONCE(iter->position); + if (!pos || css_tryget(&pos->css)) + break; /* - * A racing update may change the position and - * put the last reference, hence css_tryget(), - * or retry to see the updated position. + * css reference reached zero, so iter->position will + * be cleared by ->css_released. However, we should not + * rely on this happening soon, because ->css_released + * is called from a work queue, and by busy-waiting we + * might block it. So we clear iter->position right + * away. */ - } while (pos && !css_tryget(&pos->css)); + (void)cmpxchg(&iter->position, pos, NULL); + } } if (pos) @@ -956,17 +962,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, } if (reclaim) { - if (cmpxchg(&iter->position, pos, memcg) == pos) { - if (memcg) - css_get(&memcg->css); - if (pos) - css_put(&pos->css); - } - /* - * pairs with css_tryget when dereferencing iter->position - * above. + * The position could have already been updated by a competing + * thread, so check that the value hasn't changed since we read + * it to avoid reclaiming from the same cgroup twice. */ + (void)cmpxchg(&iter->position, pos, memcg); + if (pos) css_put(&pos->css); @@ -999,6 +1001,28 @@ void mem_cgroup_iter_break(struct mem_cgroup *root, css_put(&prev->css); } +static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) +{ + struct mem_cgroup *memcg = dead_memcg; + struct mem_cgroup_reclaim_iter *iter; + struct mem_cgroup_per_zone *mz; + int nid, zid; + int i; + + while ((memcg = parent_mem_cgroup(memcg))) { + for_each_node(nid) { + for (zid = 0; zid < MAX_NR_ZONES; zid++) { + mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; + for (i = 0; i <= DEF_PRIORITY; i++) { + iter = &mz->iter[i]; + cmpxchg(&iter->position, + dead_memcg, NULL); + } + } + } + } +} + /* * Iteration constructs for visiting all cgroups (under a tree). If * loops are exited prematurely (break), mem_cgroup_iter_break() must @@ -4324,6 +4348,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) wb_memcg_offline(memcg); } +static void mem_cgroup_css_released(struct cgroup_subsys_state *css) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + + invalidate_reclaim_iterators(memcg); +} + static void mem_cgroup_css_free(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); @@ -5185,6 +5216,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .css_alloc = mem_cgroup_css_alloc, .css_online = mem_cgroup_css_online, .css_offline = mem_cgroup_css_offline, + .css_released = mem_cgroup_css_released, .css_free = mem_cgroup_css_free, .css_reset = mem_cgroup_css_reset, .can_attach = mem_cgroup_can_attach, -- cgit v0.10.2 From facca61683f937f31f90307cc64851436c8a3e21 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 29 Dec 2015 14:54:13 -0800 Subject: arch/x86/xen/suspend.c: include xen/xen.h Fix the build warning: arch/x86/xen/suspend.c: In function 'xen_arch_pre_suspend': arch/x86/xen/suspend.c:70:9: error: implicit declaration of function 'xen_pv_domain' [-Werror=implicit-function-declaration] if (xen_pv_domain()) ^ Reported-by: kbuild test robot Cc: Sasha Levin Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: David Vrabel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 3705eab..df0c405 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,6 +1,7 @@ #include #include +#include #include #include #include -- cgit v0.10.2 From 6122192eb6f2a3311bbf4600c5537fbe1c223022 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 29 Dec 2015 14:54:16 -0800 Subject: m32r: fix build failure m32r allmodconfig is failing with: In file included from ../include/linux/kvm_para.h:4:0, from ../kernel/watchdog.c:26: ../include/uapi/linux/kvm_para.h:30:26: fatal error: asm/kvm_para.h: No such file or directory kvm_para.h was not included in the build. Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index fd104bd..860e440 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h +generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += module.h -- cgit v0.10.2 From 92a8ed4c7643809123ef0a65424569eaacc5c6b0 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Tue, 29 Dec 2015 14:54:19 -0800 Subject: m32r: add io*_rep helpers m32r allmodconfig was failing with the error: error: implicit declaration of function 'read' On checking io.h it turned out that 'read' is not defined but 'readb' is defined and 'ioread8' will then obviously mean 'readb'. At the same time some of the helper functions ioreadN_rep() and iowriteN_rep() were missing which also led to the build failure. Signed-off-by: Sudip Mukherjee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/arch/m32r/include/asm/io.h b/arch/m32r/include/asm/io.h index 61b8931..4b0f5e0 100644 --- a/arch/m32r/include/asm/io.h +++ b/arch/m32r/include/asm/io.h @@ -168,13 +168,21 @@ static inline void _writel(unsigned long l, unsigned long addr) #define writew_relaxed writew #define writel_relaxed writel -#define ioread8 read +#define ioread8 readb #define ioread16 readw #define ioread32 readl #define iowrite8 writeb #define iowrite16 writew #define iowrite32 writel +#define ioread8_rep(p, dst, count) insb((unsigned long)(p), (dst), (count)) +#define ioread16_rep(p, dst, count) insw((unsigned long)(p), (dst), (count)) +#define ioread32_rep(p, dst, count) insl((unsigned long)(p), (dst), (count)) + +#define iowrite8_rep(p, src, count) outsb((unsigned long)(p), (src), (count)) +#define iowrite16_rep(p, src, count) outsw((unsigned long)(p), (src), (count)) +#define iowrite32_rep(p, src, count) outsl((unsigned long)(p), (src), (count)) + #define ioread16be(addr) be16_to_cpu(readw(addr)) #define ioread32be(addr) be32_to_cpu(readl(addr)) #define iowrite16be(v, addr) writew(cpu_to_be16(v), (addr)) -- cgit v0.10.2 From b5a8bc338e68d5f6f753e14ae59b30e75a5ffdde Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Tue, 29 Dec 2015 14:54:22 -0800 Subject: ocfs2: fix flock panic issue Commit 4f6563677ae8 ("Move locks API users to locks_lock_inode_wait()") move flock/posix lock indentify code to locks_lock_inode_wait(), but missed to set fl_flags to FL_FLOCK which caused the following kernel panic on 4.4.0_rc5. kernel BUG at fs/locks.c:1895! invalid opcode: 0000 [#1] SMP Modules linked in: ocfs2(O) ocfs2_dlmfs(O) ocfs2_stack_o2cb(O) ocfs2_dlm(O) ocfs2_nodemanager(O) ocfs2_stackglue(O) iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi xen_kbdfront xen_netfront xen_fbfront xen_blkfront CPU: 0 PID: 20268 Comm: flock_unit_test Tainted: G O 4.4.0-rc5-next-20151217 #1 Hardware name: Xen HVM domU, BIOS 4.3.1OVM 05/14/2014 task: ffff88007b3672c0 ti: ffff880028b58000 task.ti: ffff880028b58000 RIP: locks_lock_inode_wait+0x2e/0x160 Call Trace: ocfs2_do_flock+0x91/0x160 [ocfs2] ocfs2_flock+0x76/0xd0 [ocfs2] SyS_flock+0x10f/0x1a0 entry_SYSCALL_64_fastpath+0x12/0x71 Code: e5 41 57 41 56 49 89 fe 41 55 41 54 53 48 89 f3 48 81 ec 88 00 00 00 8b 46 40 83 e0 03 83 f8 01 0f 84 ad 00 00 00 83 f8 02 74 04 <0f> 0b eb fe 4c 8d ad 60 ff ff ff 4c 8d 7b 58 e8 0e 8e 73 00 4d RIP locks_lock_inode_wait+0x2e/0x160 RSP ---[ end trace dfca74ec9b5b274c ]--- Fixes: 4f6563677ae8 ("Move locks API users to locks_lock_inode_wait()") Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Cc: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 652ece4..d56f007 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c @@ -67,7 +67,10 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode, */ locks_lock_file_wait(file, - &(struct file_lock){.fl_type = F_UNLCK}); + &(struct file_lock) { + .fl_type = F_UNLCK, + .fl_flags = FL_FLOCK + }); ocfs2_file_unlock(file); } -- cgit v0.10.2 From 5f0f2887f4de9508dcf438deab28f1de8070c271 Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Tue, 29 Dec 2015 14:54:25 -0800 Subject: mm/memory_hotplug.c: check for missing sections in test_pages_in_a_zone() test_pages_in_a_zone() does not account for the possibility of missing sections in the given pfn range. pfn_valid_within always returns 1 when CONFIG_HOLES_IN_ZONE is not set, allowing invalid pfns from missing sections to pass the test, leading to a kernel oops. Wrap an additional pfn loop with PAGES_PER_SECTION granularity to check for missing sections before proceeding into the zone-check code. This also prevents a crash from offlining memory devices with missing sections. Despite this, it may be a good idea to keep the related patch '[PATCH 3/3] drivers: memory: prohibit offlining of memory blocks with missing sections' because missing sections in a memory block may lead to other problems not covered by the scope of this fix. Signed-off-by: Andrew Banman Acked-by: Alex Thorlton Cc: Russ Anderson Cc: Alex Thorlton Cc: Yinghai Lu Cc: Greg KH Cc: Seth Jennings Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 67d488a..a042a9d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1375,23 +1375,30 @@ int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) */ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) { - unsigned long pfn; + unsigned long pfn, sec_end_pfn; struct zone *zone = NULL; struct page *page; int i; - for (pfn = start_pfn; + for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn); pfn < end_pfn; - pfn += MAX_ORDER_NR_PAGES) { - i = 0; - /* This is just a CONFIG_HOLES_IN_ZONE check.*/ - while ((i < MAX_ORDER_NR_PAGES) && !pfn_valid_within(pfn + i)) - i++; - if (i == MAX_ORDER_NR_PAGES) + pfn = sec_end_pfn + 1, sec_end_pfn += PAGES_PER_SECTION) { + /* Make sure the memory section is present first */ + if (!present_section_nr(pfn_to_section_nr(pfn))) continue; - page = pfn_to_page(pfn + i); - if (zone && page_zone(page) != zone) - return 0; - zone = page_zone(page); + for (; pfn < sec_end_pfn && pfn < end_pfn; + pfn += MAX_ORDER_NR_PAGES) { + i = 0; + /* This is just a CONFIG_HOLES_IN_ZONE check.*/ + while ((i < MAX_ORDER_NR_PAGES) && + !pfn_valid_within(pfn + i)) + i++; + if (i == MAX_ORDER_NR_PAGES) + continue; + page = pfn_to_page(pfn + i); + if (zone && page_zone(page) != zone) + return 0; + zone = page_zone(page); + } } return 1; } -- cgit v0.10.2 From cc28d6d80f6ab494b10f0e2ec949eacd610f66e3 Mon Sep 17 00:00:00 2001 From: xuejiufei Date: Tue, 29 Dec 2015 14:54:29 -0800 Subject: ocfs2/dlm: clear migration_pending when migration target goes down We have found a BUG on res->migration_pending when migrating lock resources. The situation is as follows. dlm_mark_lockres_migration res->migration_pending = 1; __dlm_lockres_reserve_ast dlm_lockres_release_ast returns with res->migration_pending remains because other threads reserve asts wait dlm_migration_can_proceed returns 1 >>>>>>> o2hb found that target goes down and remove target from domain_map dlm_migration_can_proceed returns 1 dlm_mark_lockres_migrating returns -ESHOTDOWN with res->migration_pending still remains. When reentering dlm_mark_lockres_migrating(), it will trigger the BUG_ON with res->migration_pending. So clear migration_pending when target is down. Signed-off-by: Jiufei Xue Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ce38b4c..84f2f80 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2843,6 +2843,8 @@ again: res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; if (!ret) BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); + else + res->migration_pending = 0; spin_unlock(&res->spinlock); /* -- cgit v0.10.2 From 6cdb18ad98a49f7e9b95d538a0614cde827404b8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 29 Dec 2015 14:54:32 -0800 Subject: mm/vmstat: fix overflow in mod_zone_page_state() mod_zone_page_state() takes a "delta" integer argument. delta contains the number of pages that should be added or subtracted from a struct zone's vm_stat field. If a zone is larger than 8TB this will cause overflows. E.g. for a zone with a size slightly larger than 8TB the line mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages); in mm/page_alloc.c:free_area_init_core() will result in a negative result for the NR_ALLOC_BATCH entry within the zone's vm_stat, since 8TB contain 0x8xxxxxxx pages which will be sign extended to a negative value. Fix this by changing the delta argument to long type. This could fix an early boot problem seen on s390, where we have a 9TB system with only one node. ZONE_DMA contains 2GB and ZONE_NORMAL the rest. The system is trying to allocate a GFP_DMA page but ZONE_DMA is completely empty, so it tries to reclaim pages in an endless loop. This was seen on a heavily patched 3.10 kernel. One possible explaination seem to be the overflows caused by mod_zone_page_state(). Unfortunately I did not have the chance to verify that this patch actually fixes the problem, since I don't have access to the system right now. However the overflow problem does exist anyway. Given the description that a system with slightly less than 8TB does work, this seems to be a candidate for the observed problem. Signed-off-by: Heiko Carstens Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 5dbc8b0..3e5d907 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -176,11 +176,11 @@ extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp); #define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d)) #ifdef CONFIG_SMP -void __mod_zone_page_state(struct zone *, enum zone_stat_item item, int); +void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long); void __inc_zone_page_state(struct page *, enum zone_stat_item); void __dec_zone_page_state(struct page *, enum zone_stat_item); -void mod_zone_page_state(struct zone *, enum zone_stat_item, int); +void mod_zone_page_state(struct zone *, enum zone_stat_item, long); void inc_zone_page_state(struct page *, enum zone_stat_item); void dec_zone_page_state(struct page *, enum zone_stat_item); @@ -205,7 +205,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat, * The functions directly modify the zone and global counters. */ static inline void __mod_zone_page_state(struct zone *zone, - enum zone_stat_item item, int delta) + enum zone_stat_item item, long delta) { zone_page_state_add(delta, zone, item); } diff --git a/mm/vmstat.c b/mm/vmstat.c index 0d5712b..4ebc17d 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -219,7 +219,7 @@ void set_pgdat_percpu_threshold(pg_data_t *pgdat, * particular counter cannot be updated from interrupt context. */ void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, - int delta) + long delta) { struct per_cpu_pageset __percpu *pcp = zone->pageset; s8 __percpu *p = pcp->vm_stat_diff + item; @@ -318,8 +318,8 @@ EXPORT_SYMBOL(__dec_zone_page_state); * 1 Overstepping half of threshold * -1 Overstepping minus half of threshold */ -static inline void mod_state(struct zone *zone, - enum zone_stat_item item, int delta, int overstep_mode) +static inline void mod_state(struct zone *zone, enum zone_stat_item item, + long delta, int overstep_mode) { struct per_cpu_pageset __percpu *pcp = zone->pageset; s8 __percpu *p = pcp->vm_stat_diff + item; @@ -357,7 +357,7 @@ static inline void mod_state(struct zone *zone, } void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, - int delta) + long delta) { mod_state(zone, item, delta, 0); } @@ -384,7 +384,7 @@ EXPORT_SYMBOL(dec_zone_page_state); * Use interrupt disable to serialize counter updates */ void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, - int delta) + long delta) { unsigned long flags; -- cgit v0.10.2 From 3d8acd1f667b45c531401c8f0c2033072e32a05d Mon Sep 17 00:00:00 2001 From: Gary Wang Date: Wed, 23 Dec 2015 16:11:35 +0800 Subject: drm/i915: increase the tries for HDMI hotplug live status checking The total delay of HDMI hotplug detecting with 30ms is sometimes not enoughtfor HDMI live status up with specific HDMI monitors in BSW platform. After doing experiments for following monitors, it needs 80ms at least for those worst cases. Lenovo L246 1xwA (4 failed, necessary hot-plug delay: 58/40/60/40ms) Philips HH2AP (9 failed, necessary hot-plug delay: 80/50/50/60/46/40/58/58/39ms) BENQ ET-0035-N (6 failed, necessary hot-plug delay: 60/50/50/80/80/40ms) DELL U2713HM (2 failed, necessary hot-plug delay: 58/59ms) HP HP-LP2475w (5 failed, necessary hot-plug delay: 70/50/40/60/40ms) It looks like 70-80 ms is BSW platform needs in some bad cases of the monitors at this end (8 times delay at most). Keep less than 100ms for HDCP pulse HPD low (with at least 100ms) to respond a plug out. Reviewed-by: Cooper Chiou Tested-by: Gary Wang Cc: Gavin Hindman Cc: Sonika Jindal Cc: Shashank Sharma Cc: Shobhit Kumar Signed-off-by: Gary Wang Link: http://patchwork.freedesktop.org/patch/msgid/1450858295-12804-1-git-send-email-gary.c.wang@intel.com Tested-by: Shobhit Kumar Cc: drm-intel-fixes@lists.freedesktop.org Fixes: 237ed86c693d ("drm/i915: Check live status before reading edid") Signed-off-by: Daniel Vetter (cherry picked from commit f8d03ea0053b23de42c828d559016eabe0b91523) [Jani: undo the file mode change of the original commit] Signed-off-by: Jani Nikula diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 64086f2..e6c035b 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1381,7 +1381,7 @@ intel_hdmi_detect(struct drm_connector *connector, bool force) intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); - for (try = 0; !live_status && try < 4; try++) { + for (try = 0; !live_status && try < 9; try++) { if (try) msleep(10); live_status = intel_digital_port_connected(dev_priv, -- cgit v0.10.2 From 574aab1e02837927e3c94193eedf94128ad10b6d Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 29 Dec 2015 13:29:55 +0100 Subject: net, socket, socket_wq: fix missing initialization of flags Commit ceb5d58b2170 ("net: fix sock_wake_async() rcu protection") from the current 4.4 release cycle introduced a new flags member in struct socket_wq and moved SOCKWQ_ASYNC_NOSPACE and SOCKWQ_ASYNC_WAITDATA from struct socket's flags member into that new place. Unfortunately, the new flags field is never initialized properly, at least not for the struct socket_wq instance created in sock_alloc_inode(). One particular issue I encountered because of this is that my GNU Emacs failed to draw anything on my desktop -- i.e. what I got is a transparent window, including the title bar. Bisection lead to the commit mentioned above and further investigation by means of strace told me that Emacs is indeed speaking to my Xorg through an O_ASYNC AF_UNIX socket. This is reproducible 100% of times and the fact that properly initializing the struct socket_wq ->flags fixes the issue leads me to the conclusion that somehow SOCKWQ_ASYNC_WAITDATA got set in the uninitialized ->flags, preventing my Emacs from receiving any SIGIO's due to data becoming available and it got stuck. Make sock_alloc_inode() set the newly created struct socket_wq's ->flags member to zero. Fixes: ceb5d58b2170 ("net: fix sock_wake_async() rcu protection") Signed-off-by: Nicolai Stange Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/socket.c b/net/socket.c index 29822d6..d730ef9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -257,6 +257,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } init_waitqueue_head(&wq->wait); wq->fasync_list = NULL; + wq->flags = 0; RCU_INIT_POINTER(ei->socket.wq, wq); ei->socket.state = SS_UNCONNECTED; -- cgit v0.10.2 From 068d8bd338e855286aea54e70d1c101569284b21 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 29 Dec 2015 17:49:25 +0800 Subject: sctp: sctp should release assoc when sctp_make_abort_user return NULL in sctp_close In sctp_close, sctp_make_abort_user may return NULL because of memory allocation failure. If this happens, it will bypass any state change and never free the assoc. The assoc has no chance to be freed and it will be kept in memory with the state it had even after the socket is closed by sctp_close(). So if sctp_make_abort_user fails to allocate memory, we should abort the asoc via sctp_primitive_ABORT as well. Just like the annotation in sctp_sf_cookie_wait_prm_abort and sctp_sf_do_9_1_prm_abort said, "Even if we can't send the ABORT due to low memory delete the TCB. This is a departure from our typical NOMEM handling". But then the chunk is NULL (low memory) and the SCTP_CMD_REPLY cmd would dereference the chunk pointer, and system crash. So we should add SCTP_CMD_REPLY cmd only when the chunk is not NULL, just like other places where it adds SCTP_CMD_REPLY cmd. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index cd34a4a..22c2bf3 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4829,7 +4829,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( retval = SCTP_DISPOSITION_CONSUME; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + if (abort) + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -4966,7 +4967,8 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); retval = SCTP_DISPOSITION_CONSUME; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + if (abort) + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 529ed35..ef1d90f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1516,8 +1516,7 @@ static void sctp_close(struct sock *sk, long timeout) struct sctp_chunk *chunk; chunk = sctp_make_abort_user(asoc, NULL, 0); - if (chunk) - sctp_primitive_ABORT(net, asoc, chunk); + sctp_primitive_ABORT(net, asoc, chunk); } else sctp_primitive_SHUTDOWN(net, asoc, NULL); } -- cgit v0.10.2 From 8b30ca73b7cc7f2177cfc4e8274d2ebdba328cd5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 31 Dec 2015 15:18:02 -0500 Subject: sparc: Add all necessary direct socket system calls. The GLIBC folks would like to eliminate socketcall support eventually, and this makes sense regardless so wire them all up. Signed-off-by: David S. Miller diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index f31a124..5655912 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -418,8 +418,11 @@ #define __NR_execveat 350 #define __NR_membarrier 351 #define __NR_userfaultfd 352 +#define __NR_bind 353 +#define __NR_listen 354 +#define __NR_setsockopt 355 -#define NR_syscalls 353 +#define NR_syscalls 356 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 78e8029..d557a25 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -35,18 +35,18 @@ sys_call_table: /*80*/ .long sys_setgroups16, sys_getpgrp, sys_setgroups, sys_setitimer, sys_ftruncate64 /*85*/ .long sys_swapon, sys_getitimer, sys_setuid, sys_sethostname, sys_setgid /*90*/ .long sys_dup2, sys_setfsuid, sys_fcntl, sys_select, sys_setfsgid -/*95*/ .long sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall +/*95*/ .long sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept /*100*/ .long sys_getpriority, sys_rt_sigreturn, sys_rt_sigaction, sys_rt_sigprocmask, sys_rt_sigpending /*105*/ .long sys_rt_sigtimedwait, sys_rt_sigqueueinfo, sys_rt_sigsuspend, sys_setresuid, sys_getresuid -/*110*/ .long sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall -/*115*/ .long sys_getgroups, sys_gettimeofday, sys_getrusage, sys_nis_syscall, sys_getcwd +/*110*/ .long sys_setresgid, sys_getresgid, sys_setregid, sys_recvmsg, sys_sendmsg +/*115*/ .long sys_getgroups, sys_gettimeofday, sys_getrusage, sys_getsockopt, sys_getcwd /*120*/ .long sys_readv, sys_writev, sys_settimeofday, sys_fchown16, sys_fchmod -/*125*/ .long sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate -/*130*/ .long sys_ftruncate, sys_flock, sys_lstat64, sys_nis_syscall, sys_nis_syscall -/*135*/ .long sys_nis_syscall, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64 -/*140*/ .long sys_sendfile64, sys_nis_syscall, sys_futex, sys_gettid, sys_getrlimit +/*125*/ .long sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, sys_truncate +/*130*/ .long sys_ftruncate, sys_flock, sys_lstat64, sys_sendto, sys_shutdown +/*135*/ .long sys_socketpair, sys_mkdir, sys_rmdir, sys_utimes, sys_stat64 +/*140*/ .long sys_sendfile64, sys_getpeername, sys_futex, sys_gettid, sys_getrlimit /*145*/ .long sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write -/*150*/ .long sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 +/*150*/ .long sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 /*155*/ .long sys_fcntl64, sys_inotify_rm_watch, sys_statfs, sys_fstatfs, sys_oldumount /*160*/ .long sys_sched_setaffinity, sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall /*165*/ .long sys_quotactl, sys_set_tid_address, sys_mount, sys_ustat, sys_setxattr @@ -87,4 +87,5 @@ sys_call_table: /*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf -/*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd +/*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen +/*355*/ .long sys_setsockopt diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 2549c2c..898684c 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -37,15 +37,15 @@ sys_call_table32: /*80*/ .word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, sys32_ftruncate64 .word sys_swapon, compat_sys_getitimer, sys_setuid, sys_sethostname, sys_setgid /*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid - .word sys_fsync, sys_setpriority, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall + .word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept /*100*/ .word sys_getpriority, sys32_rt_sigreturn, compat_sys_rt_sigaction, compat_sys_rt_sigprocmask, compat_sys_rt_sigpending .word compat_sys_rt_sigtimedwait, compat_sys_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid -/*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, sys_nis_syscall, sys_nis_syscall - .word sys_getgroups, compat_sys_gettimeofday, compat_sys_getrusage, sys_nis_syscall, sys_getcwd +/*110*/ .word sys_setresgid, sys_getresgid, sys_setregid, compat_sys_recvmsg, compat_sys_sendmsg + .word sys_getgroups, compat_sys_gettimeofday, compat_sys_getrusage, compat_sys_getsockopt, sys_getcwd /*120*/ .word compat_sys_readv, compat_sys_writev, compat_sys_settimeofday, sys_fchown16, sys_fchmod - .word sys_nis_syscall, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate -/*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 + .word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate +/*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown + .word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 /*140*/ .word sys_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit .word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write /*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 @@ -88,7 +88,8 @@ sys_call_table32: .word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf -/*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd +/*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen + .word compat_sys_setsockopt #endif /* CONFIG_COMPAT */ @@ -168,4 +169,5 @@ sys_call_table: .word sys_syncfs, sys_sendmmsg, sys_setns, sys_process_vm_readv, sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf -/*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd +/*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen + .word sys_setsockopt -- cgit v0.10.2 From 42d85c52f88dd0d2159f531eb33cc66d6e3e60c0 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 31 Dec 2015 15:38:56 -0500 Subject: sparc: Wire up mlock2 system call. Signed-off-by: David S. Miller diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h index 5655912..1c26d44 100644 --- a/arch/sparc/include/uapi/asm/unistd.h +++ b/arch/sparc/include/uapi/asm/unistd.h @@ -421,8 +421,9 @@ #define __NR_bind 353 #define __NR_listen 354 #define __NR_setsockopt 355 +#define __NR_mlock2 356 -#define NR_syscalls 356 +#define NR_syscalls 357 /* Bitmask values returned from kern_features system call. */ #define KERN_FEATURE_MIXED_MODE_STACK 0x00000001 diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index d557a25..e663b6c 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -88,4 +88,4 @@ sys_call_table: /*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr /*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen -/*355*/ .long sys_setsockopt +/*355*/ .long sys_setsockopt, sys_mlock2 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 898684c..1557121 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -89,7 +89,7 @@ sys_call_table32: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word compat_sys_setsockopt + .word compat_sys_setsockopt, sys_mlock2 #endif /* CONFIG_COMPAT */ @@ -170,4 +170,4 @@ sys_call_table: /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen - .word sys_setsockopt + .word sys_setsockopt, sys_mlock2 -- cgit v0.10.2 From 168309855a7d1e16db751e9c647119fe2d2dc878 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 3 Jan 2016 15:15:37 -0800 Subject: Linux 4.4-rc8 diff --git a/Makefile b/Makefile index 1122433..9d94ade 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Blurry Fish Butt # *DOCUMENTATION* -- cgit v0.10.2 From b43417216e9ce55e1f1ab7c834c7ab43db0b53e1 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 5 Jan 2016 18:27:29 +0100 Subject: compat_ioctl: don't look up the fd twice In code in fs/compat_ioctl.c that translates ioctl arguments into a in-kernel structure, then performs sys_ioctl, possibly under set_fs(KERNEL_DS), this commit changes the sys_ioctl calls to do_ioctl calls. do_ioctl is a new function that does the same thing as sys_ioctl, but doesn't look up the fd again. This change is made to avoid (potential) security issues because of ioctl handlers that accept one of the ioctl commands I2C_FUNCS, VIDEO_GET_EVENT, MTIOCPOS, MTIOCGET, TIOCGSERIAL, TIOCSSERIAL, RTC_IRQP_READ, RTC_EPOCH_READ. This can happen for multiple reasons: - The ioctl command number could be reused. - The ioctl handler might not check the full ioctl command. This is e.g. true for drm_ioctl. - The ioctl handler is very special, e.g. cuse_file_ioctl The real issue is that set_fs(KERNEL_DS) is used here, but that's fixed in a separate commit "compat_ioctl: don't call do_ioctl under set_fs(KERNEL_DS)". This change mitigates potential security issues by preventing a race that permits invocation of unlocked_ioctl handlers under KERNEL_DS through compat code even if a corresponding compat_ioctl handler exists. So far, no way has been identified to use this to damage kernel memory without having CAP_SYS_ADMIN in the init ns (with the capability, doing reads/writes at arbitrary kernel addresses should be easy through CUSE's ioctl handler with FUSE_IOCTL_UNRESTRICTED set). [AV: two missed sys_ioctl() taken care of] Signed-off-by: Jann Horn Signed-off-by: Al Viro diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index dcf2653..06e60ca 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -115,15 +115,27 @@ #include #endif -static int w_long(unsigned int fd, unsigned int cmd, - compat_ulong_t __user *argp) +static int do_ioctl(struct file *file, unsigned int fd, + unsigned int cmd, unsigned long arg) +{ + int err; + + err = security_file_ioctl(file, cmd, arg); + if (err) + return err; + + return do_vfs_ioctl(file, fd, cmd, arg); +} + +static int w_long(struct file *file, unsigned int fd, + unsigned int cmd, compat_ulong_t __user *argp) { mm_segment_t old_fs = get_fs(); int err; unsigned long val; set_fs (KERNEL_DS); - err = sys_ioctl(fd, cmd, (unsigned long)&val); + err = do_ioctl(file, fd, cmd, (unsigned long)&val); set_fs (old_fs); if (!err && put_user(val, argp)) return -EFAULT; @@ -139,15 +151,15 @@ struct compat_video_event { } u; }; -static int do_video_get_event(unsigned int fd, unsigned int cmd, - struct compat_video_event __user *up) +static int do_video_get_event(struct file *file, unsigned int fd, + unsigned int cmd, struct compat_video_event __user *up) { struct video_event kevent; mm_segment_t old_fs = get_fs(); int err; set_fs(KERNEL_DS); - err = sys_ioctl(fd, cmd, (unsigned long) &kevent); + err = do_ioctl(file, fd, cmd, (unsigned long) &kevent); set_fs(old_fs); if (!err) { @@ -169,8 +181,8 @@ struct compat_video_still_picture { int32_t size; }; -static int do_video_stillpicture(unsigned int fd, unsigned int cmd, - struct compat_video_still_picture __user *up) +static int do_video_stillpicture(struct file *file, unsigned int fd, + unsigned int cmd, struct compat_video_still_picture __user *up) { struct video_still_picture __user *up_native; compat_uptr_t fp; @@ -190,7 +202,7 @@ static int do_video_stillpicture(unsigned int fd, unsigned int cmd, if (err) return -EFAULT; - err = sys_ioctl(fd, cmd, (unsigned long) up_native); + err = do_ioctl(file, fd, cmd, (unsigned long) up_native); return err; } @@ -200,8 +212,8 @@ struct compat_video_spu_palette { compat_uptr_t palette; }; -static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, - struct compat_video_spu_palette __user *up) +static int do_video_set_spu_palette(struct file *file, unsigned int fd, + unsigned int cmd, struct compat_video_spu_palette __user *up) { struct video_spu_palette __user *up_native; compat_uptr_t palp; @@ -218,7 +230,7 @@ static int do_video_set_spu_palette(unsigned int fd, unsigned int cmd, if (err) return -EFAULT; - err = sys_ioctl(fd, cmd, (unsigned long) up_native); + err = do_ioctl(file, fd, cmd, (unsigned long) up_native); return err; } @@ -276,7 +288,7 @@ static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iov return 0; } -static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, +static int sg_ioctl_trans(struct file *file, unsigned int fd, unsigned int cmd, sg_io_hdr32_t __user *sgio32) { sg_io_hdr_t __user *sgio; @@ -289,7 +301,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, if (get_user(interface_id, &sgio32->interface_id)) return -EFAULT; if (interface_id != 'S') - return sys_ioctl(fd, cmd, (unsigned long)sgio32); + return do_ioctl(file, fd, cmd, (unsigned long)sgio32); if (get_user(iovec_count, &sgio32->iovec_count)) return -EFAULT; @@ -349,7 +361,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, if (put_user(compat_ptr(data), &sgio->usr_ptr)) return -EFAULT; - err = sys_ioctl(fd, cmd, (unsigned long) sgio); + err = do_ioctl(file, fd, cmd, (unsigned long) sgio); if (err >= 0) { void __user *datap; @@ -380,13 +392,13 @@ struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ int unused; }; -static int sg_grt_trans(unsigned int fd, unsigned int cmd, struct - compat_sg_req_info __user *o) +static int sg_grt_trans(struct file *file, unsigned int fd, + unsigned int cmd, struct compat_sg_req_info __user *o) { int err, i; sg_req_info_t __user *r; r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE); - err = sys_ioctl(fd,cmd,(unsigned long)r); + err = do_ioctl(file, fd, cmd, (unsigned long)r); if (err < 0) return err; for (i = 0; i < SG_MAX_QUEUE; i++) { @@ -412,8 +424,8 @@ struct sock_fprog32 { #define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32) #define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32) -static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd, - struct sock_fprog32 __user *u_fprog32) +static int ppp_sock_fprog_ioctl_trans(struct file *file, unsigned int fd, + unsigned int cmd, struct sock_fprog32 __user *u_fprog32) { struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog)); void __user *fptr64; @@ -435,7 +447,7 @@ static int ppp_sock_fprog_ioctl_trans(unsigned int fd, unsigned int cmd, else cmd = PPPIOCSACTIVE; - return sys_ioctl(fd, cmd, (unsigned long) u_fprog64); + return do_ioctl(file, fd, cmd, (unsigned long) u_fprog64); } struct ppp_option_data32 { @@ -451,7 +463,7 @@ struct ppp_idle32 { }; #define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32) -static int ppp_gidle(unsigned int fd, unsigned int cmd, +static int ppp_gidle(struct file *file, unsigned int fd, unsigned int cmd, struct ppp_idle32 __user *idle32) { struct ppp_idle __user *idle; @@ -460,7 +472,7 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd, idle = compat_alloc_user_space(sizeof(*idle)); - err = sys_ioctl(fd, PPPIOCGIDLE, (unsigned long) idle); + err = do_ioctl(file, fd, PPPIOCGIDLE, (unsigned long) idle); if (!err) { if (get_user(xmit, &idle->xmit_idle) || @@ -472,7 +484,7 @@ static int ppp_gidle(unsigned int fd, unsigned int cmd, return err; } -static int ppp_scompress(unsigned int fd, unsigned int cmd, +static int ppp_scompress(struct file *file, unsigned int fd, unsigned int cmd, struct ppp_option_data32 __user *odata32) { struct ppp_option_data __user *odata; @@ -492,7 +504,7 @@ static int ppp_scompress(unsigned int fd, unsigned int cmd, sizeof(__u32) + sizeof(int))) return -EFAULT; - return sys_ioctl(fd, PPPIOCSCOMPRESS, (unsigned long) odata); + return do_ioctl(file, fd, PPPIOCSCOMPRESS, (unsigned long) odata); } #ifdef CONFIG_BLOCK @@ -512,7 +524,8 @@ struct mtpos32 { }; #define MTIOCPOS32 _IOR('m', 3, struct mtpos32) -static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp) +static int mt_ioctl_trans(struct file *file, unsigned int fd, + unsigned int cmd, void __user *argp) { mm_segment_t old_fs = get_fs(); struct mtget get; @@ -534,7 +547,7 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp) break; } set_fs (KERNEL_DS); - err = sys_ioctl (fd, kcmd, (unsigned long)karg); + err = do_ioctl(file, fd, kcmd, (unsigned long)karg); set_fs (old_fs); if (err) return err; @@ -605,8 +618,8 @@ struct serial_struct32 { compat_int_t reserved[1]; }; -static int serial_struct_ioctl(unsigned fd, unsigned cmd, - struct serial_struct32 __user *ss32) +static int serial_struct_ioctl(struct file *file, unsigned fd, + unsigned cmd, struct serial_struct32 __user *ss32) { typedef struct serial_struct32 SS32; int err; @@ -629,7 +642,7 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, ss.iomap_base = 0UL; } set_fs(KERNEL_DS); - err = sys_ioctl(fd,cmd,(unsigned long)(&ss)); + err = do_ioctl(file, fd, cmd, (unsigned long)&ss); set_fs(oldseg); if (cmd == TIOCGSERIAL && err >= 0) { if (!access_ok(VERIFY_WRITE, ss32, sizeof(SS32))) @@ -674,8 +687,8 @@ struct i2c_rdwr_aligned { struct i2c_msg msgs[0]; }; -static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, - struct i2c_rdwr_ioctl_data32 __user *udata) +static int do_i2c_rdwr_ioctl(struct file *file, unsigned int fd, + unsigned int cmd, struct i2c_rdwr_ioctl_data32 __user *udata) { struct i2c_rdwr_aligned __user *tdata; struct i2c_msg __user *tmsgs; @@ -708,11 +721,11 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd, put_user(compat_ptr(datap), &tmsgs[i].buf)) return -EFAULT; } - return sys_ioctl(fd, cmd, (unsigned long)tdata); + return do_ioctl(file, fd, cmd, (unsigned long)tdata); } -static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, - struct i2c_smbus_ioctl_data32 __user *udata) +static int do_i2c_smbus_ioctl(struct file *file, unsigned int fd, + unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata) { struct i2c_smbus_ioctl_data __user *tdata; compat_caddr_t datap; @@ -734,7 +747,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, __put_user(compat_ptr(datap), &tdata->data)) return -EFAULT; - return sys_ioctl(fd, cmd, (unsigned long)tdata); + return do_ioctl(file, fd, cmd, (unsigned long)tdata); } #define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t) @@ -742,7 +755,8 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, #define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t) #define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t) -static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp) +static int rtc_ioctl(struct file *file, unsigned fd, + unsigned cmd, void __user *argp) { mm_segment_t oldfs = get_fs(); compat_ulong_t val32; @@ -753,7 +767,7 @@ static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp) case RTC_IRQP_READ32: case RTC_EPOCH_READ32: set_fs(KERNEL_DS); - ret = sys_ioctl(fd, (cmd == RTC_IRQP_READ32) ? + ret = do_ioctl(file, fd, (cmd == RTC_IRQP_READ32) ? RTC_IRQP_READ : RTC_EPOCH_READ, (unsigned long)&kval); set_fs(oldfs); @@ -762,9 +776,9 @@ static int rtc_ioctl(unsigned fd, unsigned cmd, void __user *argp) val32 = kval; return put_user(val32, (unsigned int __user *)argp); case RTC_IRQP_SET32: - return sys_ioctl(fd, RTC_IRQP_SET, (unsigned long)argp); + return do_ioctl(file, fd, RTC_IRQP_SET, (unsigned long)argp); case RTC_EPOCH_SET32: - return sys_ioctl(fd, RTC_EPOCH_SET, (unsigned long)argp); + return do_ioctl(file, fd, RTC_EPOCH_SET, (unsigned long)argp); } return -ENOIOCTLCMD; @@ -1443,46 +1457,46 @@ static long do_ioctl_trans(int fd, unsigned int cmd, switch (cmd) { case PPPIOCGIDLE32: - return ppp_gidle(fd, cmd, argp); + return ppp_gidle(file, fd, cmd, argp); case PPPIOCSCOMPRESS32: - return ppp_scompress(fd, cmd, argp); + return ppp_scompress(file, fd, cmd, argp); case PPPIOCSPASS32: case PPPIOCSACTIVE32: - return ppp_sock_fprog_ioctl_trans(fd, cmd, argp); + return ppp_sock_fprog_ioctl_trans(file, fd, cmd, argp); #ifdef CONFIG_BLOCK case SG_IO: - return sg_ioctl_trans(fd, cmd, argp); + return sg_ioctl_trans(file, fd, cmd, argp); case SG_GET_REQUEST_TABLE: - return sg_grt_trans(fd, cmd, argp); + return sg_grt_trans(file, fd, cmd, argp); case MTIOCGET32: case MTIOCPOS32: - return mt_ioctl_trans(fd, cmd, argp); + return mt_ioctl_trans(file, fd, cmd, argp); #endif /* Serial */ case TIOCGSERIAL: case TIOCSSERIAL: - return serial_struct_ioctl(fd, cmd, argp); + return serial_struct_ioctl(file, fd, cmd, argp); /* i2c */ case I2C_FUNCS: - return w_long(fd, cmd, argp); + return w_long(file, fd, cmd, argp); case I2C_RDWR: - return do_i2c_rdwr_ioctl(fd, cmd, argp); + return do_i2c_rdwr_ioctl(file, fd, cmd, argp); case I2C_SMBUS: - return do_i2c_smbus_ioctl(fd, cmd, argp); + return do_i2c_smbus_ioctl(file, fd, cmd, argp); /* Not implemented in the native kernel */ case RTC_IRQP_READ32: case RTC_IRQP_SET32: case RTC_EPOCH_READ32: case RTC_EPOCH_SET32: - return rtc_ioctl(fd, cmd, argp); + return rtc_ioctl(file, fd, cmd, argp); /* dvb */ case VIDEO_GET_EVENT: - return do_video_get_event(fd, cmd, argp); + return do_video_get_event(file, fd, cmd, argp); case VIDEO_STILLPICTURE: - return do_video_stillpicture(fd, cmd, argp); + return do_video_stillpicture(file, fd, cmd, argp); case VIDEO_SET_SPU_PALETTE: - return do_video_set_spu_palette(fd, cmd, argp); + return do_video_set_spu_palette(file, fd, cmd, argp); } /* -- cgit v0.10.2 From 66cf191f3eae4582a83cb4251b75b43bee95a999 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Jan 2016 09:53:30 -0500 Subject: compat_ioctl: don't pass fd around when not needed Signed-off-by: Al Viro diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 06e60ca..908837c 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -58,6 +58,8 @@ #include #include +#include "internal.h" + #include #include #include @@ -115,8 +117,7 @@ #include #endif -static int do_ioctl(struct file *file, unsigned int fd, - unsigned int cmd, unsigned long arg) +static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int err; @@ -124,10 +125,10 @@ static int do_ioctl(struct file *file, unsigned int fd, if (err) return err; - return do_vfs_ioctl(file, fd, cmd, arg); + return vfs_ioctl(file, cmd, arg); } -static int w_long(struct file *file, unsigned int fd, +static int w_long(struct file *file, unsigned int cmd, compat_ulong_t __user *argp) { mm_segment_t old_fs = get_fs(); @@ -135,7 +136,7 @@ static int w_long(struct file *file, unsigned int fd, unsigned long val; set_fs (KERNEL_DS); - err = do_ioctl(file, fd, cmd, (unsigned long)&val); + err = do_ioctl(file, cmd, (unsigned long)&val); set_fs (old_fs); if (!err && put_user(val, argp)) return -EFAULT; @@ -151,7 +152,7 @@ struct compat_video_event { } u; }; -static int do_video_get_event(struct file *file, unsigned int fd, +static int do_video_get_event(struct file *file, unsigned int cmd, struct compat_video_event __user *up) { struct video_event kevent; @@ -159,7 +160,7 @@ static int do_video_get_event(struct file *file, unsigned int fd, int err; set_fs(KERNEL_DS); - err = do_ioctl(file, fd, cmd, (unsigned long) &kevent); + err = do_ioctl(file, cmd, (unsigned long) &kevent); set_fs(old_fs); if (!err) { @@ -181,7 +182,7 @@ struct compat_video_still_picture { int32_t size; }; -static int do_video_stillpicture(struct file *file, unsigned int fd, +static int do_video_stillpicture(struct file *file, unsigned int cmd, struct compat_video_still_picture __user *up) { struct video_still_picture __user *up_native; @@ -202,7 +203,7 @@ static int do_video_stillpicture(struct file *file, unsigned int fd, if (err) return -EFAULT; - err = do_ioctl(file, fd, cmd, (unsigned long) up_native); + err = do_ioctl(file, cmd, (unsigned long) up_native); return err; } @@ -212,7 +213,7 @@ struct compat_video_spu_palette { compat_uptr_t palette; }; -static int do_video_set_spu_palette(struct file *file, unsigned int fd, +static int do_video_set_spu_palette(struct file *file, unsigned int cmd, struct compat_video_spu_palette __user *up) { struct video_spu_palette __user *up_native; @@ -230,7 +231,7 @@ static int do_video_set_spu_palette(struct file *file, unsigned int fd, if (err) return -EFAULT; - err = do_ioctl(file, fd, cmd, (unsigned long) up_native); + err = do_ioctl(file, cmd, (unsigned long) up_native); return err; } @@ -288,7 +289,7 @@ static int sg_build_iovec(sg_io_hdr_t __user *sgio, void __user *dxferp, u16 iov return 0; } -static int sg_ioctl_trans(struct file *file, unsigned int fd, unsigned int cmd, +static int sg_ioctl_trans(struct file *file, unsigned int cmd, sg_io_hdr32_t __user *sgio32) { sg_io_hdr_t __user *sgio; @@ -301,7 +302,7 @@ static int sg_ioctl_trans(struct file *file, unsigned int fd, unsigned int cmd, if (get_user(interface_id, &sgio32->interface_id)) return -EFAULT; if (interface_id != 'S') - return do_ioctl(file, fd, cmd, (unsigned long)sgio32); + return do_ioctl(file, cmd, (unsigned long)sgio32); if (get_user(iovec_count, &sgio32->iovec_count)) return -EFAULT; @@ -361,7 +362,7 @@ static int sg_ioctl_trans(struct file *file, unsigned int fd, unsigned int cmd, if (put_user(compat_ptr(data), &sgio->usr_ptr)) return -EFAULT; - err = do_ioctl(file, fd, cmd, (unsigned long) sgio); + err = do_ioctl(file, cmd, (unsigned long) sgio); if (err >= 0) { void __user *datap; @@ -392,13 +393,13 @@ struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ int unused; }; -static int sg_grt_trans(struct file *file, unsigned int fd, +static int sg_grt_trans(struct file *file, unsigned int cmd, struct compat_sg_req_info __user *o) { int err, i; sg_req_info_t __user *r; r = compat_alloc_user_space(sizeof(sg_req_info_t)*SG_MAX_QUEUE); - err = do_ioctl(file, fd, cmd, (unsigned long)r); + err = do_ioctl(file, cmd, (unsigned long)r); if (err < 0) return err; for (i = 0; i < SG_MAX_QUEUE; i++) { @@ -424,7 +425,7 @@ struct sock_fprog32 { #define PPPIOCSPASS32 _IOW('t', 71, struct sock_fprog32) #define PPPIOCSACTIVE32 _IOW('t', 70, struct sock_fprog32) -static int ppp_sock_fprog_ioctl_trans(struct file *file, unsigned int fd, +static int ppp_sock_fprog_ioctl_trans(struct file *file, unsigned int cmd, struct sock_fprog32 __user *u_fprog32) { struct sock_fprog __user *u_fprog64 = compat_alloc_user_space(sizeof(struct sock_fprog)); @@ -447,7 +448,7 @@ static int ppp_sock_fprog_ioctl_trans(struct file *file, unsigned int fd, else cmd = PPPIOCSACTIVE; - return do_ioctl(file, fd, cmd, (unsigned long) u_fprog64); + return do_ioctl(file, cmd, (unsigned long) u_fprog64); } struct ppp_option_data32 { @@ -463,7 +464,7 @@ struct ppp_idle32 { }; #define PPPIOCGIDLE32 _IOR('t', 63, struct ppp_idle32) -static int ppp_gidle(struct file *file, unsigned int fd, unsigned int cmd, +static int ppp_gidle(struct file *file, unsigned int cmd, struct ppp_idle32 __user *idle32) { struct ppp_idle __user *idle; @@ -472,7 +473,7 @@ static int ppp_gidle(struct file *file, unsigned int fd, unsigned int cmd, idle = compat_alloc_user_space(sizeof(*idle)); - err = do_ioctl(file, fd, PPPIOCGIDLE, (unsigned long) idle); + err = do_ioctl(file, PPPIOCGIDLE, (unsigned long) idle); if (!err) { if (get_user(xmit, &idle->xmit_idle) || @@ -484,7 +485,7 @@ static int ppp_gidle(struct file *file, unsigned int fd, unsigned int cmd, return err; } -static int ppp_scompress(struct file *file, unsigned int fd, unsigned int cmd, +static int ppp_scompress(struct file *file, unsigned int cmd, struct ppp_option_data32 __user *odata32) { struct ppp_option_data __user *odata; @@ -504,7 +505,7 @@ static int ppp_scompress(struct file *file, unsigned int fd, unsigned int cmd, sizeof(__u32) + sizeof(int))) return -EFAULT; - return do_ioctl(file, fd, PPPIOCSCOMPRESS, (unsigned long) odata); + return do_ioctl(file, PPPIOCSCOMPRESS, (unsigned long) odata); } #ifdef CONFIG_BLOCK @@ -524,7 +525,7 @@ struct mtpos32 { }; #define MTIOCPOS32 _IOR('m', 3, struct mtpos32) -static int mt_ioctl_trans(struct file *file, unsigned int fd, +static int mt_ioctl_trans(struct file *file, unsigned int cmd, void __user *argp) { mm_segment_t old_fs = get_fs(); @@ -547,7 +548,7 @@ static int mt_ioctl_trans(struct file *file, unsigned int fd, break; } set_fs (KERNEL_DS); - err = do_ioctl(file, fd, kcmd, (unsigned long)karg); + err = do_ioctl(file, kcmd, (unsigned long)karg); set_fs (old_fs); if (err) return err; @@ -618,7 +619,7 @@ struct serial_struct32 { compat_int_t reserved[1]; }; -static int serial_struct_ioctl(struct file *file, unsigned fd, +static int serial_struct_ioctl(struct file *file, unsigned cmd, struct serial_struct32 __user *ss32) { typedef struct serial_struct32 SS32; @@ -642,7 +643,7 @@ static int serial_struct_ioctl(struct file *file, unsigned fd, ss.iomap_base = 0UL; } set_fs(KERNEL_DS); - err = do_ioctl(file, fd, cmd, (unsigned long)&ss); + err = do_ioctl(file, cmd, (unsigned long)&ss); set_fs(oldseg); if (cmd == TIOCGSERIAL && err >= 0) { if (!access_ok(VERIFY_WRITE, ss32, sizeof(SS32))) @@ -687,7 +688,7 @@ struct i2c_rdwr_aligned { struct i2c_msg msgs[0]; }; -static int do_i2c_rdwr_ioctl(struct file *file, unsigned int fd, +static int do_i2c_rdwr_ioctl(struct file *file, unsigned int cmd, struct i2c_rdwr_ioctl_data32 __user *udata) { struct i2c_rdwr_aligned __user *tdata; @@ -721,10 +722,10 @@ static int do_i2c_rdwr_ioctl(struct file *file, unsigned int fd, put_user(compat_ptr(datap), &tmsgs[i].buf)) return -EFAULT; } - return do_ioctl(file, fd, cmd, (unsigned long)tdata); + return do_ioctl(file, cmd, (unsigned long)tdata); } -static int do_i2c_smbus_ioctl(struct file *file, unsigned int fd, +static int do_i2c_smbus_ioctl(struct file *file, unsigned int cmd, struct i2c_smbus_ioctl_data32 __user *udata) { struct i2c_smbus_ioctl_data __user *tdata; @@ -747,7 +748,7 @@ static int do_i2c_smbus_ioctl(struct file *file, unsigned int fd, __put_user(compat_ptr(datap), &tdata->data)) return -EFAULT; - return do_ioctl(file, fd, cmd, (unsigned long)tdata); + return do_ioctl(file, cmd, (unsigned long)tdata); } #define RTC_IRQP_READ32 _IOR('p', 0x0b, compat_ulong_t) @@ -755,7 +756,7 @@ static int do_i2c_smbus_ioctl(struct file *file, unsigned int fd, #define RTC_EPOCH_READ32 _IOR('p', 0x0d, compat_ulong_t) #define RTC_EPOCH_SET32 _IOW('p', 0x0e, compat_ulong_t) -static int rtc_ioctl(struct file *file, unsigned fd, +static int rtc_ioctl(struct file *file, unsigned cmd, void __user *argp) { mm_segment_t oldfs = get_fs(); @@ -767,7 +768,7 @@ static int rtc_ioctl(struct file *file, unsigned fd, case RTC_IRQP_READ32: case RTC_EPOCH_READ32: set_fs(KERNEL_DS); - ret = do_ioctl(file, fd, (cmd == RTC_IRQP_READ32) ? + ret = do_ioctl(file, (cmd == RTC_IRQP_READ32) ? RTC_IRQP_READ : RTC_EPOCH_READ, (unsigned long)&kval); set_fs(oldfs); @@ -776,9 +777,9 @@ static int rtc_ioctl(struct file *file, unsigned fd, val32 = kval; return put_user(val32, (unsigned int __user *)argp); case RTC_IRQP_SET32: - return do_ioctl(file, fd, RTC_IRQP_SET, (unsigned long)argp); + return do_ioctl(file, RTC_IRQP_SET, (unsigned long)argp); case RTC_EPOCH_SET32: - return do_ioctl(file, fd, RTC_EPOCH_SET, (unsigned long)argp); + return do_ioctl(file, RTC_EPOCH_SET, (unsigned long)argp); } return -ENOIOCTLCMD; @@ -1450,53 +1451,53 @@ IGNORE_IOCTL(FBIOGCURSOR32) * a compat_ioctl operation in the place that handleѕ the * ioctl for the native case. */ -static long do_ioctl_trans(int fd, unsigned int cmd, +static long do_ioctl_trans(unsigned int cmd, unsigned long arg, struct file *file) { void __user *argp = compat_ptr(arg); switch (cmd) { case PPPIOCGIDLE32: - return ppp_gidle(file, fd, cmd, argp); + return ppp_gidle(file, cmd, argp); case PPPIOCSCOMPRESS32: - return ppp_scompress(file, fd, cmd, argp); + return ppp_scompress(file, cmd, argp); case PPPIOCSPASS32: case PPPIOCSACTIVE32: - return ppp_sock_fprog_ioctl_trans(file, fd, cmd, argp); + return ppp_sock_fprog_ioctl_trans(file, cmd, argp); #ifdef CONFIG_BLOCK case SG_IO: - return sg_ioctl_trans(file, fd, cmd, argp); + return sg_ioctl_trans(file, cmd, argp); case SG_GET_REQUEST_TABLE: - return sg_grt_trans(file, fd, cmd, argp); + return sg_grt_trans(file, cmd, argp); case MTIOCGET32: case MTIOCPOS32: - return mt_ioctl_trans(file, fd, cmd, argp); + return mt_ioctl_trans(file, cmd, argp); #endif /* Serial */ case TIOCGSERIAL: case TIOCSSERIAL: - return serial_struct_ioctl(file, fd, cmd, argp); + return serial_struct_ioctl(file, cmd, argp); /* i2c */ case I2C_FUNCS: - return w_long(file, fd, cmd, argp); + return w_long(file, cmd, argp); case I2C_RDWR: - return do_i2c_rdwr_ioctl(file, fd, cmd, argp); + return do_i2c_rdwr_ioctl(file, cmd, argp); case I2C_SMBUS: - return do_i2c_smbus_ioctl(file, fd, cmd, argp); + return do_i2c_smbus_ioctl(file, cmd, argp); /* Not implemented in the native kernel */ case RTC_IRQP_READ32: case RTC_IRQP_SET32: case RTC_EPOCH_READ32: case RTC_EPOCH_SET32: - return rtc_ioctl(file, fd, cmd, argp); + return rtc_ioctl(file, cmd, argp); /* dvb */ case VIDEO_GET_EVENT: - return do_video_get_event(file, fd, cmd, argp); + return do_video_get_event(file, cmd, argp); case VIDEO_STILLPICTURE: - return do_video_stillpicture(file, fd, cmd, argp); + return do_video_stillpicture(file, cmd, argp); case VIDEO_SET_SPU_PALETTE: - return do_video_set_spu_palette(file, fd, cmd, argp); + return do_video_set_spu_palette(file, cmd, argp); } /* @@ -1527,7 +1528,7 @@ static long do_ioctl_trans(int fd, unsigned int cmd, case NBD_SET_BLKSIZE: case NBD_SET_SIZE: case NBD_SET_SIZE_BLOCKS: - return do_vfs_ioctl(file, fd, cmd, arg); + return vfs_ioctl(file, cmd, arg); } return -ENOIOCTLCMD; @@ -1616,7 +1617,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, if (compat_ioctl_check_table(XFORM(cmd))) goto found_handler; - error = do_ioctl_trans(fd, cmd, arg, f.file); + error = do_ioctl_trans(cmd, arg, f.file); if (error == -ENOIOCTLCMD) error = -ENOTTY; diff --git a/fs/internal.h b/fs/internal.h index 71859c4d..e38c08c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -151,3 +151,10 @@ extern void mnt_pin_kill(struct mount *m); * fs/nsfs.c */ extern struct dentry_operations ns_dentry_operations; + +/* + * fs/ioctl.c + */ +extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd, + unsigned long arg); +extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/ioctl.c b/fs/ioctl.c index 5d01d26..41c352e 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -15,6 +15,7 @@ #include #include #include +#include "internal.h" #include @@ -32,8 +33,7 @@ * * Returns 0 on success, -errno on error. */ -static long vfs_ioctl(struct file *filp, unsigned int cmd, - unsigned long arg) +long vfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int error = -ENOTTY; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3aa5142..51f9f8d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2764,8 +2764,6 @@ extern int vfs_lstat(const char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); extern int vfs_fstatat(int , const char __user *, struct kstat *, int); -extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, - unsigned long arg); extern int __generic_block_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, loff_t start, loff_t len, -- cgit v0.10.2 From a7f61e89af73e9bf760826b20dba4e637221fcb9 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 5 Jan 2016 18:27:30 +0100 Subject: compat_ioctl: don't call do_ioctl under set_fs(KERNEL_DS) This replaces all code in fs/compat_ioctl.c that translated ioctl arguments into a in-kernel structure, then performed do_ioctl under set_fs(KERNEL_DS), with code that allocates data on the user stack and can call the VFS ioctl handler under USER_DS. This is done as a hardening measure because the caller does not know what kind of ioctl handler will be invoked, only that no corresponding compat_ioctl handler exists and what the ioctl command number is. The accidental invocation of an unlocked_ioctl handler that unexpectedly calls copy_to_user could be a severe security issue. Signed-off-by: Jann Horn Signed-off-by: Al Viro diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 908837c..9144b77 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -117,6 +117,13 @@ #include #endif +#define convert_in_user(srcptr, dstptr) \ +({ \ + typeof(*srcptr) val; \ + \ + get_user(val, srcptr) || put_user(val, dstptr); \ +}) + static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int err; @@ -131,16 +138,17 @@ static int do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) static int w_long(struct file *file, unsigned int cmd, compat_ulong_t __user *argp) { - mm_segment_t old_fs = get_fs(); int err; - unsigned long val; + unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp)); - set_fs (KERNEL_DS); - err = do_ioctl(file, cmd, (unsigned long)&val); - set_fs (old_fs); - if (!err && put_user(val, argp)) + if (valp == NULL) return -EFAULT; - return err; + err = do_ioctl(file, cmd, (unsigned long)valp); + if (err) + return err; + if (convert_in_user(valp, argp)) + return -EFAULT; + return 0; } struct compat_video_event { @@ -155,20 +163,20 @@ struct compat_video_event { static int do_video_get_event(struct file *file, unsigned int cmd, struct compat_video_event __user *up) { - struct video_event kevent; - mm_segment_t old_fs = get_fs(); + struct video_event __user *kevent = + compat_alloc_user_space(sizeof(*kevent)); int err; - set_fs(KERNEL_DS); - err = do_ioctl(file, cmd, (unsigned long) &kevent); - set_fs(old_fs); + if (kevent == NULL) + return -EFAULT; + err = do_ioctl(file, cmd, (unsigned long)kevent); if (!err) { - err = put_user(kevent.type, &up->type); - err |= put_user(kevent.timestamp, &up->timestamp); - err |= put_user(kevent.u.size.w, &up->u.size.w); - err |= put_user(kevent.u.size.h, &up->u.size.h); - err |= put_user(kevent.u.size.aspect_ratio, + err = convert_in_user(&kevent->type, &up->type); + err |= convert_in_user(&kevent->timestamp, &up->timestamp); + err |= convert_in_user(&kevent->u.size.w, &up->u.size.w); + err |= convert_in_user(&kevent->u.size.h, &up->u.size.h); + err |= convert_in_user(&kevent->u.size.aspect_ratio, &up->u.size.aspect_ratio); if (err) err = -EFAULT; @@ -528,10 +536,10 @@ struct mtpos32 { static int mt_ioctl_trans(struct file *file, unsigned int cmd, void __user *argp) { - mm_segment_t old_fs = get_fs(); - struct mtget get; + /* NULL initialization to make gcc shut up */ + struct mtget __user *get = NULL; struct mtget32 __user *umget32; - struct mtpos pos; + struct mtpos __user *pos = NULL; struct mtpos32 __user *upos32; unsigned long kcmd; void *karg; @@ -540,32 +548,34 @@ static int mt_ioctl_trans(struct file *file, switch(cmd) { case MTIOCPOS32: kcmd = MTIOCPOS; - karg = &pos; + pos = compat_alloc_user_space(sizeof(*pos)); + karg = pos; break; default: /* MTIOCGET32 */ kcmd = MTIOCGET; - karg = &get; + get = compat_alloc_user_space(sizeof(*get)); + karg = get; break; } - set_fs (KERNEL_DS); + if (karg == NULL) + return -EFAULT; err = do_ioctl(file, kcmd, (unsigned long)karg); - set_fs (old_fs); if (err) return err; switch (cmd) { case MTIOCPOS32: upos32 = argp; - err = __put_user(pos.mt_blkno, &upos32->mt_blkno); + err = convert_in_user(&pos->mt_blkno, &upos32->mt_blkno); break; case MTIOCGET32: umget32 = argp; - err = __put_user(get.mt_type, &umget32->mt_type); - err |= __put_user(get.mt_resid, &umget32->mt_resid); - err |= __put_user(get.mt_dsreg, &umget32->mt_dsreg); - err |= __put_user(get.mt_gstat, &umget32->mt_gstat); - err |= __put_user(get.mt_erreg, &umget32->mt_erreg); - err |= __put_user(get.mt_fileno, &umget32->mt_fileno); - err |= __put_user(get.mt_blkno, &umget32->mt_blkno); + err = convert_in_user(&get->mt_type, &umget32->mt_type); + err |= convert_in_user(&get->mt_resid, &umget32->mt_resid); + err |= convert_in_user(&get->mt_dsreg, &umget32->mt_dsreg); + err |= convert_in_user(&get->mt_gstat, &umget32->mt_gstat); + err |= convert_in_user(&get->mt_erreg, &umget32->mt_erreg); + err |= convert_in_user(&get->mt_fileno, &umget32->mt_fileno); + err |= convert_in_user(&get->mt_blkno, &umget32->mt_blkno); break; } return err ? -EFAULT: 0; @@ -624,37 +634,36 @@ static int serial_struct_ioctl(struct file *file, { typedef struct serial_struct32 SS32; int err; - struct serial_struct ss; - mm_segment_t oldseg = get_fs(); + struct serial_struct __user *ss = compat_alloc_user_space(sizeof(*ss)); __u32 udata; unsigned int base; + unsigned char *iomem_base; + if (ss == NULL) + return -EFAULT; if (cmd == TIOCSSERIAL) { - if (!access_ok(VERIFY_READ, ss32, sizeof(SS32))) - return -EFAULT; - if (__copy_from_user(&ss, ss32, offsetof(SS32, iomem_base))) + if (copy_in_user(ss, ss32, offsetof(SS32, iomem_base)) || + get_user(udata, &ss32->iomem_base)) return -EFAULT; - if (__get_user(udata, &ss32->iomem_base)) + iomem_base = compat_ptr(udata); + if (put_user(iomem_base, &ss->iomem_base) || + convert_in_user(&ss32->iomem_reg_shift, + &ss->iomem_reg_shift) || + convert_in_user(&ss32->port_high, &ss->port_high) || + put_user(0UL, &ss->iomap_base)) return -EFAULT; - ss.iomem_base = compat_ptr(udata); - if (__get_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) || - __get_user(ss.port_high, &ss32->port_high)) - return -EFAULT; - ss.iomap_base = 0UL; } - set_fs(KERNEL_DS); - err = do_ioctl(file, cmd, (unsigned long)&ss); - set_fs(oldseg); + err = do_ioctl(file, cmd, (unsigned long)ss); if (cmd == TIOCGSERIAL && err >= 0) { - if (!access_ok(VERIFY_WRITE, ss32, sizeof(SS32))) - return -EFAULT; - if (__copy_to_user(ss32,&ss,offsetof(SS32,iomem_base))) + if (copy_in_user(ss32, ss, offsetof(SS32, iomem_base)) || + get_user(iomem_base, &ss->iomem_base)) return -EFAULT; - base = (unsigned long)ss.iomem_base >> 32 ? - 0xffffffff : (unsigned)(unsigned long)ss.iomem_base; - if (__put_user(base, &ss32->iomem_base) || - __put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) || - __put_user(ss.port_high, &ss32->port_high)) + base = (unsigned long)iomem_base >> 32 ? + 0xffffffff : (unsigned)(unsigned long)iomem_base; + if (put_user(base, &ss32->iomem_base) || + convert_in_user(&ss->iomem_reg_shift, + &ss32->iomem_reg_shift) || + convert_in_user(&ss->port_high, &ss32->port_high)) return -EFAULT; } return err; @@ -759,23 +768,20 @@ static int do_i2c_smbus_ioctl(struct file *file, static int rtc_ioctl(struct file *file, unsigned cmd, void __user *argp) { - mm_segment_t oldfs = get_fs(); - compat_ulong_t val32; - unsigned long kval; + unsigned long __user *valp = compat_alloc_user_space(sizeof(*valp)); int ret; + if (valp == NULL) + return -EFAULT; switch (cmd) { case RTC_IRQP_READ32: case RTC_EPOCH_READ32: - set_fs(KERNEL_DS); ret = do_ioctl(file, (cmd == RTC_IRQP_READ32) ? RTC_IRQP_READ : RTC_EPOCH_READ, - (unsigned long)&kval); - set_fs(oldfs); + (unsigned long)valp); if (ret) return ret; - val32 = kval; - return put_user(val32, (unsigned int __user *)argp); + return convert_in_user(valp, (unsigned int __user *)argp); case RTC_IRQP_SET32: return do_ioctl(file, RTC_IRQP_SET, (unsigned long)argp); case RTC_EPOCH_SET32: -- cgit v0.10.2