From dd3733b3e798daf778a1ec08557f388f00fdc2f6 Mon Sep 17 00:00:00 2001 From: Alexey Andriyanov Date: Fri, 6 Feb 2015 22:32:20 +0300 Subject: ipvs: fix inability to remove a mixed-family RS The current code prevents any operation with a mixed-family dest unless IP_VS_CONN_F_TUNNEL flag is set. The problem is that it's impossible for the client to follow this rule, because ip_vs_genl_parse_dest does not even read the destination conn_flags when cmd = IPVS_CMD_DEL_DEST (need_full_dest = 0). Also, not every client can pass this flag when removing a dest. ipvsadm, for example, does not support the "-i" command line option together with the "-d" option. This change disables any checks for mixed-family on IPVS_CMD_DEL_DEST command. Signed-off-by: Alexey Andriyanov Fixes: bc18d37f676f ("ipvs: Allow heterogeneous pools now that we support them") Acked-by: Julian Anastasov Signed-off-by: Simon Horman diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b8295a4..fdcda8b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3399,7 +3399,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (udest.af == 0) udest.af = svc->af; - if (udest.af != svc->af) { + if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) { /* The synchronization protocol is incompatible * with mixed family services */ -- cgit v0.10.2 From 520aa7414bb590f39d0d1591b06018e60cbc7cf4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 12 Feb 2015 22:15:31 +0100 Subject: netfilter: nft_compat: fix module refcount underflow Feb 12 18:20:42 nfdev kernel: ------------[ cut here ]------------ Feb 12 18:20:42 nfdev kernel: WARNING: CPU: 4 PID: 4359 at kernel/module.c:963 module_put+0x9b/0xba() Feb 12 18:20:42 nfdev kernel: CPU: 4 PID: 4359 Comm: ebtables-compat Tainted: G W 3.19.0-rc6+ #43 [...] Feb 12 18:20:42 nfdev kernel: Call Trace: Feb 12 18:20:42 nfdev kernel: [] dump_stack+0x4c/0x65 Feb 12 18:20:42 nfdev kernel: [] warn_slowpath_common+0x9c/0xb6 Feb 12 18:20:42 nfdev kernel: [] ? module_put+0x9b/0xba Feb 12 18:20:42 nfdev kernel: [] warn_slowpath_null+0x15/0x17 Feb 12 18:20:42 nfdev kernel: [] module_put+0x9b/0xba Feb 12 18:20:42 nfdev kernel: [] nft_match_destroy+0x45/0x4c Feb 12 18:20:42 nfdev kernel: [] nf_tables_rule_destroy+0x28/0x70 Reported-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso Tested-by: Arturo Borrero Gonzalez diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 265e190..b636486 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -578,8 +578,12 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct xt_match *match = nft_match->ops.data; if (strcmp(match->name, mt_name) == 0 && - match->revision == rev && match->family == family) + match->revision == rev && match->family == family) { + if (!try_module_get(match->me)) + return ERR_PTR(-ENOENT); + return &nft_match->ops; + } } match = xt_request_find_match(family, mt_name, rev); @@ -648,8 +652,12 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && - target->revision == rev && target->family == family) + target->revision == rev && target->family == family) { + if (!try_module_get(target->me)) + return ERR_PTR(-ENOENT); + return &nft_target->ops; + } } target = xt_request_find_target(family, tg_name, rev); -- cgit v0.10.2 From cef9ed86ed62eeffcd017882278bbece32001f86 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 Feb 2015 12:47:50 +0100 Subject: netfilter: xt_recent: don't reject rule if new hitcount exceeds table max given: -A INPUT -m recent --update --seconds 30 --hitcount 4 and iptables-save > foo then iptables-restore < foo will fail with: kernel: xt_recent: hitcount (4) is larger than packets to be remembered (4) for table DEFAULT Even when the check is fixed, the restore won't work if the hitcount is increased to e.g. 6, since by the time checkentry runs it will find the 'old' incarnation of the table. We can avoid this by increasing the maximum threshold silently; we only have to rm all the current entries of the table (these entries would not have enough room to handle the increased hitcount). This even makes (not-very-useful) -A INPUT -m recent --update --seconds 30 --hitcount 4 -A INPUT -m recent --update --seconds 30 --hitcount 42 work. Fixes: abc86d0f99242b7f142b (netfilter: xt_recent: relax ip_pkt_list_tot restrictions) Tracked-down-by: Chris Vine Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 30dbe34..45e1b30 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -378,12 +378,11 @@ static int recent_mt_check(const struct xt_mtchk_param *par, mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (t != NULL) { - if (info->hit_count > t->nstamps_max_mask) { - pr_info("hitcount (%u) is larger than packets to be remembered (%u) for table %s\n", - info->hit_count, t->nstamps_max_mask + 1, - info->name); - ret = -EINVAL; - goto out; + if (nstamp_mask > t->nstamps_max_mask) { + spin_lock_bh(&recent_lock); + recent_table_flush(t); + t->nstamps_max_mask = nstamp_mask; + spin_unlock_bh(&recent_lock); } t->refcnt++; -- cgit v0.10.2 From 78296c97ca1fd3b104f12e1f1fbc06c46635990b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Feb 2015 19:03:45 -0800 Subject: netfilter: xt_socket: fix a stack corruption bug As soon as extract_icmp6_fields() returns, its local storage (automatic variables) is deallocated and can be overwritten. Lets add an additional parameter to make sure storage is valid long enough. While we are at it, adds some const qualifiers. Signed-off-by: Eric Dumazet Fixes: b64c9256a9b76 ("tproxy: added IPv6 support to the socket match") Signed-off-by: Pablo Neira Ayuso diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1ba6793..13332db 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -243,12 +243,13 @@ static int extract_icmp6_fields(const struct sk_buff *skb, unsigned int outside_hdrlen, int *protocol, - struct in6_addr **raddr, - struct in6_addr **laddr, + const struct in6_addr **raddr, + const struct in6_addr **laddr, __be16 *rport, - __be16 *lport) + __be16 *lport, + struct ipv6hdr *ipv6_var) { - struct ipv6hdr *inside_iph, _inside_iph; + const struct ipv6hdr *inside_iph; struct icmp6hdr *icmph, _icmph; __be16 *ports, _ports[2]; u8 inside_nexthdr; @@ -263,12 +264,14 @@ extract_icmp6_fields(const struct sk_buff *skb, if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) return 1; - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph); + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), + sizeof(*ipv6_var), ipv6_var); if (inside_iph == NULL) return 1; inside_nexthdr = inside_iph->nexthdr; - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + + sizeof(*ipv6_var), &inside_nexthdr, &inside_fragoff); if (inside_hdrlen < 0) return 1; /* hjm: Packet has no/incomplete transport layer headers. */ @@ -315,10 +318,10 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, static bool socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk = skb->sk; - struct in6_addr *daddr = NULL, *saddr = NULL; + const struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); int thoff = 0, uninitialized_var(tproto); const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; @@ -342,7 +345,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) } else if (tproto == IPPROTO_ICMPV6) { if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport)) + &sport, &dport, &ipv6_var)) return false; } else { return false; -- cgit v0.10.2 From 1c4cff0cf55011792125b6041bc4e9713e46240f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacy=20Gaw=C4=99dzki?= Date: Fri, 13 Feb 2015 14:47:05 -0800 Subject: gen_stats.c: Duplicate xstats buffer for later use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gnet_stats_copy_app() function gets called, more often than not, with its second argument a pointer to an automatic variable in the caller's stack. Therefore, to avoid copying garbage afterwards when calling gnet_stats_finish_copy(), this data is better copied to a dynamically allocated memory that gets freed after use. [xiyou.wangcong@gmail.com: remove a useless kfree()] Signed-off-by: Ignacy Gawędzki Signed-off-by: Cong Wang Signed-off-by: David S. Miller diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index 0c08062..1e2f46a 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -32,6 +32,9 @@ gnet_stats_copy(struct gnet_dump *d, int type, void *buf, int size) return 0; nla_put_failure: + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return -1; } @@ -305,7 +308,9 @@ int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) { if (d->compat_xstats) { - d->xstats = st; + d->xstats = kmemdup(st, len, GFP_ATOMIC); + if (!d->xstats) + goto err_out; d->xstats_len = len; } @@ -313,6 +318,11 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len) return gnet_stats_copy(d, TCA_STATS_APP, st, len); return 0; + +err_out: + d->xstats_len = 0; + spin_unlock_bh(d->lock); + return -1; } EXPORT_SYMBOL(gnet_stats_copy_app); @@ -345,6 +355,9 @@ gnet_stats_finish_copy(struct gnet_dump *d) return -1; } + kfree(d->xstats); + d->xstats = NULL; + d->xstats_len = 0; spin_unlock_bh(d->lock); return 0; } -- cgit v0.10.2 From aa183323312dbd1457096b4b94ca274b35fc462b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 15 Feb 2015 17:44:20 -0200 Subject: ehea: Register memory hotplug, reboot and crash hooks on adapter probe ehea creates memory hotplug, reboot and crash hooks even if there are no adapters in the box. Just create them when we probe our first adapter. [cascardo: use ehea_register_memory_hooks return code] Signed-off-by: Anton Blanchard Tested-by: Thadeu Lima de Souza Cascardo Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index e8a1adb..c05e507 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3262,6 +3262,139 @@ static void ehea_remove_device_sysfs(struct platform_device *dev) device_remove_file(&dev->dev, &dev_attr_remove_port); } +static int ehea_reboot_notifier(struct notifier_block *nb, + unsigned long action, void *unused) +{ + if (action == SYS_RESTART) { + pr_info("Reboot: freeing all eHEA resources\n"); + ibmebus_unregister_driver(&ehea_driver); + } + return NOTIFY_DONE; +} + +static struct notifier_block ehea_reboot_nb = { + .notifier_call = ehea_reboot_notifier, +}; + +static int ehea_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + int ret = NOTIFY_BAD; + struct memory_notify *arg = data; + + mutex_lock(&dlpar_mem_lock); + + switch (action) { + case MEM_CANCEL_OFFLINE: + pr_info("memory offlining canceled"); + /* Fall through: re-add canceled memory block */ + + case MEM_ONLINE: + pr_info("memory is going online"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + + case MEM_GOING_OFFLINE: + pr_info("memory is going offline"); + set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); + if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages)) + goto out_unlock; + ehea_rereg_mrs(); + break; + + default: + break; + } + + ehea_update_firmware_handles(); + ret = NOTIFY_OK; + +out_unlock: + mutex_unlock(&dlpar_mem_lock); + return ret; +} + +static struct notifier_block ehea_mem_nb = { + .notifier_call = ehea_mem_notifier, +}; + +static void ehea_crash_handler(void) +{ + int i; + + if (ehea_fw_handles.arr) + for (i = 0; i < ehea_fw_handles.num_entries; i++) + ehea_h_free_resource(ehea_fw_handles.arr[i].adh, + ehea_fw_handles.arr[i].fwh, + FORCE_FREE); + + if (ehea_bcmc_regs.arr) + for (i = 0; i < ehea_bcmc_regs.num_entries; i++) + ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[i].adh, + ehea_bcmc_regs.arr[i].port_id, + ehea_bcmc_regs.arr[i].reg_type, + ehea_bcmc_regs.arr[i].macaddr, + 0, H_DEREG_BCMC); +} + +static atomic_t ehea_memory_hooks_registered; + +/* Register memory hooks on probe of first adapter */ +static int ehea_register_memory_hooks(void) +{ + int ret = 0; + + if (atomic_inc_and_test(&ehea_memory_hooks_registered)) + return 0; + + ret = ehea_create_busmap(); + if (ret) { + pr_info("ehea_create_busmap failed\n"); + goto out; + } + + ret = register_reboot_notifier(&ehea_reboot_nb); + if (ret) { + pr_info("register_reboot_notifier failed\n"); + goto out; + } + + ret = register_memory_notifier(&ehea_mem_nb); + if (ret) { + pr_info("register_memory_notifier failed\n"); + goto out2; + } + + ret = crash_shutdown_register(ehea_crash_handler); + if (ret) { + pr_info("crash_shutdown_register failed\n"); + goto out3; + } + + return 0; + +out3: + unregister_memory_notifier(&ehea_mem_nb); +out2: + unregister_reboot_notifier(&ehea_reboot_nb); +out: + return ret; +} + +static void ehea_unregister_memory_hooks(void) +{ + if (atomic_read(&ehea_memory_hooks_registered)) + return; + + unregister_reboot_notifier(&ehea_reboot_nb); + if (crash_shutdown_unregister(ehea_crash_handler)) + pr_info("failed unregistering crash handler\n"); + unregister_memory_notifier(&ehea_mem_nb); +} + static int ehea_probe_adapter(struct platform_device *dev) { struct ehea_adapter *adapter; @@ -3269,6 +3402,10 @@ static int ehea_probe_adapter(struct platform_device *dev) int ret; int i; + ret = ehea_register_memory_hooks(); + if (ret) + return ret; + if (!dev || !dev->dev.of_node) { pr_err("Invalid ibmebus device probed\n"); return -EINVAL; @@ -3392,81 +3529,6 @@ static int ehea_remove(struct platform_device *dev) return 0; } -static void ehea_crash_handler(void) -{ - int i; - - if (ehea_fw_handles.arr) - for (i = 0; i < ehea_fw_handles.num_entries; i++) - ehea_h_free_resource(ehea_fw_handles.arr[i].adh, - ehea_fw_handles.arr[i].fwh, - FORCE_FREE); - - if (ehea_bcmc_regs.arr) - for (i = 0; i < ehea_bcmc_regs.num_entries; i++) - ehea_h_reg_dereg_bcmc(ehea_bcmc_regs.arr[i].adh, - ehea_bcmc_regs.arr[i].port_id, - ehea_bcmc_regs.arr[i].reg_type, - ehea_bcmc_regs.arr[i].macaddr, - 0, H_DEREG_BCMC); -} - -static int ehea_mem_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - int ret = NOTIFY_BAD; - struct memory_notify *arg = data; - - mutex_lock(&dlpar_mem_lock); - - switch (action) { - case MEM_CANCEL_OFFLINE: - pr_info("memory offlining canceled"); - /* Readd canceled memory block */ - case MEM_ONLINE: - pr_info("memory is going online"); - set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); - if (ehea_add_sect_bmap(arg->start_pfn, arg->nr_pages)) - goto out_unlock; - ehea_rereg_mrs(); - break; - case MEM_GOING_OFFLINE: - pr_info("memory is going offline"); - set_bit(__EHEA_STOP_XFER, &ehea_driver_flags); - if (ehea_rem_sect_bmap(arg->start_pfn, arg->nr_pages)) - goto out_unlock; - ehea_rereg_mrs(); - break; - default: - break; - } - - ehea_update_firmware_handles(); - ret = NOTIFY_OK; - -out_unlock: - mutex_unlock(&dlpar_mem_lock); - return ret; -} - -static struct notifier_block ehea_mem_nb = { - .notifier_call = ehea_mem_notifier, -}; - -static int ehea_reboot_notifier(struct notifier_block *nb, - unsigned long action, void *unused) -{ - if (action == SYS_RESTART) { - pr_info("Reboot: freeing all eHEA resources\n"); - ibmebus_unregister_driver(&ehea_driver); - } - return NOTIFY_DONE; -} - -static struct notifier_block ehea_reboot_nb = { - .notifier_call = ehea_reboot_notifier, -}; - static int check_module_parm(void) { int ret = 0; @@ -3520,26 +3582,10 @@ static int __init ehea_module_init(void) if (ret) goto out; - ret = ehea_create_busmap(); - if (ret) - goto out; - - ret = register_reboot_notifier(&ehea_reboot_nb); - if (ret) - pr_info("failed registering reboot notifier\n"); - - ret = register_memory_notifier(&ehea_mem_nb); - if (ret) - pr_info("failed registering memory remove notifier\n"); - - ret = crash_shutdown_register(ehea_crash_handler); - if (ret) - pr_info("failed registering crash handler\n"); - ret = ibmebus_register_driver(&ehea_driver); if (ret) { pr_err("failed registering eHEA device driver on ebus\n"); - goto out2; + goto out; } ret = driver_create_file(&ehea_driver.driver, @@ -3547,32 +3593,22 @@ static int __init ehea_module_init(void) if (ret) { pr_err("failed to register capabilities attribute, ret=%d\n", ret); - goto out3; + goto out2; } return ret; -out3: - ibmebus_unregister_driver(&ehea_driver); out2: - unregister_memory_notifier(&ehea_mem_nb); - unregister_reboot_notifier(&ehea_reboot_nb); - crash_shutdown_unregister(ehea_crash_handler); + ibmebus_unregister_driver(&ehea_driver); out: return ret; } static void __exit ehea_module_exit(void) { - int ret; - driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities); ibmebus_unregister_driver(&ehea_driver); - unregister_reboot_notifier(&ehea_reboot_nb); - ret = crash_shutdown_unregister(ehea_crash_handler); - if (ret) - pr_info("failed unregistering crash handler\n"); - unregister_memory_notifier(&ehea_mem_nb); + ehea_unregister_memory_hooks(); kfree(ehea_fw_handles.arr); kfree(ehea_bcmc_regs.arr); ehea_destroy_busmap(); -- cgit v0.10.2 From 42c972a1f390e3bc51ca1e434b7e28764992067f Mon Sep 17 00:00:00 2001 From: Ben Shelton Date: Mon, 16 Feb 2015 13:47:06 -0600 Subject: usb: plusb: Add support for National Instruments host-to-host cable The National Instruments USB Host-to-Host Cable is based on the Prolific PL-25A1 chipset. Add its VID/PID so the plusb driver will recognize it. Signed-off-by: Ben Shelton Signed-off-by: David S. Miller diff --git a/drivers/net/usb/plusb.c b/drivers/net/usb/plusb.c index 3d18bb0..1bfe0fc 100644 --- a/drivers/net/usb/plusb.c +++ b/drivers/net/usb/plusb.c @@ -134,6 +134,11 @@ static const struct usb_device_id products [] = { }, { USB_DEVICE(0x050d, 0x258a), /* Belkin F5U258/F5U279 (PL-25A1) */ .driver_info = (unsigned long) &prolific_info, +}, { + USB_DEVICE(0x3923, 0x7825), /* National Instruments USB + * Host-to-Host Cable + */ + .driver_info = (unsigned long) &prolific_info, }, { }, // END -- cgit v0.10.2 From fba04a9e0c869498889b6445fd06cbe7da9bb834 Mon Sep 17 00:00:00 2001 From: Alexander Drozdov Date: Tue, 17 Feb 2015 13:33:46 +0300 Subject: ipv4: ip_check_defrag should correctly check return value of skb_copy_bits skb_copy_bits() returns zero on success and negative value on error, so it is needed to invert the condition in ip_check_defrag(). Fixes: 1bf3751ec90c ("ipv4: ip_check_defrag must not modify skb before unsharing") Signed-off-by: Alexander Drozdov Acked-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e5b6d0d..2c8d98e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -664,7 +664,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) if (skb->protocol != htons(ETH_P_IP)) return skb; - if (!skb_copy_bits(skb, 0, &iph, sizeof(iph))) + if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0) return skb; if (iph.ihl < 5 || iph.version != 4) -- cgit v0.10.2 From 54da5a8be3c1e924c35480eb44c6e9b275f6444e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 17 Feb 2015 09:36:22 -0800 Subject: net: phy: Fix verification of EEE support in phy_init_eee phy_init_eee uses phy_find_setting(phydev->speed, phydev->duplex) to find a valid entry in the settings array for the given speed and duplex value. For full duplex 1000baseT, this will return the first matching entry, which is the entry for 1000baseKX_Full. If the phy eee does not support 1000baseKX_Full, this entry will not match, causing phy_init_eee to fail for no good reason. Fixes: 9a9c56cb34e6 ("net: phy: fix a bug when verify the EEE support") Fixes: 3e7077067e80c ("phy: Expand phy speed/duplex settings array") Cc: Giuseppe Cavallaro Signed-off-by: Guenter Roeck Acked-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index cdcac6a..52cd8db 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -236,6 +236,25 @@ static inline unsigned int phy_find_valid(unsigned int idx, u32 features) } /** + * phy_check_valid - check if there is a valid PHY setting which matches + * speed, duplex, and feature mask + * @speed: speed to match + * @duplex: duplex to match + * @features: A mask of the valid settings + * + * Description: Returns true if there is a valid setting, false otherwise. + */ +static inline bool phy_check_valid(int speed, int duplex, u32 features) +{ + unsigned int idx; + + idx = phy_find_valid(phy_find_setting(speed, duplex), features); + + return settings[idx].speed == speed && settings[idx].duplex == duplex && + (settings[idx].setting & features); +} + +/** * phy_sanitize_settings - make sure the PHY is set to supported speed and duplex * @phydev: the target phy_device struct * @@ -1045,7 +1064,6 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) int eee_lp, eee_cap, eee_adv; u32 lp, cap, adv; int status; - unsigned int idx; /* Read phy status to properly get the right settings */ status = phy_read_status(phydev); @@ -1077,8 +1095,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) adv = mmd_eee_adv_to_ethtool_adv_t(eee_adv); lp = mmd_eee_adv_to_ethtool_adv_t(eee_lp); - idx = phy_find_setting(phydev->speed, phydev->duplex); - if (!(lp & adv & settings[idx].setting)) + if (!phy_check_valid(phydev->speed, phydev->duplex, lp & adv)) goto eee_exit_err; if (clk_stop_enable) { -- cgit v0.10.2 From 34eea79e2664b314cab6a30fc582fdfa7a1bb1df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacy=20Gaw=C4=99dzki?= Date: Tue, 17 Feb 2015 20:15:20 +0100 Subject: ematch: Fix auto-loading of ematch modules. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In tcf_em_validate(), after calling request_module() to load the kind-specific module, set em->ops to NULL before returning -EAGAIN, so that module_put() is not called again by tcf_em_tree_destroy(). Signed-off-by: Ignacy Gawędzki Acked-by: Cong Wang Signed-off-by: David S. Miller diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 6742200..fbb7ebf 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -228,6 +228,7 @@ static int tcf_em_validate(struct tcf_proto *tp, * to replay the request. */ module_put(em->ops->owner); + em->ops = NULL; err = -EAGAIN; } #endif -- cgit v0.10.2 From 7b4577a9da3702049650f7095506e9afd9f68849 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 17 Feb 2015 11:23:10 -0800 Subject: openvswitch: Fix net exit. Open vSwitch allows moving internal vport to different namespace while still connected to the bridge. But when namespace deleted OVS does not detach these vports, that results in dangling pointer to netdevice which causes kernel panic as follows. This issue is fixed by detaching all ovs ports from the deleted namespace at net-exit. BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: [] ovs_vport_locate+0x35/0x80 [openvswitch] Oops: 0000 [#1] SMP Call Trace: [] lookup_vport+0x21/0xd0 [openvswitch] [] ovs_vport_cmd_get+0x59/0xf0 [openvswitch] [] genl_family_rcv_msg+0x1bc/0x3e0 [] genl_rcv_msg+0x79/0xc0 [] netlink_rcv_skb+0xb9/0xe0 [] genl_rcv+0x2c/0x40 [] netlink_unicast+0x12d/0x1c0 [] netlink_sendmsg+0x34a/0x6b0 [] sock_sendmsg+0xa0/0xe0 [] ___sys_sendmsg+0x408/0x420 [] __sys_sendmsg+0x51/0x90 [] SyS_sendmsg+0x12/0x20 [] system_call_fastpath+0x12/0x17 Reported-by: Assaf Muller Fixes: 46df7b81454("openvswitch: Add support for network namespaces.") Signed-off-by: Pravin B Shelar Reviewed-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index ae5e77c..5bae724 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2194,14 +2194,55 @@ static int __net_init ovs_init_net(struct net *net) return 0; } -static void __net_exit ovs_exit_net(struct net *net) +static void __net_exit list_vports_from_net(struct net *net, struct net *dnet, + struct list_head *head) { - struct datapath *dp, *dp_next; struct ovs_net *ovs_net = net_generic(net, ovs_net_id); + struct datapath *dp; + + list_for_each_entry(dp, &ovs_net->dps, list_node) { + int i; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { + struct vport *vport; + + hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) { + struct netdev_vport *netdev_vport; + + if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL) + continue; + + netdev_vport = netdev_vport_priv(vport); + if (dev_net(netdev_vport->dev) == dnet) + list_add(&vport->detach_list, head); + } + } + } +} + +static void __net_exit ovs_exit_net(struct net *dnet) +{ + struct datapath *dp, *dp_next; + struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id); + struct vport *vport, *vport_next; + struct net *net; + LIST_HEAD(head); ovs_lock(); list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); + + rtnl_lock(); + for_each_net(net) + list_vports_from_net(net, dnet, &head); + rtnl_unlock(); + + /* Detach all vports from given namespace. */ + list_for_each_entry_safe(vport, vport_next, &head, detach_list) { + list_del(&vport->detach_list); + ovs_dp_detach_port(vport); + } + ovs_unlock(); cancel_work_sync(&ovs_net->dp_notify_work); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index f8ae295..bc85331 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -103,6 +103,7 @@ struct vport_portids { * @ops: Class structure. * @percpu_stats: Points to per-CPU statistics used and maintained by vport * @err_stats: Points to error statistics used and maintained by vport + * @detach_list: list used for detaching vport in net-exit call. */ struct vport { struct rcu_head rcu; @@ -117,6 +118,7 @@ struct vport { struct pcpu_sw_netstats __percpu *percpu_stats; struct vport_err_stats err_stats; + struct list_head detach_list; }; /** -- cgit v0.10.2 From f81edc6ac1e1e2e2cbe98bcd6ef5ebb7afb00807 Mon Sep 17 00:00:00 2001 From: Derrick Pallas Date: Wed, 18 Feb 2015 00:50:25 -0800 Subject: ethernet/ixp4xx: prevent allmulti from clobbering promisc If both promisc and allmulti are set, promisc should trump allmulti and disable the MAC filter; otherwise, the interface is not really promisc. Previously, this code checked IFF_ALLMULTI prior to and without regard for IFF_PROMISC; if both were set, only multicast and direct unicast traffic would make it through the filter. Signed-off-by: Derrick Pallas Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c index f7e0f0f..9e16a28 100644 --- a/drivers/net/ethernet/xscale/ixp4xx_eth.c +++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c @@ -938,7 +938,7 @@ static void eth_set_mcast_list(struct net_device *dev) int i; static const u8 allmulti[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00 }; - if (dev->flags & IFF_ALLMULTI) { + if ((dev->flags & IFF_ALLMULTI) && !(dev->flags & IFF_PROMISC)) { for (i = 0; i < ETH_ALEN; i++) { __raw_writel(allmulti[i], &port->regs->mcast_addr[i]); __raw_writel(allmulti[i], &port->regs->mcast_mask[i]); -- cgit v0.10.2 From 846cd66788b11105a62785078360c8854aa98310 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 18 Feb 2015 11:38:06 +0100 Subject: net: Initialize all members in skb_gro_remcsum_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skb_gro_remcsum_init() initializes the gro_remcsum.delta member only, leading to compiler warnings about a possibly uninitialized gro_remcsum.offset member: drivers/net/vxlan.c: In function ‘vxlan_gro_receive’: drivers/net/vxlan.c:602: warning: ‘grc.offset’ may be used uninitialized in this function net/ipv4/fou.c: In function ‘gue_gro_receive’: net/ipv4/fou.c:262: warning: ‘grc.offset’ may be used uninitialized in this function While these are harmless for now: - skb_gro_remcsum_process() sets offset before changing delta, - skb_gro_remcsum_cleanup() checks if delta is non-zero before accessing offset, it's safer to let the initialization function initialize all members. Signed-off-by: Geert Uytterhoeven Acked-by: Tom Herbert Signed-off-by: David S. Miller diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5897b4e..429d179 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2342,6 +2342,7 @@ struct gro_remcsum { static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) { + grc->offset = 0; grc->delta = 0; } -- cgit v0.10.2 From 997d5c3f4427f38562cbe207ce05bb25fdcb993b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Feb 2015 05:47:55 -0800 Subject: sock: sock_dequeue_err_skb() needs hard irq safety Non NAPI drivers can call skb_tstamp_tx() and then sock_queue_err_skb() from hard IRQ context. Therefore, sock_dequeue_err_skb() needs to block hard irq or corruptions or hangs can happen. Signed-off-by: Eric Dumazet Fixes: 364a9e93243d1 ("sock: deduplicate errqueue dequeue") Fixes: cb820f8e4b7f7 ("net: Provide a generic socket error queue delivery method for Tx time stamps.") Signed-off-by: David S. Miller diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 88c613e..f805078 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3621,13 +3621,14 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) { struct sk_buff_head *q = &sk->sk_error_queue; struct sk_buff *skb, *skb_next; + unsigned long flags; int err = 0; - spin_lock_bh(&q->lock); + spin_lock_irqsave(&q->lock, flags); skb = __skb_dequeue(q); if (skb && (skb_next = skb_peek(q))) err = SKB_EXT_ERR(skb_next)->ee.ee_errno; - spin_unlock_bh(&q->lock); + spin_unlock_irqrestore(&q->lock, flags); sk->sk_err = err; if (err) -- cgit v0.10.2 From 15added6a5940454e225c09b78837514d35fcf70 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Feb 2015 20:47:30 +0100 Subject: net: smc91x: improve neponset hack The smc91x driver tries to support multiple platforms at compile time, but they are mutually exclusive at runtime, and not clearly defined. Trying to build for CONFIG_SA1100_ASSABET without CONFIG_ASSABET_NEPONSET results in this link error: drivers/built-in.o: In function `smc_drv_probe': :(.text+0x33310c): undefined reference to `neponset_ncr_frob' since the neponset_ncr_set function is not defined otherwise. Similarly, building for both CONFIG_SA1100_ASSABET and CONFIG_SA1100_PLEB results in a different build error: smsc/smc91x.c: In function 'smc_drv_probe': smsc/smc91x.c:2299:2: error: implicit declaration of function 'neponset_ncr_set' [-Werror=implicit-function-declaration] neponset_ncr_set(NCR_ENET_OSC_EN); ^ smsc/smc91x.c:2299:19: error: 'NCR_ENET_OSC_EN' undeclared (first use in this function) neponset_ncr_set(NCR_ENET_OSC_EN); ^ This is an attempt to fix the call site responsible for both errors, making sure we call the function exactly when the driver is actually trying to run on the assabet/neponset machine. With this patch, I no longer see randconfig build errors in this file. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 88a55f9..fa3f193 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2355,7 +2355,7 @@ static int smc_drv_probe(struct platform_device *pdev) ret = smc_request_attrib(pdev, ndev); if (ret) goto out_release_io; -#if defined(CONFIG_SA1100_ASSABET) +#if defined(CONFIG_ASSABET_NEPONSET) && !defined(CONFIG_SA1100_PLEB) neponset_ncr_set(NCR_ENET_OSC_EN); #endif platform_set_drvdata(pdev, ndev); -- cgit v0.10.2 From bf60e50cb3ed50ae536f7655c8af1acda137ea5b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 18 Feb 2015 20:48:54 +0100 Subject: net/appletalk: LTPC needs virt_to_bus The ltpc driver is rather outdated and does not get built on most platforms because it requires the ISA_DMA_API symbol. However there are some ARM platforms that have ISA_DMA_API but no virt_to_bus, and they get this build error when enabling the ltpc driver. drivers/net/appletalk/ltpc.c: In function 'handlefc': drivers/net/appletalk/ltpc.c:380:2: error: implicit declaration of function 'virt_to_bus' [-Werror=implicit-function-declaration] set_dma_addr(dma,virt_to_bus(ltdmacbuf)); ^ This adds another dependency in Kconfig to avoid that configuration. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig index 4ce6ca5..dc6b78e 100644 --- a/drivers/net/appletalk/Kconfig +++ b/drivers/net/appletalk/Kconfig @@ -40,7 +40,7 @@ config DEV_APPLETALK config LTPC tristate "Apple/Farallon LocalTalk PC support" - depends on DEV_APPLETALK && (ISA || EISA) && ISA_DMA_API + depends on DEV_APPLETALK && (ISA || EISA) && ISA_DMA_API && VIRT_TO_BUS help This allows you to use the AppleTalk PC card to connect to LocalTalk networks. The card is also known as the Farallon PhoneNet PC card. -- cgit v0.10.2 From b7f5e5c7f8cedf6b69c9702d448cdf78ffc45c7b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Feb 2015 21:14:21 +0100 Subject: rhashtable: don't allocate ht structure on stack in test_rht_init With object runtime debugging enabled, the rhashtable test suite will rightfully throw a warning "ODEBUG: object is on stack, but not annotated" from rhashtable_init(). This is because run_work is (correctly) being initialized via INIT_WORK(), and not annotated by INIT_WORK_ONSTACK(). Meaning, rhashtable_init() is okay as is, we just need to move ht e.g., into global scope. It never triggered anything, since test_rhashtable is rather a controlled environment and effectively runs to completion, so that stack memory is not vanishing underneath us, we shouldn't confuse any testers with it though. Fixes: 7e1e77636e36 ("lib: Resizable, Scalable, Concurrent Hash Table") Signed-off-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 1dfeba7..f6ce291 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -191,9 +191,10 @@ error: return err; } +static struct rhashtable ht; + static int __init test_rht_init(void) { - struct rhashtable ht; struct rhashtable_params params = { .nelem_hint = TEST_HT_SIZE, .head_offset = offsetof(struct test_obj, node), -- cgit v0.10.2 From f4c2b7a08170dc4e442c4566486e4597af8d72a3 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Wed, 18 Feb 2015 15:15:57 -0800 Subject: ipvlan: Fix text that talks about ip util support ipvlan was added into 3.19 release and iproute2 added support for the same in iproute2-3.19 package. Signed-off-by: Mahesh Bandewar CC: Stephen Hemminger Signed-off-by: David S. Miller diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 84673eb..df51d60 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -157,7 +157,7 @@ config IPVLAN making it transparent to the connected L2 switch. Ipvlan devices can be added using the "ip" command from the - iproute2 package starting with the iproute2-X.Y.ZZ release: + iproute2 package starting with the iproute2-3.19 release: "ip link add link [ NAME ] type ipvlan" -- cgit v0.10.2 From 65e9256c4ec569ed62bb89023daab7b72368b89f Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Fri, 20 Feb 2015 21:34:58 +0200 Subject: ethtool: Add hw-switch-offload to netdev_features_strings. commit aafb3e98b279 (netdev: introduce new NETIF_F_HW_SWITCH_OFFLOAD feature flag for switch device offloads) add a new feature without adding it to netdev_features_strings array; this patch fixes this. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 91f74f3..aa378ec 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -98,6 +98,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_BUSY_POLL_BIT] = "busy-poll", + [NETIF_F_HW_SWITCH_OFFLOAD_BIT] = "hw-switch-offload", }; static const char -- cgit v0.10.2 From 5a8eeec468f229558322926f28c61bb0769793e9 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Wed, 18 Feb 2015 15:29:45 -0800 Subject: cxgb4: Fix incorrect 'c' suffix to %pI4, use %pISc instead Issue caught by 0-day kernel test infrastructure. Code changed to use sockaddr members so that %pISc can be used instead. Fixes: b5a02f503caa ('cxgb4 : Update ipv6 address handling api') Signed-off-by: Anish Bhatt Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c index 9062a84..c308429 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c @@ -35,10 +35,10 @@ static inline unsigned int ipv6_clip_hash(struct clip_tbl *d, const u32 *key) } static unsigned int clip_addr_hash(struct clip_tbl *ctbl, const u32 *addr, - int addr_len) + u8 v6) { - return addr_len == 4 ? ipv4_clip_hash(ctbl, addr) : - ipv6_clip_hash(ctbl, addr); + return v6 ? ipv6_clip_hash(ctbl, addr) : + ipv4_clip_hash(ctbl, addr); } static int clip6_get_mbox(const struct net_device *dev, @@ -78,23 +78,22 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) struct clip_entry *ce, *cte; u32 *addr = (u32 *)lip; int hash; - int addr_len; - int ret = 0; + int ret = -1; if (!ctbl) return 0; - if (v6) - addr_len = 16; - else - addr_len = 4; - - hash = clip_addr_hash(ctbl, addr, addr_len); + hash = clip_addr_hash(ctbl, addr, v6); read_lock_bh(&ctbl->lock); list_for_each_entry(cte, &ctbl->hash_list[hash], list) { - if (addr_len == cte->addr_len && - memcmp(lip, cte->addr, cte->addr_len) == 0) { + if (cte->addr6.sin6_family == AF_INET6 && v6) + ret = memcmp(lip, cte->addr6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + else if (cte->addr.sin_family == AF_INET && !v6) + ret = memcmp(lip, (char *)(&cte->addr.sin_addr), + sizeof(struct in_addr)); + if (!ret) { ce = cte; read_unlock_bh(&ctbl->lock); goto found; @@ -111,15 +110,20 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6) spin_lock_init(&ce->lock); atomic_set(&ce->refcnt, 0); atomic_dec(&ctbl->nfree); - ce->addr_len = addr_len; - memcpy(ce->addr, lip, addr_len); list_add_tail(&ce->list, &ctbl->hash_list[hash]); if (v6) { + ce->addr6.sin6_family = AF_INET6; + memcpy(ce->addr6.sin6_addr.s6_addr, + lip, sizeof(struct in6_addr)); ret = clip6_get_mbox(dev, (const struct in6_addr *)lip); if (ret) { write_unlock_bh(&ctbl->lock); return ret; } + } else { + ce->addr.sin_family = AF_INET; + memcpy((char *)(&ce->addr.sin_addr), lip, + sizeof(struct in_addr)); } } else { write_unlock_bh(&ctbl->lock); @@ -140,19 +144,19 @@ void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6) struct clip_entry *ce, *cte; u32 *addr = (u32 *)lip; int hash; - int addr_len; - - if (v6) - addr_len = 16; - else - addr_len = 4; + int ret = -1; - hash = clip_addr_hash(ctbl, addr, addr_len); + hash = clip_addr_hash(ctbl, addr, v6); read_lock_bh(&ctbl->lock); list_for_each_entry(cte, &ctbl->hash_list[hash], list) { - if (addr_len == cte->addr_len && - memcmp(lip, cte->addr, cte->addr_len) == 0) { + if (cte->addr6.sin6_family == AF_INET6 && v6) + ret = memcmp(lip, cte->addr6.sin6_addr.s6_addr, + sizeof(struct in6_addr)); + else if (cte->addr.sin_family == AF_INET && !v6) + ret = memcmp(lip, (char *)(&cte->addr.sin_addr), + sizeof(struct in_addr)); + if (!ret) { ce = cte; read_unlock_bh(&ctbl->lock); goto found; @@ -249,10 +253,7 @@ int clip_tbl_show(struct seq_file *seq, void *v) for (i = 0 ; i < ctbl->clipt_size; ++i) { list_for_each_entry(ce, &ctbl->hash_list[i], list) { ip[0] = '\0'; - if (ce->addr_len == 16) - sprintf(ip, "%pI6c", ce->addr); - else - sprintf(ip, "%pI4c", ce->addr); + sprintf(ip, "%pISc", &ce->addr); seq_printf(seq, "%-25s %u\n", ip, atomic_read(&ce->refcnt)); } diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h index 2eaba01..35eb43c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h +++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h @@ -14,8 +14,10 @@ struct clip_entry { spinlock_t lock; /* Hold while modifying clip reference */ atomic_t refcnt; struct list_head list; - u32 addr[4]; - int addr_len; + union { + struct sockaddr_in addr; + struct sockaddr_in6 addr6; + }; }; struct clip_tbl { -- cgit v0.10.2 From 278f7b4fffce9ad267406cf8800df271d14f4a16 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 19 Feb 2015 12:13:13 +0300 Subject: caif: fix a signedness bug in cfpkt_iterate() The cfpkt_iterate() function can return -EPROTO on error, but the function is a u16 so the negative value gets truncated to a positive unsigned short. This causes a static checker warning. The only caller which might care is cffrml_receive(), when it's checking the frame checksum. I modified cffrml_receive() so that it never says -EPROTO is a valid checksum. Also this isn't ever going to be inlined so I removed the "inline". Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h index 1c1ad46..fe328c5 100644 --- a/include/net/caif/cfpkt.h +++ b/include/net/caif/cfpkt.h @@ -171,7 +171,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos); * @return Checksum of buffer. */ -u16 cfpkt_iterate(struct cfpkt *pkt, +int cfpkt_iterate(struct cfpkt *pkt, u16 (*iter_func)(u16 chks, void *buf, u16 len), u16 data); diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 8bc7caa..434ba85 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -84,7 +84,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt) u16 tmp; u16 len; u16 hdrchks; - u16 pktchks; + int pktchks; struct cffrml *this; this = container_obj(layr); diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 1be0b52..f6c3b21 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -255,9 +255,9 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt) return skb->len; } -inline u16 cfpkt_iterate(struct cfpkt *pkt, - u16 (*iter_func)(u16, void *, u16), - u16 data) +int cfpkt_iterate(struct cfpkt *pkt, + u16 (*iter_func)(u16, void *, u16), + u16 data) { /* * Don't care about the performance hit of linearizing, -- cgit v0.10.2 From 342100d937ed6e5faf1e7ee7dcd7b3935fec8877 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Feb 2015 00:53:37 +0100 Subject: rhashtable: don't test for shrink on insert, expansion on delete Restore pre 54c5b7d311c8 behaviour and only probe for expansions on inserts and shrinks on deletes. Currently, it will happen that on initial inserts into a sparse hash table, we may i.e. shrink it first simply because it's not fully populated yet, only to later realize that we need to grow again. This however is counter intuitive, e.g. an initial default size of 64 elements is already small enough, and in case an elements size hint is given to the hash table by a user, we should avoid unnecessary expansion steps, so a shrink is clearly unintended here. Fixes: 54c5b7d311c8 ("rhashtable: introduce rhashtable_wakeup_worker helper function") Signed-off-by: Daniel Borkmann Cc: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9cc4c4a..38f7879 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -537,16 +537,25 @@ unlock: mutex_unlock(&ht->mutex); } -static void rhashtable_wakeup_worker(struct rhashtable *ht) +static void rhashtable_probe_expand(struct rhashtable *ht) { - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht); - size_t size = tbl->size; + const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht); + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); /* Only adjust the table if no resizing is currently in progress. */ - if (tbl == new_tbl && - ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) || - (ht->p.shrink_decision && ht->p.shrink_decision(ht, size)))) + if (tbl == new_tbl && ht->p.grow_decision && + ht->p.grow_decision(ht, tbl->size)) + schedule_work(&ht->run_work); +} + +static void rhashtable_probe_shrink(struct rhashtable *ht) +{ + const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht); + const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Only adjust the table if no resizing is currently in progress. */ + if (tbl == new_tbl && ht->p.shrink_decision && + ht->p.shrink_decision(ht, tbl->size)) schedule_work(&ht->run_work); } @@ -569,7 +578,7 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, atomic_inc(&ht->nelems); - rhashtable_wakeup_worker(ht); + rhashtable_probe_expand(ht); } /** @@ -682,7 +691,7 @@ found: if (ret) { atomic_dec(&ht->nelems); - rhashtable_wakeup_worker(ht); + rhashtable_probe_shrink(ht); } rcu_read_unlock(); -- cgit v0.10.2 From eb6d1abf1bd8bf1beb45b5401c8324bdb8f893c4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Feb 2015 00:53:38 +0100 Subject: rhashtable: better high order allocation attempts When trying to allocate future tables via bucket_table_alloc(), it seems overkill on large table shifts that we probe for kzalloc() unconditionally first, as it's likely to fail. Only probe with kzalloc() for more reasonable table sizes and use vzalloc() either as a fallback on failure or directly in case of large table sizes. Fixes: 7e1e77636e36 ("lib: Resizable, Scalable, Concurrent Hash Table") Signed-off-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 38f7879..b41a5c0 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -217,15 +217,15 @@ static void bucket_table_free(const struct bucket_table *tbl) static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, size_t nbuckets) { - struct bucket_table *tbl; + struct bucket_table *tbl = NULL; size_t size; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (tbl == NULL) tbl = vzalloc(size); - if (tbl == NULL) return NULL; -- cgit v0.10.2 From 6dd0c1655be26345960a6bae574c7dc4648611d3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Feb 2015 00:53:39 +0100 Subject: rhashtable: allow to unload test module There's no good reason why to disallow unloading of the rhashtable test case module. Commit 9d6dbe1bbaf8 moved the code from a boot test into a stand-alone module, but only converted the subsys_initcall() handler into a module_init() function without a related exit handler, and thus preventing the test module from unloading. Fixes: 9d6dbe1bbaf8 ("rhashtable: Make selftest modular") Signed-off-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index f6ce291..58b9953 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -223,6 +223,11 @@ static int __init test_rht_init(void) return err; } +static void __exit test_rht_exit(void) +{ +} + module_init(test_rht_init); +module_exit(test_rht_exit); MODULE_LICENSE("GPL v2"); -- cgit v0.10.2 From ddede6d536d70c4ef2193ca14208db90740fcd22 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 19 Feb 2015 11:09:27 -0800 Subject: net: dsa: bcm_sf2: fix 64-bits register reads Reading 64-bits register was not working because we inverted the steps between reading the lower 32-bits of the register and reading the upper 32-bits. Swapping these operations is how the HW guarantees that 64-bits reads are latched correctly. We only have a handful of 64-bits registers for now, mostly MIB counters, so the imapct is low. Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index ee9f650..7b7053d 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -105,8 +105,8 @@ static inline u64 name##_readq(struct bcm_sf2_priv *priv, u32 off) \ { \ u32 indir, dir; \ spin_lock(&priv->indir_lock); \ - indir = reg_readl(priv, REG_DIR_DATA_READ); \ dir = __raw_readl(priv->name + off); \ + indir = reg_readl(priv, REG_DIR_DATA_READ); \ spin_unlock(&priv->indir_lock); \ return (u64)indir << 32 | dir; \ } \ -- cgit v0.10.2 From a4176a9391868bfa87705bcd2e3b49e9b9dd2996 Mon Sep 17 00:00:00 2001 From: Matthew Thode Date: Tue, 17 Feb 2015 18:31:57 -0600 Subject: net: reject creation of netdev names with colons colons are used as a separator in netdev device lookup in dev_ioctl.c Specific functions are SIOCGIFTXQLEN SIOCETHTOOL SIOCSIFNAME Signed-off-by: Matthew Thode Signed-off-by: David S. Miller diff --git a/net/core/dev.c b/net/core/dev.c index 8f9710c..962ee9d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -946,7 +946,7 @@ bool dev_valid_name(const char *name) return false; while (*name) { - if (*name == '/' || isspace(*name)) + if (*name == '/' || *name == ':' || isspace(*name)) return false; name++; } -- cgit v0.10.2 From b9ebafbe8cfeeddec881504c446cccd0d87a51b6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Feb 2015 06:48:57 -0800 Subject: rhashtable: ensure cache line alignment on bucket_table struct bucket_table contains mostly read fields : size, locks_mask, locks. Make sure these are not sharing a cache line with buckets[] Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 5885127..cb2104b 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -54,10 +54,11 @@ struct rhash_head { * @buckets: size * hash buckets */ struct bucket_table { - size_t size; - unsigned int locks_mask; - spinlock_t *locks; - struct rhash_head __rcu *buckets[]; + size_t size; + unsigned int locks_mask; + spinlock_t *locks; + + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -- cgit v0.10.2 From 3f34b24a732bab9635c4b32823268c37c01b40f0 Mon Sep 17 00:00:00 2001 From: Alexander Drozdov Date: Fri, 20 Feb 2015 08:24:27 +0300 Subject: af_packet: allow packets defragmentation not only for hash fanout type Packets defragmentation was introduced for PACKET_FANOUT_HASH only, see 7736d33f4262 ("packet: Add pre-defragmentation support for ipv4 fanouts") It may be useful to have defragmentation enabled regardless of fanout type. Without that, the AF_PACKET user may have to: 1. Collect fragments from different rings 2. Defragment by itself Signed-off-by: Alexander Drozdov Signed-off-by: David S. Miller diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9c28cec..99fc628 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1349,14 +1349,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, return 0; } + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { + skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); + if (!skb) + return 0; + } switch (f->type) { case PACKET_FANOUT_HASH: default: - if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { - skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); - if (!skb) - return 0; - } idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: -- cgit v0.10.2 From 45cee4f594bcb3083c2d8475462af2f2ddf29aff Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 20 Feb 2015 19:12:52 +0100 Subject: mISDN: replace current->state by set_current_state() Use helper function to access current->state. Direct assignments are prone to races and therefore buggy. Thanks to Peter Zijlstra for the exact definition of the problem. Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 3c92780..ff48da6 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -1755,7 +1755,7 @@ init_card(struct hfc_pci *hc) enable_hwirq(hc); spin_unlock_irqrestore(&hc->lock, flags); /* Timeout 80ms */ - current->state = TASK_UNINTERRUPTIBLE; + set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout((80 * HZ) / 1000); printk(KERN_INFO "HFC PCI: IRQ %d count %d\n", hc->irq, hc->irqcnt); -- cgit v0.10.2 From 50462ce0052c67b5a06f19fb7c7f308813006879 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 20 Feb 2015 19:12:55 +0100 Subject: hso: replace current->state by __set_current_state() Use helper functions to access current->state. Direct assignments are prone to races and therefore buggy. Thanks to Peter Zijlstra for the exact definition of the problem. Suggested-By: Peter Zijlstra Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 9cdfb3f..778e915 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1594,7 +1594,7 @@ hso_wait_modem_status(struct hso_serial *serial, unsigned long arg) } cprev = cnow; } - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&tiocmget->waitq, &wait); return ret; -- cgit v0.10.2 From 2c45015a66a171de306e23991fec998667b49acf Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Fri, 20 Feb 2015 19:12:56 +0100 Subject: wan: cosa: replace current->state by set_current_state() Use helper functions to access current->state. Direct assignments are prone to races and therefore buggy. current->state = TASK_RUNNING is replaced by __set_current_state() Thanks to Peter Zijlstra for the exact definition of the problem. Suggested-By: Peter Zijlstra Signed-off-by: Fabian Frederick Acked-By: Jan "Yenya" Kasprzak Signed-off-by: David S. Miller diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 83c39e2..88d121d 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -806,21 +806,21 @@ static ssize_t cosa_read(struct file *file, spin_lock_irqsave(&cosa->lock, flags); add_wait_queue(&chan->rxwaitq, &wait); while (!chan->rx_status) { - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&cosa->lock, flags); schedule(); spin_lock_irqsave(&cosa->lock, flags); if (signal_pending(current) && chan->rx_status == 0) { chan->rx_status = 1; remove_wait_queue(&chan->rxwaitq, &wait); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); spin_unlock_irqrestore(&cosa->lock, flags); mutex_unlock(&chan->rlock); return -ERESTARTSYS; } } remove_wait_queue(&chan->rxwaitq, &wait); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); kbuf = chan->rxdata; count = chan->rxsize; spin_unlock_irqrestore(&cosa->lock, flags); @@ -890,14 +890,14 @@ static ssize_t cosa_write(struct file *file, spin_lock_irqsave(&cosa->lock, flags); add_wait_queue(&chan->txwaitq, &wait); while (!chan->tx_status) { - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irqrestore(&cosa->lock, flags); schedule(); spin_lock_irqsave(&cosa->lock, flags); if (signal_pending(current) && chan->tx_status == 0) { chan->tx_status = 1; remove_wait_queue(&chan->txwaitq, &wait); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); chan->tx_status = 1; spin_unlock_irqrestore(&cosa->lock, flags); up(&chan->wsem); @@ -905,7 +905,7 @@ static ssize_t cosa_write(struct file *file, } } remove_wait_queue(&chan->txwaitq, &wait); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); up(&chan->wsem); spin_unlock_irqrestore(&cosa->lock, flags); kfree(kbuf); -- cgit v0.10.2 From 87cda7cb4380fa05b70485b0a57b07d020f5204b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 22 Feb 2015 15:54:29 -0500 Subject: r8169: Revert BQL and xmit_more support. There are certain regressions which are pointing to these two commits which we are having a hard time resolving. So revert them for now. Specifically this reverts: commit 0bec3b700d106a8b0a34227b2976d1a582f1aab7 Author: Florian Westphal Date: Wed Jan 7 10:49:49 2015 +0100 r8169: add support for xmit_more and commit 1e918876853aa85435e0f17fd8b4a92dcfff53d6 Author: Florian Westphal Date: Wed Oct 1 13:38:03 2014 +0200 r8169: add support for Byte Queue Limits There were some attempts by Eric Dumazet to address some obvious problems in the TX flow, to see if they would fix the problems, but none of them seem to help for the regression reporters. Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index ad0020a..b156092 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5067,8 +5067,6 @@ static void rtl_hw_reset(struct rtl8169_private *tp) RTL_W8(ChipCmd, CmdReset); rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100); - - netdev_reset_queue(tp->dev); } static void rtl_request_uncached_firmware(struct rtl8169_private *tp) @@ -7049,7 +7047,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, u32 status, len; u32 opts[2]; int frags; - bool stop_queue; if (unlikely(!TX_FRAGS_READY_FOR(tp, skb_shinfo(skb)->nr_frags))) { netif_err(tp, drv, dev, "BUG! Tx Ring full when queue awake!\n"); @@ -7090,8 +7087,6 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, txd->opts2 = cpu_to_le32(opts[1]); - netdev_sent_queue(dev, skb->len); - skb_tx_timestamp(skb); /* Force memory writes to complete before releasing descriptor */ @@ -7106,16 +7101,11 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, tp->cur_tx += frags + 1; - stop_queue = !TX_FRAGS_READY_FOR(tp, MAX_SKB_FRAGS); + RTL_W8(TxPoll, NPQ); - if (!skb->xmit_more || stop_queue || - netif_xmit_stopped(netdev_get_tx_queue(dev, 0))) { - RTL_W8(TxPoll, NPQ); - - mmiowb(); - } + mmiowb(); - if (stop_queue) { + if (!TX_FRAGS_READY_FOR(tp, MAX_SKB_FRAGS)) { /* Avoid wrongly optimistic queue wake-up: rtl_tx thread must * not miss a ring update when it notices a stopped queue. */ @@ -7198,7 +7188,6 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev) static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) { unsigned int dirty_tx, tx_left; - unsigned int bytes_compl = 0, pkts_compl = 0; dirty_tx = tp->dirty_tx; smp_rmb(); @@ -7222,8 +7211,10 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) rtl8169_unmap_tx_skb(&tp->pci_dev->dev, tx_skb, tp->TxDescArray + entry); if (status & LastFrag) { - pkts_compl++; - bytes_compl += tx_skb->skb->len; + u64_stats_update_begin(&tp->tx_stats.syncp); + tp->tx_stats.packets++; + tp->tx_stats.bytes += tx_skb->skb->len; + u64_stats_update_end(&tp->tx_stats.syncp); dev_kfree_skb_any(tx_skb->skb); tx_skb->skb = NULL; } @@ -7232,13 +7223,6 @@ static void rtl_tx(struct net_device *dev, struct rtl8169_private *tp) } if (tp->dirty_tx != dirty_tx) { - netdev_completed_queue(tp->dev, pkts_compl, bytes_compl); - - u64_stats_update_begin(&tp->tx_stats.syncp); - tp->tx_stats.packets += pkts_compl; - tp->tx_stats.bytes += bytes_compl; - u64_stats_update_end(&tp->tx_stats.syncp); - tp->dirty_tx = dirty_tx; /* Sync with rtl8169_start_xmit: * - publish dirty_tx ring index (write barrier) -- cgit v0.10.2 From 52d6c8c6ca125872459054daa70f2f1c698c8e75 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 22 Feb 2015 17:03:41 -0800 Subject: net: pktgen: disable xmit_clone on virtual devices Trying to use burst capability (aka xmit_more) on a virtual device like bonding is not supported. For example, skb might be queued multiple times on a qdisc, with various list corruptions. Fixes: 38b2cf2982dc ("net: pktgen: packet bursting via skb->xmit_more") Signed-off-by: Eric Dumazet Cc: Alexei Starovoitov Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b4899f5b..508155b 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1134,6 +1134,9 @@ static ssize_t pktgen_if_write(struct file *file, return len; i += len; + if ((value > 1) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + return -ENOTSUPP; pkt_dev->burst = value < 1 ? 1 : value; sprintf(pg_result, "OK: burst=%d", pkt_dev->burst); return count; -- cgit v0.10.2 From 6514890f7aa8dd75fa46e282a1c7a8615e86f363 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 20 Feb 2015 13:33:16 -0500 Subject: tcp: fix tcp_should_expand_sndbuf() to use tcp_packets_in_flight() tcp_should_expand_sndbuf() does not expand the send buffer if we have filled the congestion window. However, it should use tcp_packets_in_flight() instead of tp->packets_out to make this check. Testing has established that the difference matters a lot if there are many SACKed packets, causing a needless performance shortfall. Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: Nandita Dukkipati Signed-off-by: David S. Miller diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8fdd27b..fb4cf8b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4770,7 +4770,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) return false; /* If we filled the congestion window, do not expand. */ - if (tp->packets_out >= tp->snd_cwnd) + if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return false; return true; -- cgit v0.10.2 From 407550fe2ccfca3fa0ac4bcdb0a412adeabf84ba Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 22 Feb 2015 15:41:18 -0800 Subject: Bluetooth: btusb: Fix issue with CSR based Intel Wireless controllers Older Wireless controllers from Intel used CSR chips to provide support for Bluetooth. The commit d0ac9eb72 (Bluetooth: btusb: Ignore unknown Intel devices with generic descriptor) disabled these older controllers. To enable them again, put them into the blacklist and mark them clearly as CSR based controllers. T: Bus=02 Lev=02 Prnt=02 Port=05 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=8087 ProdID=07da Rev=78.69 C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr= 0mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=1ms E: Ad=02(O) Atr=02(Bulk) MxPS= 64 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 64 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 0 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 0 Ivl=1ms I: If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 9 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 9 Ivl=1ms I: If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 17 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 17 Ivl=1ms I: If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 25 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 25 Ivl=1ms I: If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 33 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 33 Ivl=1ms I: If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb E: Ad=03(O) Atr=01(Isoc) MxPS= 49 Ivl=1ms E: Ad=83(I) Atr=01(Isoc) MxPS= 49 Ivl=1ms Reported-by: Kenneth R. Crudup Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b876888..8bfc4c2 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -272,6 +272,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x1286, 0x2046), .driver_info = BTUSB_MARVELL }, /* Intel Bluetooth devices */ + { USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR }, { USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL }, { USB_DEVICE(0x8087, 0x0a2b), .driver_info = BTUSB_INTEL_NEW }, -- cgit v0.10.2 From 71bb0012c38fbd090a56b3cb96e9f626c415d264 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 23 Feb 2015 04:35:06 -0500 Subject: rhashtable: initialize all rhashtable walker members Commit f2dba9c6ff ("rhashtable: Introduce rhashtable_walk_*") forgot to initialize the members of struct rhashtable_walker after allocating it, which caused an undefined value for 'resize' which is used later on. Fixes: f2dba9c6ff ("rhashtable: Introduce rhashtable_walk_*") Signed-off-by: Sasha Levin Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b41a5c0..e3a04e4 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -903,6 +903,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) if (!iter->walker) return -ENOMEM; + INIT_LIST_HEAD(&iter->walker->list); + iter->walker->resize = false; + mutex_lock(&ht->mutex); list_add(&iter->walker->list, &ht->walkers); mutex_unlock(&ht->mutex); -- cgit v0.10.2 From 46b9e4bb76ee26f1e024e048bb95af41b763f48f Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 23 Feb 2015 12:02:51 +0100 Subject: decnet: Fix obvious o/0 typo Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1d7c125..3b81092 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1062,7 +1062,7 @@ source_ok: if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." - " dst=%o4x src=%04x oif=%d try_hard=%d\n", + " dst=%04x src=%04x oif=%d try_hard=%d\n", le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr), fld.flowidn_oif, try_hard); -- cgit v0.10.2 From 57e595631904c827cfa1a0f7bbd7cc9a49da5745 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 23 Feb 2015 14:02:54 +0100 Subject: team: fix possible null pointer dereference in team_handle_frame Currently following race is possible in team: CPU0 CPU1 team_port_del team_upper_dev_unlink priv_flags &= ~IFF_TEAM_PORT team_handle_frame team_port_get_rcu team_port_exists priv_flags & IFF_TEAM_PORT == 0 return NULL (instead of port got from rx_handler_data) netdev_rx_handler_unregister The thing is that the flag is removed before rx_handler is unregistered. If team_handle_frame is called in between, team_port_exists returns 0 and team_port_get_rcu will return NULL. So do not check the flag here. It is guaranteed by netdev_rx_handler_unregister that team_handle_frame will always see valid rx_handler_data pointer. Signed-off-by: Jiri Pirko Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: David S. Miller diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 0e62274..f1ee71e 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -43,9 +43,7 @@ static struct team_port *team_port_get_rcu(const struct net_device *dev) { - struct team_port *port = rcu_dereference(dev->rx_handler_data); - - return team_port_exists(dev) ? port : NULL; + return rcu_dereference(dev->rx_handler_data); } static struct team_port *team_port_get_rtnl(const struct net_device *dev) -- cgit v0.10.2 From 30ff54765976e132674e3eae2071ed8ed494665c Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 23 Feb 2015 08:17:12 -0500 Subject: net: sched: export tc_connmark.h so it is uapi accessible Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index 19d5219..242cf0c 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -9,3 +9,4 @@ header-y += tc_pedit.h header-y += tc_skbedit.h header-y += tc_vlan.h header-y += tc_bpf.h +header-y += tc_connmark.h -- cgit v0.10.2 From a948f8ce771a1f07c17ed8bcb51f59f69129a51c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 23 Feb 2015 18:38:24 +0100 Subject: irda: replace current->state by set_current_state() Use helper functions to access current->state. Direct assignments are prone to races and therefore buggy. current->state = TASK_RUNNING can be replaced by __set_current_state() Thanks to Peter Zijlstra for the exact definition of the problem. Suggested-By: Peter Zijlstra Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 40695b9..9940a41 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -811,7 +811,7 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) break; } spin_unlock_irqrestore(&self->spinlock, flags); - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); } /* diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 3c83a1e..1215693 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -305,7 +305,7 @@ irnet_ctrl_read(irnet_socket * ap, /* Put ourselves on the wait queue to be woken up */ add_wait_queue(&irnet_events.rwait, &wait); - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); for(;;) { /* If there is unread events */ @@ -321,7 +321,7 @@ irnet_ctrl_read(irnet_socket * ap, /* Yield and wait to be woken up */ schedule(); } - current->state = TASK_RUNNING; + __set_current_state(TASK_RUNNING); remove_wait_queue(&irnet_events.rwait, &wait); /* Did we got it ? */ -- cgit v0.10.2 From d720d8cec563ce4e4fa44a613d4f2dcb1caf2998 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 23 Feb 2015 18:12:56 +0000 Subject: net: compat: Ignore MSG_CMSG_COMPAT in compat_sys_{send, recv}msg With commit a7526eb5d06b (net: Unbreak compat_sys_{send,recv}msg), the MSG_CMSG_COMPAT flag is blocked at the compat syscall entry points, changing the kernel compat behaviour from the one before the commit it was trying to fix (1be374a0518a, net: Block MSG_CMSG_COMPAT in send(m)msg and recv(m)msg). On 32-bit kernels (!CONFIG_COMPAT), MSG_CMSG_COMPAT is 0 and the native 32-bit sys_sendmsg() allows flag 0x80000000 to be set (it is ignored by the kernel). However, on a 64-bit kernel, the compat ABI is different with commit a7526eb5d06b. This patch changes the compat_sys_{send,recv}msg behaviour to the one prior to commit 1be374a0518a. The problem was found running 32-bit LTP (sendmsg01) binary on an arm64 kernel. Arguably, LTP should not pass 0xffffffff as flags to sendmsg() but the general rule is not to break user ABI (even when the user behaviour is not entirely sane). Fixes: a7526eb5d06b (net: Unbreak compat_sys_{send,recv}msg) Cc: Andy Lutomirski Cc: David S. Miller Signed-off-by: Catalin Marinas Signed-off-by: David S. Miller diff --git a/net/compat.c b/net/compat.c index 3236b41..94d3d5e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -711,24 +711,18 @@ static unsigned char nas[21] = { COMPAT_SYSCALL_DEFINE3(sendmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_sendmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } COMPAT_SYSCALL_DEFINE4(sendmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, unsigned int, vlen, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_sendmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT); } COMPAT_SYSCALL_DEFINE3(recvmsg, int, fd, struct compat_msghdr __user *, msg, unsigned int, flags) { - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; return __sys_recvmsg(fd, (struct user_msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } @@ -751,9 +745,6 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg, int datagrams; struct timespec ktspec; - if (flags & MSG_CMSG_COMPAT) - return -EINVAL; - if (timeout == NULL) return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, NULL); -- cgit v0.10.2 From fe6e4081a626d2987b73c9b15ed520b4f22d585f Mon Sep 17 00:00:00 2001 From: Vlastimil Setka Date: Mon, 23 Feb 2015 11:27:37 -0600 Subject: altera_tse: Correct typo in obtaining tx_fifo_depth from devicetree This patch corrects a typo in the way tx_fifo_depth is read from the devicetree. This patch was submitted by Vlastimil about a week ago, and is now cleaned up and resubmitted. Signed-off-by: Vlastimil Setka Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 760c72c..f3d784a 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1399,7 +1399,7 @@ static int altera_tse_probe(struct platform_device *pdev) } if (of_property_read_u32(pdev->dev.of_node, "tx-fifo-depth", - &priv->rx_fifo_depth)) { + &priv->tx_fifo_depth)) { dev_err(&pdev->dev, "cannot obtain tx-fifo-depth\n"); ret = -ENXIO; goto err_free_netdev; -- cgit v0.10.2 From 8d4ac39df09c6f8078af60cd0ddd7b2435728e72 Mon Sep 17 00:00:00 2001 From: Vlastimil Setka Date: Mon, 23 Feb 2015 11:30:29 -0600 Subject: altera_tse: Fixes in NAPI and interrupt handling paths Incorrect NAPI polling caused WARNING at net/core/dev.c net_rx_action. Some stability issues were also seen at high throughput and system load before this patch. This patch contains several changes in altera_tse_main.c: - tse_rx() is fixed to not process more than `limit` frames - tse_poll() is refactored to match NAPI logic - only received frames are counted for return value - removed bogus condition `(rxcomplete >= budget || txcomplete > 0)` - replace by: if (rxcomplete < budget) -> call __napi_complete and enable irq - altera_isr() - replace spin_lock_irqsave() by spin_lock() - we are in isr - use spinlocks just over irq manipulation, not over __napi_schedule - reset IRQ first, then disable and schedule napi This is a cleaned up resubmission from Vlastimil's recent submission. Signed-off-by: Vlastimil Setka Signed-off-by: Roman Pisl Signed-off-by: Vince Bridgers Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index f3d784a..6725dc0 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -376,7 +376,8 @@ static int tse_rx(struct altera_tse_private *priv, int limit) u16 pktlength; u16 pktstatus; - while ((rxstatus = priv->dmaops->get_rx_status(priv)) != 0) { + while (((rxstatus = priv->dmaops->get_rx_status(priv)) != 0) && + (count < limit)) { pktstatus = rxstatus >> 16; pktlength = rxstatus & 0xffff; @@ -491,28 +492,27 @@ static int tse_poll(struct napi_struct *napi, int budget) struct altera_tse_private *priv = container_of(napi, struct altera_tse_private, napi); int rxcomplete = 0; - int txcomplete = 0; unsigned long int flags; - txcomplete = tse_tx_complete(priv); + tse_tx_complete(priv); rxcomplete = tse_rx(priv, budget); - if (rxcomplete >= budget || txcomplete > 0) - return rxcomplete; + if (rxcomplete < budget) { - napi_gro_flush(napi, false); - __napi_complete(napi); + napi_gro_flush(napi, false); + __napi_complete(napi); - netdev_dbg(priv->dev, - "NAPI Complete, did %d packets with budget %d\n", - txcomplete+rxcomplete, budget); + netdev_dbg(priv->dev, + "NAPI Complete, did %d packets with budget %d\n", + rxcomplete, budget); - spin_lock_irqsave(&priv->rxdma_irq_lock, flags); - priv->dmaops->enable_rxirq(priv); - priv->dmaops->enable_txirq(priv); - spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags); - return rxcomplete + txcomplete; + spin_lock_irqsave(&priv->rxdma_irq_lock, flags); + priv->dmaops->enable_rxirq(priv); + priv->dmaops->enable_txirq(priv); + spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags); + } + return rxcomplete; } /* DMA TX & RX FIFO interrupt routing @@ -521,7 +521,6 @@ static irqreturn_t altera_isr(int irq, void *dev_id) { struct net_device *dev = dev_id; struct altera_tse_private *priv; - unsigned long int flags; if (unlikely(!dev)) { pr_err("%s: invalid dev pointer\n", __func__); @@ -529,20 +528,20 @@ static irqreturn_t altera_isr(int irq, void *dev_id) } priv = netdev_priv(dev); - /* turn off desc irqs and enable napi rx */ - spin_lock_irqsave(&priv->rxdma_irq_lock, flags); + spin_lock(&priv->rxdma_irq_lock); + /* reset IRQs */ + priv->dmaops->clear_rxirq(priv); + priv->dmaops->clear_txirq(priv); + spin_unlock(&priv->rxdma_irq_lock); if (likely(napi_schedule_prep(&priv->napi))) { + spin_lock(&priv->rxdma_irq_lock); priv->dmaops->disable_rxirq(priv); priv->dmaops->disable_txirq(priv); + spin_unlock(&priv->rxdma_irq_lock); __napi_schedule(&priv->napi); } - /* reset IRQs */ - priv->dmaops->clear_rxirq(priv); - priv->dmaops->clear_txirq(priv); - - spin_unlock_irqrestore(&priv->rxdma_irq_lock, flags); return IRQ_HANDLED; } -- cgit v0.10.2 From 77751427a1ff25b27d47a4c36b12c3c8667855ac Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Mon, 23 Feb 2015 11:17:13 -0300 Subject: ipv6: addrconf: validate new MTU before applying it Currently we don't check if the new MTU is valid or not and this allows one to configure a smaller than minimum allowed by RFCs or even bigger than interface own MTU, which is a problem as it may lead to packet drops. If you have a daemon like NetworkManager running, this may be exploited by remote attackers by forging RA packets with an invalid MTU, possibly leading to a DoS. (NetworkManager currently only validates for values too small, but not for too big ones.) The fix is just to make sure the new value is valid. That is, between IPV6_MIN_MTU and interface's MTU. Note that similar check is already performed at ndisc_router_discovery(), for when kernel itself parses the RA. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 98e4a63..b603002 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4903,6 +4903,21 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } +static +int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct inet6_dev *idev = ctl->extra1; + int min_mtu = IPV6_MIN_MTU; + struct ctl_table lctl; + + lctl = *ctl; + lctl.extra1 = &min_mtu; + lctl.extra2 = idev ? &idev->dev->mtu : NULL; + + return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); +} + static void dev_disable_change(struct inet6_dev *idev) { struct netdev_notifier_info info; @@ -5054,7 +5069,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.mtu6, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_mtu, }, { .procname = "accept_ra", -- cgit v0.10.2 From f2831e2007810b690f93a26128058a193eadf393 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Thu, 5 Feb 2015 08:45:43 -0500 Subject: mac80211_hwsim: fix error handling in tx_frame_nl Correct two problems with the error handling when using the netlink forwarding API: first, the netlink skb is never freed if nla_put() fails; and second, genlmsg_unicast() can fail if the netlink socket is full. In the latter case, the corresponding data skb is not counted as a drop and userspace programs like wmediumd will see TCP stalls due to lost packets. Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 4a4c658..8908be6 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -946,7 +946,8 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure; genlmsg_end(skb, msg_head); - genlmsg_unicast(&init_net, skb, dst_portid); + if (genlmsg_unicast(&init_net, skb, dst_portid)) + goto err_free_txskb; /* Enqueue the packet */ skb_queue_tail(&data->pending, my_skb); @@ -955,6 +956,8 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, return; nla_put_failure: + nlmsg_free(skb); +err_free_txskb: printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__); ieee80211_free_txskb(hw, my_skb); data->tx_failed++; -- cgit v0.10.2 From 104f5a6206f4b3133c675e3d41eca2ca4c41406b Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Sun, 8 Feb 2015 12:36:07 +0200 Subject: mac80211: clear sdata->radar_required If ieee80211_vif_use_channel() fails, we have to clear sdata->radar_required (which we might have just set). Failing to do it results in stale radar_required field which prevents starting new scan requests. Reported-by: Jouni Malinen Signed-off-by: Eliad Peller [use false instead of 0] Signed-off-by: Johannes Berg diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index ff0d2db..5bcd4e5 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1508,6 +1508,8 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) if (ieee80211_chanctx_refcount(local, ctx) == 0) ieee80211_free_chanctx(local, ctx); + sdata->radar_required = false; + /* Unreserving may ready an in-place reservation. */ if (use_reserved_switch) ieee80211_vif_use_reserved_switch(local); @@ -1566,6 +1568,9 @@ int ieee80211_vif_use_channel(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_smps_chanctx(local, ctx); ieee80211_recalc_radar_chanctx(local, ctx); out: + if (ret) + sdata->radar_required = false; + mutex_unlock(&local->chanctx_mtx); return ret; } -- cgit v0.10.2 From 5528fae88697268a7176dd6c8d7ab368c04368be Mon Sep 17 00:00:00 2001 From: Samuel Tan Date: Mon, 9 Feb 2015 21:29:15 +0200 Subject: nl80211: use loop index as type for net detect frequency results We currently add nested members of the NL80211_ATTR_SCAN_FREQUENCIES as NLA_U32 attributes of type NL80211_ATTR_WIPHY_FREQ in cfg80211_net_detect_results. However, since there can be an arbitrary number of frequency results, we should use the loop index of the loop used to add the frequency results to NL80211_ATTR_SCAN_FREQUENCIES as the type (i.e. nla_type) for each result attribute, rather than a fixed type. This change is in line with how nested members are added to NL80211_ATTR_SCAN_FREQUENCIES in the functions nl80211_send_wowlan_nd and nl80211_add_scan_req. Signed-off-by: Samuel Tan Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d78fd8b..3c7fb04 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12528,9 +12528,7 @@ static int cfg80211_net_detect_results(struct sk_buff *msg, } for (j = 0; j < match->n_channels; j++) { - if (nla_put_u32(msg, - NL80211_ATTR_WIPHY_FREQ, - match->channels[j])) { + if (nla_put_u32(msg, j, match->channels[j])) { nla_nest_cancel(msg, nl_freqs); nla_nest_cancel(msg, nl_match); goto out; -- cgit v0.10.2 From a18c7192aabac73078f35e5ef4ce28ad5f8cfe8e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Feb 2015 10:56:42 +0100 Subject: nl80211: fix memory leak in monitor flags parsing If monitor flags parsing results in active monitor but that isn't supported, the already allocated message is leaked. Fix this by moving the allocation after this check. Reported-by: Christian Engelmayer Signed-off-by: Johannes Berg diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3c7fb04..be25015 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2654,10 +2654,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return err; } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOMEM; - err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); @@ -2666,6 +2662,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); -- cgit v0.10.2 From 28981e5eb45fe13e1d2c9e0e2e189dc5c8682006 Mon Sep 17 00:00:00 2001 From: Jason Abele Date: Tue, 17 Feb 2015 16:10:41 -0800 Subject: cfg80211: fix n_reg_rules to match world_regdom There are currently 8 rules in the world_regdom, but only the first 6 are applied due to an incorrect value for n_reg_rules. This causes channels 149-165 and 60GHz to be disabled. Signed-off-by: Jason Abele Signed-off-by: Johannes Berg diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b586d0d..48dfc7b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -228,7 +228,7 @@ static DECLARE_DELAYED_WORK(reg_timeout, reg_timeout_work); /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { - .n_reg_rules = 6, + .n_reg_rules = 8, .alpha2 = "00", .reg_rules = { /* IEEE 802.11b/g, channels 1..11 */ -- cgit v0.10.2 From 81daf735f9fe35ef6bc4073068748b221d64fb47 Mon Sep 17 00:00:00 2001 From: Junjie Mao Date: Wed, 28 Jan 2015 10:03:26 +0800 Subject: cfg80211: calls nl80211_exit on error nl80211_exit should be called in cfg80211_init if nl80211_init succeeds but regulatory_init or create_singlethread_workqueue fails. Signed-off-by: Junjie Mao Signed-off-by: Johannes Berg diff --git a/net/wireless/core.c b/net/wireless/core.c index 3af0ecf..2a0bbd2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1199,6 +1199,7 @@ out_fail_wq: regulatory_exit(); out_fail_reg: debugfs_remove(ieee80211_debugfs_dir); + nl80211_exit(); out_fail_nl80211: unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: -- cgit v0.10.2 From 17dce15801d5602719936045a7e84ff7dc6b6da2 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 19 Feb 2015 12:57:40 +0100 Subject: mac80211/minstrel: fix !x!=0 confusion Commit 06d961a8e210 ("mac80211/minstrel: use the new rate control API") inverted the condition 'if (msr->sample_limit != 0)' to 'if (!msr->sample_limit != 0)'. But it is confusing both to people and compilers (gcc5): net/mac80211/rc80211_minstrel.c: In function 'minstrel_get_rate': net/mac80211/rc80211_minstrel.c:376:26: warning: logical not is only applied to the left hand side of comparison if (!msr->sample_limit != 0) ^ Let there be only 'if (!msr->sample_limit)'. Fixes: 06d961a8e210 ("mac80211/minstrel: use the new rate control API") Signed-off-by: Jiri Slaby Signed-off-by: Johannes Berg diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 7c86a00..ef6e8a6 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -373,7 +373,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, rate++; mi->sample_deferred++; } else { - if (!msr->sample_limit != 0) + if (!msr->sample_limit) return; mi->sample_packets++; -- cgit v0.10.2 From 4e10fd5b4a7f4100007147558c304da3e73b25cf Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 24 Feb 2015 14:14:35 -0500 Subject: rtnetlink: avoid 0 sized arrays Arrays (when not in a struct) "shall have a value greater than zero". GCC complains when it's not the case here. Fixes: ba7d49b1f0 ("rtnetlink: provide api for getting and setting slave info") Signed-off-by: Sasha Levin Signed-off-by: David S. Miller diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab293a3..1385de0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2012,8 +2012,8 @@ replay: } if (1) { - struct nlattr *attr[ops ? ops->maxtype + 1 : 0]; - struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 0]; + struct nlattr *attr[ops ? ops->maxtype + 1 : 1]; + struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 1]; struct nlattr **data = NULL; struct nlattr **slave_data = NULL; struct net *dest_net, *link_net = NULL; -- cgit v0.10.2 From 41a50d621a321b4c15273cc1b5ed41437f4acdfb Mon Sep 17 00:00:00 2001 From: Alexander Drozdov Date: Tue, 24 Feb 2015 08:18:28 +0300 Subject: af_packet: don't pass empty blocks for PACKET_V3 Before da413eec729d ("packet: Fixed TPACKET V3 to signal poll when block is closed rather than every packet") poll listening for an af_packet socket was not signaled if there was no packets to process. After the patch poll is signaled evety time when block retire timer expires. That happens because af_packet closes the current block on timeout even if the block is empty. Passing empty blocks to the user not only wastes CPU but also wastes ring buffer space increasing probability of packets dropping on small timeouts. Signed-off-by: Alexander Drozdov Cc: Dan Collins Cc: Willem de Bruijn Cc: Guy Harris Signed-off-by: David S. Miller diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 99fc628..5bf1e96 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -698,6 +698,10 @@ static void prb_retire_rx_blk_timer_expired(unsigned long data) if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { if (!frozen) { + if (!BLOCK_NUM_PKTS(pbd)) { + /* An empty block. Just refresh the timer. */ + goto refresh_timer; + } prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); if (!prb_dispatch_next_block(pkc, po)) goto refresh_timer; @@ -798,7 +802,11 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1, h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; } else { - /* Ok, we tmo'd - so get the current time */ + /* Ok, we tmo'd - so get the current time. + * + * It shouldn't really happen as we don't close empty + * blocks. See prb_retire_rx_blk_timer_expired(). + */ struct timespec ts; getnstimeofday(&ts); h1->ts_last_pkt.ts_sec = ts.tv_sec; -- cgit v0.10.2 From 7fbb9d8415d4a51cf542e87cf3a717a9f7e6aedc Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 24 Feb 2015 11:17:59 +0000 Subject: xen-netback: release pending index before pushing Tx responses If the pending indexes are released /after/ pushing the Tx response then a stale pending index may be used if a new Tx request is immediately pushed by the frontend. The may cause various WARNINGs or BUGs if the stale pending index is actually still in use. Fix this by releasing the pending index before pushing the Tx response. The full barrier for the pending ring update is not required since the the Tx response push already has a suitable write barrier. Signed-off-by: David Vrabel Reviewed-by: Wei Liu Signed-off-by: David S. Miller diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f7a31d2..c4d68d7 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -655,9 +655,15 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned long flags; do { + int notify; + spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR); + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); spin_unlock_irqrestore(&queue->response_lock, flags); + if (notify) + notify_remote_via_irq(queue->tx_irq); + if (cons == end) break; txp = RING_GET_REQUEST(&queue->tx, cons++); @@ -1649,17 +1655,28 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, { struct pending_tx_info *pending_tx_info; pending_ring_idx_t index; + int notify; unsigned long flags; pending_tx_info = &queue->pending_tx_info[pending_idx]; + spin_lock_irqsave(&queue->response_lock, flags); + make_tx_response(queue, &pending_tx_info->req, status); - index = pending_index(queue->pending_prod); + + /* Release the pending index before pusing the Tx response so + * its available before a new Tx request is pushed by the + * frontend. + */ + index = pending_index(queue->pending_prod++); queue->pending_ring[index] = pending_idx; - /* TX shouldn't use the index before we give it back here */ - mb(); - queue->pending_prod++; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); + spin_unlock_irqrestore(&queue->response_lock, flags); + + if (notify) + notify_remote_via_irq(queue->tx_irq); } @@ -1669,7 +1686,6 @@ static void make_tx_response(struct xenvif_queue *queue, { RING_IDX i = queue->tx.rsp_prod_pvt; struct xen_netif_tx_response *resp; - int notify; resp = RING_GET_RESPONSE(&queue->tx, i); resp->id = txp->id; @@ -1679,9 +1695,6 @@ static void make_tx_response(struct xenvif_queue *queue, RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; queue->tx.rsp_prod_pvt = ++i; - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); - if (notify) - notify_remote_via_irq(queue->tx_irq); } static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue, -- cgit v0.10.2 From 5c2d2b148b35a995f0c0e76a3a37902ec98f164c Mon Sep 17 00:00:00 2001 From: Yannick Guerrini Date: Tue, 24 Feb 2015 13:03:51 +0100 Subject: r8169: Fix trivial typo in rtl_check_firmware Change 'firwmare' to 'firmware' Signed-off-by: Yannick Guerrini Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index b156092..c70ab40 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -2561,7 +2561,7 @@ static int rtl_check_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) int rc = -EINVAL; if (!rtl_fw_format_ok(tp, rtl_fw)) { - netif_err(tp, ifup, dev, "invalid firwmare\n"); + netif_err(tp, ifup, dev, "invalid firmware\n"); goto out; } -- cgit v0.10.2 From 74ad752442d9488cf02ee2a9243d6c6b5c943efb Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 24 Feb 2015 10:47:49 -0600 Subject: amd-xgbe-phy: PHY KX/KR mode differences The PHY requires different settings for the Decision Feedback Analyzer (DFE) when running in KX mode vs. KR mode. Update the code to change these settings when changing modes in order to provide a more stable link. Additionally, adjust the 10GbE PQ skew default setting to a more sane value. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller diff --git a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt index 33df393..8db3238 100644 --- a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt +++ b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt @@ -27,6 +27,8 @@ property is used. - amd,serdes-cdr-rate: CDR rate speed selection - amd,serdes-pq-skew: PQ (data sampling) skew - amd,serdes-tx-amp: TX amplitude boost +- amd,serdes-dfe-tap-config: DFE taps available to run +- amd,serdes-dfe-tap-enable: DFE taps to enable Example: xgbe_phy@e1240800 { @@ -41,4 +43,6 @@ Example: amd,serdes-cdr-rate = <2>, <2>, <7>; amd,serdes-pq-skew = <10>, <10>, <30>; amd,serdes-tx-amp = <15>, <15>, <10>; + amd,serdes-dfe-tap-config = <3>, <3>, <1>; + amd,serdes-dfe-tap-enable = <0>, <0>, <127>; }; diff --git a/drivers/net/phy/amd-xgbe-phy.c b/drivers/net/phy/amd-xgbe-phy.c index 9e3af54..32efbd4 100644 --- a/drivers/net/phy/amd-xgbe-phy.c +++ b/drivers/net/phy/amd-xgbe-phy.c @@ -92,6 +92,8 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define XGBE_PHY_CDR_RATE_PROPERTY "amd,serdes-cdr-rate" #define XGBE_PHY_PQ_SKEW_PROPERTY "amd,serdes-pq-skew" #define XGBE_PHY_TX_AMP_PROPERTY "amd,serdes-tx-amp" +#define XGBE_PHY_DFE_CFG_PROPERTY "amd,serdes-dfe-tap-config" +#define XGBE_PHY_DFE_ENA_PROPERTY "amd,serdes-dfe-tap-enable" #define XGBE_PHY_SPEEDS 3 #define XGBE_PHY_SPEED_1000 0 @@ -177,10 +179,12 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define SPEED_10000_BLWC 0 #define SPEED_10000_CDR 0x7 #define SPEED_10000_PLL 0x1 -#define SPEED_10000_PQ 0x1e +#define SPEED_10000_PQ 0x12 #define SPEED_10000_RATE 0x0 #define SPEED_10000_TXAMP 0xa #define SPEED_10000_WORD 0x7 +#define SPEED_10000_DFE_TAP_CONFIG 0x1 +#define SPEED_10000_DFE_TAP_ENABLE 0x7f #define SPEED_2500_BLWC 1 #define SPEED_2500_CDR 0x2 @@ -189,6 +193,8 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define SPEED_2500_RATE 0x1 #define SPEED_2500_TXAMP 0xf #define SPEED_2500_WORD 0x1 +#define SPEED_2500_DFE_TAP_CONFIG 0x3 +#define SPEED_2500_DFE_TAP_ENABLE 0x0 #define SPEED_1000_BLWC 1 #define SPEED_1000_CDR 0x2 @@ -197,16 +203,25 @@ MODULE_DESCRIPTION("AMD 10GbE (amd-xgbe) PHY driver"); #define SPEED_1000_RATE 0x3 #define SPEED_1000_TXAMP 0xf #define SPEED_1000_WORD 0x1 +#define SPEED_1000_DFE_TAP_CONFIG 0x3 +#define SPEED_1000_DFE_TAP_ENABLE 0x0 /* SerDes RxTx register offsets */ +#define RXTX_REG6 0x0018 #define RXTX_REG20 0x0050 +#define RXTX_REG22 0x0058 #define RXTX_REG114 0x01c8 +#define RXTX_REG129 0x0204 /* SerDes RxTx register entry bit positions and sizes */ +#define RXTX_REG6_RESETB_RXD_INDEX 8 +#define RXTX_REG6_RESETB_RXD_WIDTH 1 #define RXTX_REG20_BLWC_ENA_INDEX 2 #define RXTX_REG20_BLWC_ENA_WIDTH 1 #define RXTX_REG114_PQ_REG_INDEX 9 #define RXTX_REG114_PQ_REG_WIDTH 7 +#define RXTX_REG129_RXDFE_CONFIG_INDEX 14 +#define RXTX_REG129_RXDFE_CONFIG_WIDTH 2 /* Bit setting and getting macros * The get macro will extract the current bit field value from within @@ -333,6 +348,18 @@ static const u32 amd_xgbe_phy_serdes_tx_amp[] = { SPEED_10000_TXAMP, }; +static const u32 amd_xgbe_phy_serdes_dfe_tap_cfg[] = { + SPEED_1000_DFE_TAP_CONFIG, + SPEED_2500_DFE_TAP_CONFIG, + SPEED_10000_DFE_TAP_CONFIG, +}; + +static const u32 amd_xgbe_phy_serdes_dfe_tap_ena[] = { + SPEED_1000_DFE_TAP_ENABLE, + SPEED_2500_DFE_TAP_ENABLE, + SPEED_10000_DFE_TAP_ENABLE, +}; + enum amd_xgbe_phy_an { AMD_XGBE_AN_READY = 0, AMD_XGBE_AN_PAGE_RECEIVED, @@ -393,6 +420,8 @@ struct amd_xgbe_phy_priv { u32 serdes_cdr_rate[XGBE_PHY_SPEEDS]; u32 serdes_pq_skew[XGBE_PHY_SPEEDS]; u32 serdes_tx_amp[XGBE_PHY_SPEEDS]; + u32 serdes_dfe_tap_cfg[XGBE_PHY_SPEEDS]; + u32 serdes_dfe_tap_ena[XGBE_PHY_SPEEDS]; /* Auto-negotiation state machine support */ struct mutex an_mutex; @@ -481,11 +510,16 @@ static void amd_xgbe_phy_serdes_complete_ratechange(struct phy_device *phydev) status = XSIR0_IOREAD(priv, SIR0_STATUS); if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) && XSIR_GET_BITS(status, SIR0_STATUS, TX_READY)) - return; + goto rx_reset; } netdev_dbg(phydev->attached_dev, "SerDes rx/tx not ready (%#hx)\n", status); + +rx_reset: + /* Perform Rx reset for the DFE changes */ + XRXTX_IOWRITE_BITS(priv, RXTX_REG6, RESETB_RXD, 0); + XRXTX_IOWRITE_BITS(priv, RXTX_REG6, RESETB_RXD, 1); } static int amd_xgbe_phy_xgmii_mode(struct phy_device *phydev) @@ -534,6 +568,10 @@ static int amd_xgbe_phy_xgmii_mode(struct phy_device *phydev) priv->serdes_blwc[XGBE_PHY_SPEED_10000]); XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, priv->serdes_pq_skew[XGBE_PHY_SPEED_10000]); + XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, + priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_10000]); + XRXTX_IOWRITE(priv, RXTX_REG22, + priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_10000]); amd_xgbe_phy_serdes_complete_ratechange(phydev); @@ -586,6 +624,10 @@ static int amd_xgbe_phy_gmii_2500_mode(struct phy_device *phydev) priv->serdes_blwc[XGBE_PHY_SPEED_2500]); XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, priv->serdes_pq_skew[XGBE_PHY_SPEED_2500]); + XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, + priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_2500]); + XRXTX_IOWRITE(priv, RXTX_REG22, + priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_2500]); amd_xgbe_phy_serdes_complete_ratechange(phydev); @@ -638,6 +680,10 @@ static int amd_xgbe_phy_gmii_mode(struct phy_device *phydev) priv->serdes_blwc[XGBE_PHY_SPEED_1000]); XRXTX_IOWRITE_BITS(priv, RXTX_REG114, PQ_REG, priv->serdes_pq_skew[XGBE_PHY_SPEED_1000]); + XRXTX_IOWRITE_BITS(priv, RXTX_REG129, RXDFE_CONFIG, + priv->serdes_dfe_tap_cfg[XGBE_PHY_SPEED_1000]); + XRXTX_IOWRITE(priv, RXTX_REG22, + priv->serdes_dfe_tap_ena[XGBE_PHY_SPEED_1000]); amd_xgbe_phy_serdes_complete_ratechange(phydev); @@ -1668,6 +1714,38 @@ static int amd_xgbe_phy_probe(struct phy_device *phydev) sizeof(priv->serdes_tx_amp)); } + if (device_property_present(phy_dev, XGBE_PHY_DFE_CFG_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_PHY_DFE_CFG_PROPERTY, + priv->serdes_dfe_tap_cfg, + XGBE_PHY_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_PHY_DFE_CFG_PROPERTY); + goto err_sir1; + } + } else { + memcpy(priv->serdes_dfe_tap_cfg, + amd_xgbe_phy_serdes_dfe_tap_cfg, + sizeof(priv->serdes_dfe_tap_cfg)); + } + + if (device_property_present(phy_dev, XGBE_PHY_DFE_ENA_PROPERTY)) { + ret = device_property_read_u32_array(phy_dev, + XGBE_PHY_DFE_ENA_PROPERTY, + priv->serdes_dfe_tap_ena, + XGBE_PHY_SPEEDS); + if (ret) { + dev_err(dev, "invalid %s property\n", + XGBE_PHY_DFE_ENA_PROPERTY); + goto err_sir1; + } + } else { + memcpy(priv->serdes_dfe_tap_ena, + amd_xgbe_phy_serdes_dfe_tap_ena, + sizeof(priv->serdes_dfe_tap_ena)); + } + phydev->priv = priv; if (!priv->adev || acpi_disabled) -- cgit v0.10.2 From 31639b94cadc03727d0ae1f048e9688dd508883f Mon Sep 17 00:00:00 2001 From: Andy Gospodarek Date: Wed, 25 Feb 2015 17:00:16 -0500 Subject: MAINTAINERS: update my email address I have been signing off on patches with this address so I'll change it. Signed-off-by: Andy Gospodarek Signed-off-by: David S. Miller diff --git a/MAINTAINERS b/MAINTAINERS index 4f4915c..5d1606e9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2065,7 +2065,7 @@ F: include/net/bluetooth/ BONDING DRIVER M: Jay Vosburgh M: Veaceslav Falico -M: Andy Gospodarek +M: Andy Gospodarek L: netdev@vger.kernel.org W: http://sourceforge.net/projects/bonding/ S: Supported -- cgit v0.10.2 From dda094a312dbb2cd96e3e46fce7784aca999bcf1 Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 21 Feb 2015 07:32:47 +0000 Subject: i40e: Fix memory leak at failure path in i40e_dbg_command_write() The patch fixes a leak of 'cmd_buf' when copy_from_user() failed in i40e_dbg_command_write(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 61236f9..c17ee77 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -989,8 +989,10 @@ static ssize_t i40e_dbg_command_write(struct file *filp, if (!cmd_buf) return count; bytes_not_copied = copy_from_user(cmd_buf, buffer, count); - if (bytes_not_copied < 0) + if (bytes_not_copied < 0) { + kfree(cmd_buf); return bytes_not_copied; + } if (bytes_not_copied > 0) count -= bytes_not_copied; cmd_buf[count] = '\0'; -- cgit v0.10.2 From de78fc5ac1702059ca13203010fd0a3dd20d426f Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 21 Feb 2015 06:41:47 +0000 Subject: i40e: fix shift precedence issue Add parens to make sure the shift and bitwise precedences don't work backwards for us. Change-ID: I60c10ef4fad6bc654522b9d8a53da2e270a0f268 Reported-by: Joe Perches Signed-off-by: Shannon Nelson Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 11a9ffe..2a705a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -868,8 +868,9 @@ i40e_status i40e_pf_reset(struct i40e_hw *hw) * The grst delay value is in 100ms units, and we'll wait a * couple counts longer to be sure we don't just miss the end. */ - grst_del = rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK - >> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; + grst_del = (rd32(hw, I40E_GLGEN_RSTCTL) & + I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >> + I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT; for (cnt = 0; cnt < grst_del + 2; cnt++) { reg = rd32(hw, I40E_GLGEN_RSTAT); if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c index 183dcb6..a11c70c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb_nl.c @@ -40,7 +40,7 @@ static void i40e_get_pfc_delay(struct i40e_hw *hw, u16 *delay) u32 val; val = rd32(hw, I40E_PRTDCB_GENC); - *delay = (u16)(val & I40E_PRTDCB_GENC_PFCLDA_MASK >> + *delay = (u16)((val & I40E_PRTDCB_GENC_PFCLDA_MASK) >> I40E_PRTDCB_GENC_PFCLDA_SHIFT); } -- cgit v0.10.2 From b67a03357cab0ccb91d641fead6f167c697a24cb Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Sat, 21 Feb 2015 06:42:15 +0000 Subject: i40e: Don't check for Tx hang when PF down This patch adds check to bail out if device is already down when checking for Tx hang subtask. Change-ID: I3853fb7a6d11cb9a4c349b687cb25c15b19977a0 Signed-off-by: Akeem G Abodunrin Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index cbe281b..6bb6376 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5587,7 +5587,8 @@ static void i40e_check_hang_subtask(struct i40e_pf *pf) int i, v; /* If we're down or resetting, just bail */ - if (test_bit(__I40E_CONFIG_BUSY, &pf->state)) + if (test_bit(__I40E_DOWN, &pf->state) || + test_bit(__I40E_CONFIG_BUSY, &pf->state)) return; /* for each VSI/netdev -- cgit v0.10.2 From 71da61976ec18fb57b3ba9a1dd846b747cc7c884 Mon Sep 17 00:00:00 2001 From: Anjali Singhai Date: Sat, 21 Feb 2015 06:42:35 +0000 Subject: i40e: Fix TSO with more than 8 frags per segment issue The hardware has some limitations the driver needs to adhere to, that we found in extended testing. 1) no more than 8 descriptors per packet on the wire 2) no header can span more than 3 descriptors If one of these events occurs, the hardware will generate an internal error and freeze the Tx queue. This patch linearizes the skb to avoid these situations. Change-ID: I37dab7d3966e14895a9663ec4d0aaa8eb0d9e115 Signed-off-by: Anjali Singhai Jain Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2206d2d..0aad611 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2140,6 +2140,67 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size) } /** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @tx_flags: collected send information + * @hdr_len: size of the packet header + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, + const u8 hdr_len) +{ + struct skb_frag_struct *frag; + bool linearize = false; + unsigned int size = 0; + u16 num_frags; + u16 gso_segs; + + num_frags = skb_shinfo(skb)->nr_frags; + gso_segs = skb_shinfo(skb)->gso_segs; + + if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { + u16 j = 1; + + if (num_frags < (I40E_MAX_BUFFER_TXD)) + goto linearize_chk_done; + /* try the simple math, if we have too many frags per segment */ + if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > + I40E_MAX_BUFFER_TXD) { + linearize = true; + goto linearize_chk_done; + } + frag = &skb_shinfo(skb)->frags[0]; + size = hdr_len; + /* we might still have more fragments per segment */ + do { + size += skb_frag_size(frag); + frag++; j++; + if (j == I40E_MAX_BUFFER_TXD) { + if (size < skb_shinfo(skb)->gso_size) { + linearize = true; + break; + } + j = 1; + size -= skb_shinfo(skb)->gso_size; + if (size) + j++; + size += hdr_len; + } + num_frags--; + } while (num_frags); + } else { + if (num_frags >= I40E_MAX_BUFFER_TXD) + linearize = true; + } + +linearize_chk_done: + return linearize; +} + +/** * i40e_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on * @skb: send buffer @@ -2396,6 +2457,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, if (tsyn) tx_flags |= I40E_TX_FLAGS_TSYN; + if (i40e_chk_linearize(skb, tx_flags, hdr_len)) + if (skb_linearize(skb)) + goto out_drop; + skb_tx_timestamp(skb); /* always enable CRC insertion offload */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 18b0023..dff0bae 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -112,6 +112,7 @@ enum i40e_dyn_idx_t { #define i40e_rx_desc i40e_32byte_rx_desc +#define I40E_MAX_BUFFER_TXD 8 #define I40E_MIN_TX_LEN 17 #define I40E_MAX_DATA_PER_TXD 8192 diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 2900438..1320a43 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1380,6 +1380,67 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring, context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss); } + /** + * i40e_chk_linearize - Check if there are more than 8 fragments per packet + * @skb: send buffer + * @tx_flags: collected send information + * @hdr_len: size of the packet header + * + * Note: Our HW can't scatter-gather more than 8 fragments to build + * a packet on the wire and so we need to figure out the cases where we + * need to linearize the skb. + **/ +static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags, + const u8 hdr_len) +{ + struct skb_frag_struct *frag; + bool linearize = false; + unsigned int size = 0; + u16 num_frags; + u16 gso_segs; + + num_frags = skb_shinfo(skb)->nr_frags; + gso_segs = skb_shinfo(skb)->gso_segs; + + if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) { + u16 j = 1; + + if (num_frags < (I40E_MAX_BUFFER_TXD)) + goto linearize_chk_done; + /* try the simple math, if we have too many frags per segment */ + if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) > + I40E_MAX_BUFFER_TXD) { + linearize = true; + goto linearize_chk_done; + } + frag = &skb_shinfo(skb)->frags[0]; + size = hdr_len; + /* we might still have more fragments per segment */ + do { + size += skb_frag_size(frag); + frag++; j++; + if (j == I40E_MAX_BUFFER_TXD) { + if (size < skb_shinfo(skb)->gso_size) { + linearize = true; + break; + } + j = 1; + size -= skb_shinfo(skb)->gso_size; + if (size) + j++; + size += hdr_len; + } + num_frags--; + } while (num_frags); + } else { + if (num_frags >= I40E_MAX_BUFFER_TXD) + linearize = true; + } + +linearize_chk_done: + return linearize; +} + /** * i40e_tx_map - Build the Tx descriptor * @tx_ring: ring to send buffer on @@ -1654,6 +1715,10 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (tso) tx_flags |= I40E_TX_FLAGS_TSO; + if (i40e_chk_linearize(skb, tx_flags, hdr_len)) + if (skb_linearize(skb)) + goto out_drop; + skb_tx_timestamp(skb); /* always enable CRC insertion offload */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 4e15903..c950a03 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -112,6 +112,7 @@ enum i40e_dyn_idx_t { #define i40e_rx_desc i40e_32byte_rx_desc +#define I40E_MAX_BUFFER_TXD 8 #define I40E_MIN_TX_LEN 17 #define I40E_MAX_DATA_PER_TXD 8192 -- cgit v0.10.2 From a68de58d2791dd9f2b159703b70377011b35a568 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 24 Feb 2015 05:26:03 +0000 Subject: i40e: fix race in hang check The driver was having some issues with false Tx hang detection. This makes the driver a little more direct with the checks for progress forward by directly checking the head write back address and tail register when determining progress. This avoids Tx hangs where the software gets behind, because we are directly checking hardware state when determining hang state. Change-ID: I774f0e861c9e8ab5ccb213634100fe15440ae24a Signed-off-by: Jesse Brandeburg Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 0aad611..bbf1b12 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -586,6 +586,20 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) } /** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} + +/** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors * @@ -594,10 +608,16 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) **/ static u32 i40e_get_tx_pending(struct i40e_ring *ring) { - u32 ntu = ((ring->next_to_clean <= ring->next_to_use) - ? ring->next_to_use - : ring->next_to_use + ring->count); - return ntu - ring->next_to_clean; + u32 head, tail; + + head = i40e_get_head(ring); + tail = readl(ring->tail); + + if (head != tail) + return (head < tail) ? + tail - head : (tail + ring->count - head); + + return 0; } /** @@ -606,6 +626,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring) **/ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) { + u32 tx_done = tx_ring->stats.packets; + u32 tx_done_old = tx_ring->tx_stats.tx_done_old; u32 tx_pending = i40e_get_tx_pending(tx_ring); struct i40e_pf *pf = tx_ring->vsi->back; bool ret = false; @@ -623,41 +645,25 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) * run the check_tx_hang logic with a transmit completion * pending but without time to complete it yet. */ - if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && - (tx_pending >= I40E_MIN_DESC_PENDING)) { + if ((tx_done_old == tx_done) && tx_pending) { /* make sure it is true for two checks in a row */ ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); - } else if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && - (tx_pending < I40E_MIN_DESC_PENDING) && - (tx_pending > 0)) { + } else if (tx_done_old == tx_done && + (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) { if (I40E_DEBUG_FLOW & pf->hw.debug_mask) dev_info(tx_ring->dev, "HW needs some more descs to do a cacheline flush. tx_pending %d, queue %d", tx_pending, tx_ring->queue_index); pf->tx_sluggish_count++; } else { /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; + tx_ring->tx_stats.tx_done_old = tx_done; clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); } return ret; } -/** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of - * - * Returns value of Tx ring head based on value stored - * in head write-back location - **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) -{ - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; - - return le32_to_cpu(*(volatile __le32 *)head); -} - #define WB_STRIDE 0x3 /** diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 1320a43..be05829 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -126,6 +126,20 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) } /** + * i40e_get_head - Retrieve head from head writeback + * @tx_ring: tx ring to fetch head of + * + * Returns value of Tx ring head based on value stored + * in head write-back location + **/ +static inline u32 i40e_get_head(struct i40e_ring *tx_ring) +{ + void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; + + return le32_to_cpu(*(volatile __le32 *)head); +} + +/** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors * @@ -134,10 +148,16 @@ void i40evf_free_tx_resources(struct i40e_ring *tx_ring) **/ static u32 i40e_get_tx_pending(struct i40e_ring *ring) { - u32 ntu = ((ring->next_to_clean <= ring->next_to_use) - ? ring->next_to_use - : ring->next_to_use + ring->count); - return ntu - ring->next_to_clean; + u32 head, tail; + + head = i40e_get_head(ring); + tail = readl(ring->tail); + + if (head != tail) + return (head < tail) ? + tail - head : (tail + ring->count - head); + + return 0; } /** @@ -146,6 +166,8 @@ static u32 i40e_get_tx_pending(struct i40e_ring *ring) **/ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) { + u32 tx_done = tx_ring->stats.packets; + u32 tx_done_old = tx_ring->tx_stats.tx_done_old; u32 tx_pending = i40e_get_tx_pending(tx_ring); bool ret = false; @@ -162,36 +184,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring) * run the check_tx_hang logic with a transmit completion * pending but without time to complete it yet. */ - if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) && - (tx_pending >= I40E_MIN_DESC_PENDING)) { + if ((tx_done_old == tx_done) && tx_pending) { /* make sure it is true for two checks in a row */ ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); - } else if (!(tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) || - !(tx_pending < I40E_MIN_DESC_PENDING) || - !(tx_pending > 0)) { + } else if (tx_done_old == tx_done && + (tx_pending < I40E_MIN_DESC_PENDING) && (tx_pending > 0)) { /* update completed stats and disarm the hang check */ - tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets; + tx_ring->tx_stats.tx_done_old = tx_done; clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state); } return ret; } -/** - * i40e_get_head - Retrieve head from head writeback - * @tx_ring: tx ring to fetch head of - * - * Returns value of Tx ring head based on value stored - * in head write-back location - **/ -static inline u32 i40e_get_head(struct i40e_ring *tx_ring) -{ - void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count; - - return le32_to_cpu(*(volatile __le32 *)head); -} - #define WB_STRIDE 0x3 /** -- cgit v0.10.2 From 7f9ff47683cb7441e6d0496365bcf64738f6d2d4 Mon Sep 17 00:00:00 2001 From: Anjali Singhai Date: Sat, 21 Feb 2015 06:43:19 +0000 Subject: i40e: Fix the case where per TC queue count was higher than queues enabled When the driver or hardware gets less interrupt vectors than the actual number of CPU cores, limit the queue count for the priority queue traffic class (TC) queues. This will fix a warning with multiple function mode where systems regularly have more cores than vectors. Also add extra comment for readability. Change-ID: I4f02226263aa3995e1f5ee5503eac0cd6ee12fbd Signed-off-by: Anjali Singhai Jain Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6bb6376..a926e3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1512,7 +1512,12 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, vsi->tc_config.numtc = numtc; vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1; /* Number of queues per enabled TC */ - num_tc_qps = vsi->alloc_queue_pairs/numtc; + /* In MFP case we can have a much lower count of MSIx + * vectors available and so we need to lower the used + * q count. + */ + qcount = min_t(int, vsi->alloc_queue_pairs, pf->num_lan_msix); + num_tc_qps = qcount / numtc; num_tc_qps = min_t(int, num_tc_qps, I40E_MAX_QUEUES_PER_TC); /* Setup queue offset/count for all TCs for given VSI */ -- cgit v0.10.2 From cd238a3ecf2bf7f3d1a155a32b4bddd87dbd3d23 Mon Sep 17 00:00:00 2001 From: "Parikh, Neerav" Date: Sat, 21 Feb 2015 06:43:37 +0000 Subject: i40e: Fix the Tx ring qset handle when DCB reconfigures When DCB is reconfigured to single TC the driver did not reset the Tx ring Qset handle to the correct mapping; which caused Tx queue disable timeouts. Change-ID: I4da5915ec92a83c281b478d653fae6ef1b72edfe Signed-off-by: Neerav Parikh Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index a926e3b..bd4494d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2689,8 +2689,15 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi) u16 qoffset, qcount; int i, n; - if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) - return; + if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { + /* Reset the TC information */ + for (i = 0; i < vsi->num_queue_pairs; i++) { + rx_ring = vsi->rx_rings[i]; + tx_ring = vsi->tx_rings[i]; + rx_ring->dcb_tc = 0; + tx_ring->dcb_tc = 0; + } + } for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) { if (!(vsi->tc_config.enabled_tc & (1 << n))) -- cgit v0.10.2 From 11e4770842d4917b1cad52d901b758a6d1997735 Mon Sep 17 00:00:00 2001 From: "Parikh, Neerav" Date: Sat, 21 Feb 2015 06:43:55 +0000 Subject: i40e: Issue a PF reset if Tx queue disable timeout As part of DCB reconfiguration flow if the Tx queue disable times out then issue a PF reset to do some level of recovery. Change-ID: I7550021c55bff355351c0365e61e1f05fcaff46d Signed-off-by: Neerav Parikh Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index bd4494d..9751465 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -5266,8 +5266,14 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, /* Wait for the PF's Tx queues to be disabled */ ret = i40e_pf_wait_txq_disabled(pf); - if (!ret) + if (ret) { + /* Schedule PF reset to recover */ + set_bit(__I40E_PF_RESET_REQUESTED, &pf->state); + i40e_service_event_schedule(pf); + } else { i40e_pf_unquiesce_all_vsi(pf); + } + exit: return ret; } -- cgit v0.10.2 From 85e76d0312a373a1268b88a7a442c6004e0c6063 Mon Sep 17 00:00:00 2001 From: Anjali Singhai Date: Sat, 21 Feb 2015 06:44:16 +0000 Subject: i40evf: TCP/IPv6 over Vxlan Tx checksum offload fix We were checking the outer Protocol flags and deciding the flow for inner header. This patch fixes that. This fixes the Tx checksum offload for TCP/IPv6 over vxlan. Change-ID: I837aaea921d34f71b24c2bc32aaadea5001ddf78 Signed-off-by: Anjali Singhai Jain Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index be05829..7088915 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1212,17 +1212,16 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, if (err < 0) return err; - if (protocol == htons(ETH_P_IP)) { - iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + if (iph->version == 4) { tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); iph->tot_len = 0; iph->check = 0; tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0, IPPROTO_TCP, 0); - } else if (skb_is_gso_v6(skb)) { - - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) - : ipv6_hdr(skb); + } else if (ipv6h->version == 6) { tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb); ipv6h->payload_len = 0; tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, @@ -1280,13 +1279,9 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; } } else if (tx_flags & I40E_TX_FLAGS_IPV6) { - if (tx_flags & I40E_TX_FLAGS_TSO) { - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; + if (tx_flags & I40E_TX_FLAGS_TSO) ip_hdr(skb)->check = 0; - } else { - *cd_tunneling |= - I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - } } /* Now set the ctx descriptor fields */ @@ -1296,6 +1291,11 @@ static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags, ((skb_inner_network_offset(skb) - skb_transport_offset(skb)) >> 1) << I40E_TXD_CTX_QW0_NATLEN_SHIFT; + if (this_ip_hdr->version == 6) { + tx_flags &= ~I40E_TX_FLAGS_IPV4; + tx_flags |= I40E_TX_FLAGS_IPV6; + } + } else { network_hdr_len = skb_network_header_len(skb); -- cgit v0.10.2 From e147758d9aef1e41dad331b21df206200cf16e80 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 21 Feb 2015 06:44:33 +0000 Subject: i40e: disconnect irqs on shutdown Combine the ICR0 shutdown with the standard interrupt shutdown, and add the interrupt clearing to the PCI shutdown path. This prevents the driver from allowing stray interrupts or causing system logs from un-handled interrupts. Change-ID: I48f6ab95cad7f8ca77c1f26c92a51cc1034ced43 Signed-off-by: Shannon Nelson Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 9751465..8bfd254 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -3842,6 +3842,12 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { int i; + i40e_stop_misc_vector(pf); + if (pf->flags & I40E_FLAG_MSIX_ENABLED) { + synchronize_irq(pf->msix_entries[0].vector); + free_irq(pf->msix_entries[0].vector, pf); + } + i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1); for (i = 0; i < pf->num_alloc_vsi; i++) if (pf->vsi[i]) @@ -9578,12 +9584,6 @@ static void i40e_remove(struct pci_dev *pdev) if (pf->vsi[pf->lan_vsi]) i40e_vsi_release(pf->vsi[pf->lan_vsi]); - i40e_stop_misc_vector(pf); - if (pf->flags & I40E_FLAG_MSIX_ENABLED) { - synchronize_irq(pf->msix_entries[0].vector); - free_irq(pf->msix_entries[0].vector, pf); - } - /* shutdown and destroy the HMC */ if (pf->hw.hmc.hmc_obj) { ret_code = i40e_shutdown_lan_hmc(&pf->hw); @@ -9737,6 +9737,8 @@ static void i40e_shutdown(struct pci_dev *pdev) wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + i40e_clear_interrupt_scheme(pf); + if (system_state == SYSTEM_POWER_OFF) { pci_wake_from_d3(pdev, pf->wol_en); pci_set_power_state(pdev, PCI_D3hot); -- cgit v0.10.2 From 33c62b34e5a3d57eb8a487140ca46631fc76e10c Mon Sep 17 00:00:00 2001 From: Mitch A Williams Date: Sat, 21 Feb 2015 06:44:51 +0000 Subject: i40e: stop flow director on shutdown In some cases, the hardware would continue to try to access the FDIR ring after entering D3Hot state, which would cause either PCIe errors or NMIs, depending upon system configuration. Explicitly stop FDIR in our shutdown routine to eliminate this possibility. Change-ID: I1bd9fc7fd8f151fe24cad132ac9adddab923e3af Signed-off-by: Mitch Williams Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 8bfd254..dadda3c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9558,6 +9558,7 @@ static void i40e_remove(struct pci_dev *pdev) set_bit(__I40E_DOWN, &pf->state); del_timer_sync(&pf->service_timer); cancel_work_sync(&pf->service_task); + i40e_fdir_teardown(pf); if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { i40e_free_vfs(pf); -- cgit v0.10.2 From 2c47e351f61b66f2e870bfd634e471a6ab1af920 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 21 Feb 2015 06:45:10 +0000 Subject: i40e: catch NVM write semaphore timeout and retry In some circumstances, a multi-write transaction takes longer than the default 3 minute timeout on the write semaphore. If the write failed with an EBUSY status, this is likely the problem, so here we try to reacquire the semaphore then retry the write. We only do one retry, then give up. Change-ID: I1c8be60688acc2f39573839579baf601207c4a36 Signed-off-by: Shannon Nelson Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 3e70f2e..5defe0d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -679,9 +679,11 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw, { i40e_status status; enum i40e_nvmupd_cmd upd_cmd; + bool retry_attempt = false; upd_cmd = i40e_nvmupd_validate_command(hw, cmd, errno); +retry: switch (upd_cmd) { case I40E_NVMUPD_WRITE_CON: status = i40e_nvmupd_nvm_write(hw, cmd, bytes, errno); @@ -725,6 +727,39 @@ static i40e_status i40e_nvmupd_state_writing(struct i40e_hw *hw, *errno = -ESRCH; break; } + + /* In some circumstances, a multi-write transaction takes longer + * than the default 3 minute timeout on the write semaphore. If + * the write failed with an EBUSY status, this is likely the problem, + * so here we try to reacquire the semaphore then retry the write. + * We only do one retry, then give up. + */ + if (status && (hw->aq.asq_last_status == I40E_AQ_RC_EBUSY) && + !retry_attempt) { + i40e_status old_status = status; + u32 old_asq_status = hw->aq.asq_last_status; + u32 gtime; + + gtime = rd32(hw, I40E_GLVFGEN_TIMER); + if (gtime >= hw->nvm.hw_semaphore_timeout) { + i40e_debug(hw, I40E_DEBUG_ALL, + "NVMUPD: write semaphore expired (%d >= %lld), retrying\n", + gtime, hw->nvm.hw_semaphore_timeout); + i40e_release_nvm(hw); + status = i40e_acquire_nvm(hw, I40E_RESOURCE_WRITE); + if (status) { + i40e_debug(hw, I40E_DEBUG_ALL, + "NVMUPD: write semaphore reacquire failed aq_err = %d\n", + hw->aq.asq_last_status); + status = old_status; + hw->aq.asq_last_status = old_asq_status; + } else { + retry_attempt = true; + goto retry; + } + } + } + return status; } -- cgit v0.10.2 From 65d13461d7c3fa822ff2000d2c0fe5f5b1943cb9 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Sat, 21 Feb 2015 06:45:28 +0000 Subject: i40e: check pointers before use Make sure we don't try to dereference NULL pointers when returning values from the AdminQ calls. Change-ID: Ia6694f2f415d50acf0aba063c863568742799aff Signed-off-by: Shannon Nelson Tested-by: Jim Young Signed-off-by: Jeff Kirsher diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 2a705a1..6aea65d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -2847,7 +2847,7 @@ i40e_status i40e_aq_add_udp_tunnel(struct i40e_hw *hw, status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); - if (!status) + if (!status && filter_index) *filter_index = resp->index; return status; -- cgit v0.10.2 From 9c1c98a3bb7b7593b60264b9a07e001e68b46697 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 26 Feb 2015 15:50:50 +0200 Subject: mac80211: Send EAPOL frames at lowest rate The current minstrel_ht rate control behavior is somewhat optimistic in trying to find optimum TX rate. While this is usually fine for normal Data frames, there are cases where a more conservative set of retry parameters would be beneficial to make the connection more robust. EAPOL frames are critical to the authentication and especially the EAPOL-Key message 4/4 (the last message in the 4-way handshake) is important to get through to the AP. If that message is lost, the only recovery mechanism in many cases is to reassociate with the AP and start from scratch. This can often be avoided by trying to send the frame with more conservative rate and/or with more link layer retries. In most cases, minstrel_ht is currently using the initial EAPOL-Key frames for probing higher rates and this results in only five link layer transmission attempts (one at high(ish) MCS and four at MCS0). While this works with most APs, it looks like there are some deployed APs that may have issues with the EAPOL frames using HT MCS immediately after association. Similarly, there may be issues in cases where the signal strength or radio environment is not good enough to be able to get frames through even at couple of MCS 0 tries. The best approach for this would likely to be to reduce the TX rate for the last rate (3rd rate parameter in the set) to a low basic rate (say, 6 Mbps on 5 GHz and 2 or 5.5 Mbps on 2.4 GHz), but doing that cleanly requires some more effort. For now, we can start with a simple one-liner that forces the minimum rate to be used for EAPOL frames similarly how the TX rate is selected for the IEEE 802.11 Management frames. This does result in a small extra latency added to the cases where the AP would be able to receive the higher rate, but taken into account how small number of EAPOL frames are used, this is likely to be insignificant. A future optimization in the minstrel_ht design can also allow this patch to be reverted to get back to the more optimized initial TX rate. It should also be noted that many drivers that do not use minstrel as the rate control algorithm are already doing similar workarounds by forcing the lowest TX rate to be used for EAPOL frames. Cc: stable@vger.kernel.org Reported-by: Linus Torvalds Tested-by: Linus Torvalds Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 88a18ff..07bd8db 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -566,6 +566,7 @@ ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx) if (tx->sdata->control_port_no_encrypt) info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; info->control.flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO; + info->flags |= IEEE80211_TX_CTL_USE_MINRATE; } return TX_CONTINUE; -- cgit v0.10.2 From 2b0c2e2d2a43357fc51d3499bc405d0d05df2451 Mon Sep 17 00:00:00 2001 From: Sujith Sankar Date: Wed, 25 Feb 2015 15:26:55 +0530 Subject: enic: do notify_check before returning credits We should complete notify_check before returning the credits. Once we return the credits, adaptor may access the notify data. Signed-off-by: Sujith Sankar Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 9cbe038..a5179bf 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -272,8 +272,8 @@ static irqreturn_t enic_isr_legacy(int irq, void *data) } if (ENIC_TEST_INTR(pba, notify_intr)) { - vnic_intr_return_all_credits(&enic->intr[notify_intr]); enic_notify_check(enic); + vnic_intr_return_all_credits(&enic->intr[notify_intr]); } if (ENIC_TEST_INTR(pba, err_intr)) { @@ -346,8 +346,8 @@ static irqreturn_t enic_isr_msix_notify(int irq, void *data) struct enic *enic = data; unsigned int intr = enic_msix_notify_intr(enic); - vnic_intr_return_all_credits(&enic->intr[intr]); enic_notify_check(enic); + vnic_intr_return_all_credits(&enic->intr[intr]); return IRQ_HANDLED; } -- cgit v0.10.2 From f01aa633e040e52603b8defd2263691d15b86cb0 Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Wed, 25 Feb 2015 16:50:04 +0530 Subject: cxgb4: Fix PCI-E Memory window interface for big-endian systems When doing reads and writes to adapter memory via the PCI-E Memory Window interface, data gets swizzled on 4-byte boundaries on Big-Endian systems because we need to account for the register read/write interface which incorporates a swizzle onto the Little-Endian PCI-E Bus. Based on original work by Casey Leedom Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index d6cda17..97842d0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1103,7 +1103,7 @@ int t4_restart_aneg(struct adapter *adap, unsigned int mbox, unsigned int port); #define T4_MEMORY_WRITE 0 #define T4_MEMORY_READ 1 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, - __be32 *buf, int dir); + void *buf, int dir); static inline int t4_memory_write(struct adapter *adap, int mtype, u32 addr, u32 len, __be32 *buf) { diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 4d643b6..853c389 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -449,7 +449,7 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC * @addr: address within indicated memory type * @len: amount of memory to transfer - * @buf: host memory buffer + * @hbuf: host memory buffer * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0) * * Reads/writes an [almost] arbitrary memory region in the firmware: the @@ -460,15 +460,17 @@ int t4_edc_read(struct adapter *adap, int idx, u32 addr, __be32 *data, u64 *ecc) * caller's responsibility to perform appropriate byte order conversions. */ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, - u32 len, __be32 *buf, int dir) + u32 len, void *hbuf, int dir) { u32 pos, offset, resid, memoffset; u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; + u32 *buf; /* Argument sanity checks ... */ - if (addr & 0x3) + if (addr & 0x3 || (uintptr_t)hbuf & 0x3) return -EINVAL; + buf = (u32 *)hbuf; /* It's convenient to be able to handle lengths which aren't a * multiple of 32-bits because we often end up transferring files to @@ -532,14 +534,45 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, /* Transfer data to/from the adapter as long as there's an integral * number of 32-bit transfers to complete. + * + * A note on Endianness issues: + * + * The "register" reads and writes below from/to the PCI-E Memory + * Window invoke the standard adapter Big-Endian to PCI-E Link + * Little-Endian "swizzel." As a result, if we have the following + * data in adapter memory: + * + * Memory: ... | b0 | b1 | b2 | b3 | ... + * Address: i+0 i+1 i+2 i+3 + * + * Then a read of the adapter memory via the PCI-E Memory Window + * will yield: + * + * x = readl(i) + * 31 0 + * [ b3 | b2 | b1 | b0 ] + * + * If this value is stored into local memory on a Little-Endian system + * it will show up correctly in local memory as: + * + * ( ..., b0, b1, b2, b3, ... ) + * + * But on a Big-Endian system, the store will show up in memory + * incorrectly swizzled as: + * + * ( ..., b3, b2, b1, b0, ... ) + * + * So we need to account for this in the reads and writes to the + * PCI-E Memory Window below by undoing the register read/write + * swizzels. */ while (len > 0) { if (dir == T4_MEMORY_READ) - *buf++ = (__force __be32) t4_read_reg(adap, - mem_base + offset); + *buf++ = le32_to_cpu((__force __le32)t4_read_reg(adap, + mem_base + offset)); else t4_write_reg(adap, mem_base + offset, - (__force u32) *buf++); + (__force u32)cpu_to_le32(*buf++)); offset += sizeof(__be32); len -= sizeof(__be32); @@ -568,15 +601,16 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, */ if (resid) { union { - __be32 word; + u32 word; char byte[4]; } last; unsigned char *bp; int i; if (dir == T4_MEMORY_READ) { - last.word = (__force __be32) t4_read_reg(adap, - mem_base + offset); + last.word = le32_to_cpu( + (__force __le32)t4_read_reg(adap, + mem_base + offset)); for (bp = (unsigned char *)buf, i = resid; i < 4; i++) bp[i] = last.byte[i]; } else { @@ -584,7 +618,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, for (i = resid; i < 4; i++) last.byte[i] = 0; t4_write_reg(adap, mem_base + offset, - (__force u32) last.word); + (__force u32)cpu_to_le32(last.word)); } } -- cgit v0.10.2 From e65ad3be869b45f90a401d8ce4d4e7381c99ceb0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Feb 2015 16:35:32 +0300 Subject: rocker: add a check for NULL in rocker_probe_ports() Make sure kmalloc() succeeds. Signed-off-by: Dan Carpenter Acked-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 34389b6a..713a13c 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4201,6 +4201,8 @@ static int rocker_probe_ports(struct rocker *rocker) alloc_size = sizeof(struct rocker_port *) * rocker->port_count; rocker->ports = kmalloc(alloc_size, GFP_KERNEL); + if (!rocker->ports) + return -ENOMEM; for (i = 0; i < rocker->port_count; i++) { err = rocker_probe_port(rocker, i); if (err) -- cgit v0.10.2 From 5f2ebfbee68872762ad76f735277ed7afa074d76 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 25 Feb 2015 16:36:12 +0300 Subject: rocker: silence shift wrapping warning "val" is declared as a u64 so static checkers complain that this shift can wrap. I don't have the hardware but probably it's doesn't have over 31 ports. Still we may as well silence the warning even if it's not a real bug. Signed-off-by: Dan Carpenter Acked-by: Jiri Pirko Acked-by: Scott Feldman Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 713a13c..9fb6948 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -1257,9 +1257,9 @@ static void rocker_port_set_enable(struct rocker_port *rocker_port, bool enable) u64 val = rocker_read64(rocker_port->rocker, PORT_PHYS_ENABLE); if (enable) - val |= 1 << rocker_port->lport; + val |= 1ULL << rocker_port->lport; else - val &= ~(1 << rocker_port->lport); + val &= ~(1ULL << rocker_port->lport); rocker_write64(rocker_port->rocker, PORT_PHYS_ENABLE, val); } -- cgit v0.10.2 From 4c5a84421c7d1c259c3883a404f9a67a2f55b003 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 25 Feb 2015 15:19:28 +0100 Subject: vhost: cleanup iterator update logic Recent iterator-related changes in vhost made it harder to follow the logic fixing up the header. In fact, the fixup always happens at the same offset: sizeof(virtio_net_hdr): sometimes the fixup iterator is updated by copy_to_iter, sometimes-by iov_iter_advance. Rearrange code to make this obvious. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index afa06d2..ca70434 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -591,11 +591,6 @@ static void handle_rx(struct vhost_net *net) * TODO: support TSO. */ iov_iter_advance(&msg.msg_iter, vhost_hlen); - } else { - /* It'll come from socket; we'll need to patch - * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF - */ - iov_iter_advance(&fixup, sizeof(hdr)); } err = sock->ops->recvmsg(NULL, sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); @@ -609,11 +604,18 @@ static void handle_rx(struct vhost_net *net) continue; } /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */ - if (unlikely(vhost_hlen) && - copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) { - vq_err(vq, "Unable to write vnet_hdr at addr %p\n", - vq->iov->iov_base); - break; + if (unlikely(vhost_hlen)) { + if (copy_to_iter(&hdr, sizeof(hdr), + &fixup) != sizeof(hdr)) { + vq_err(vq, "Unable to write vnet_hdr " + "at addr %p\n", vq->iov->iov_base); + break; + } + } else { + /* Header came from socket; we'll need to patch + * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF + */ + iov_iter_advance(&fixup, sizeof(hdr)); } /* TODO: Should check and handle checksum. */ -- cgit v0.10.2 From 0d79a493e507437a2135e5ac1a447d4d503488d8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 25 Feb 2015 15:20:01 +0100 Subject: vhost: drop hard-coded num_buffers size The 2 that we use for copy_to_iter comes from sizeof(u16), it used to be that way before the iov iter update. Fix it up, making it obvious the size of stack access is right. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ca70434..2bbfc25 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -621,7 +621,8 @@ static void handle_rx(struct vhost_net *net) num_buffers = cpu_to_vhost16(vq, headcount); if (likely(mergeable) && - copy_to_iter(&num_buffers, 2, &fixup) != 2) { + copy_to_iter(&num_buffers, sizeof num_buffers, + &fixup) != sizeof num_buffers) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); break; -- cgit v0.10.2 From 8331de75cb13fc907ceba78e698c42150e61dda9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 25 Feb 2015 16:31:53 +0100 Subject: rhashtable: unconditionally grow when max_shift is not specified While commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue") rightfully moved part of the decision making of whether we should expand or shrink from the expand/shrink functions themselves into insert/delete functions in order to avoid unnecessary worker wake-ups, it however introduced a regression by doing so. Before that change, if no max_shift was specified (= 0) on rhashtable initialization, rhashtable_expand() would just grow unconditionally and lets the available memory be the limiting factor. After that change, if no max_shift was specified, there would be _no_ expansion step at all. Given that netlink and tipc have a max_shift specified, it was not visible there, but Josh Hunt reported that if nft that starts out with a default element hint of 3 if not otherwise provided, would slow i.e. inserts down trememdously as it cannot grow larger to relax table occupancy. Given that the test case verifies shrinks/expands manually, we also must remove pointer to the helper functions to explicitly avoid parallel resizing on insertions/deletions. test_bucket_stats() and test_rht_lookup() could also be wrapped around rhashtable mutex to explicitly synchronize a walk from resizing, but I think that defeats the actual test case which intended to have explicit test steps, i.e. 1) inserts, 2) expands, 3) shrinks, 4) deletions, with object verification after each stage. Reported-by: Josh Hunt Fixes: c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue") Signed-off-by: Daniel Borkmann Cc: Ying Xue Cc: Josh Hunt Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e3a04e4..bcf119b 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -251,7 +251,7 @@ bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (new_size / 4 * 3) && - (ht->p.max_shift && atomic_read(&ht->shift) < ht->p.max_shift); + (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift); } EXPORT_SYMBOL_GPL(rht_grow_above_75); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 58b9953..f9e9d73 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -202,8 +202,6 @@ static int __init test_rht_init(void) .key_len = sizeof(int), .hashfn = jhash, .nulls_base = (3U << RHT_BASE_SHIFT), - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, }; int err; -- cgit v0.10.2 From 4c4b52d9b2df45e8216d3e30b5452e4a364d2cac Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 25 Feb 2015 16:31:54 +0100 Subject: rhashtable: remove indirection for grow/shrink decision functions Currently, all real users of rhashtable default their grow and shrink decision functions to rht_grow_above_75() and rht_shrink_below_30(), so that there's currently no need to have this explicitly selectable. It can/should be generic and private inside rhashtable until a real use case pops up. Since we can make this private, we'll save us this additional indirection layer and can improve insertion/deletion time as well. Reference: http://patchwork.ozlabs.org/patch/443040/ Suggested-by: David S. Miller Signed-off-by: Daniel Borkmann Acked-by: Thomas Graf Signed-off-by: David S. Miller diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index cb2104b..d438eeb 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -79,12 +79,6 @@ struct rhashtable; * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object - * @grow_decision: If defined, may return true if table should expand - * @shrink_decision: If defined, may return true if table should shrink - * - * Note: when implementing the grow and shrink decision function, min/max - * shift must be enforced, otherwise, resizing watermarks they set may be - * useless. */ struct rhashtable_params { size_t nelem_hint; @@ -98,10 +92,6 @@ struct rhashtable_params { size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; - bool (*grow_decision)(const struct rhashtable *ht, - size_t new_size); - bool (*shrink_decision)(const struct rhashtable *ht, - size_t new_size); }; /** @@ -193,9 +183,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); -bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); -bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); - int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index bcf119b..090641d 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -247,26 +247,24 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, * @ht: hash table * @new_size: new table size */ -bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) { /* Expand table when exceeding 75% load */ return atomic_read(&ht->nelems) > (new_size / 4 * 3) && (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift); } -EXPORT_SYMBOL_GPL(rht_grow_above_75); /** * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size * @ht: hash table * @new_size: new table size */ -bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) +static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) { /* Shrink table beneath 30% load */ return atomic_read(&ht->nelems) < (new_size * 3 / 10) && (atomic_read(&ht->shift) > ht->p.min_shift); } -EXPORT_SYMBOL_GPL(rht_shrink_below_30); static void lock_buckets(struct bucket_table *new_tbl, struct bucket_table *old_tbl, unsigned int hash) @@ -528,40 +526,19 @@ static void rht_deferred_worker(struct work_struct *work) list_for_each_entry(walker, &ht->walkers, list) walker->resize = true; - if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + if (rht_grow_above_75(ht, tbl->size)) rhashtable_expand(ht); - else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) + else if (rht_shrink_below_30(ht, tbl->size)) rhashtable_shrink(ht); - unlock: mutex_unlock(&ht->mutex); } -static void rhashtable_probe_expand(struct rhashtable *ht) -{ - const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht); - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - /* Only adjust the table if no resizing is currently in progress. */ - if (tbl == new_tbl && ht->p.grow_decision && - ht->p.grow_decision(ht, tbl->size)) - schedule_work(&ht->run_work); -} - -static void rhashtable_probe_shrink(struct rhashtable *ht) -{ - const struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht); - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - /* Only adjust the table if no resizing is currently in progress. */ - if (tbl == new_tbl && ht->p.shrink_decision && - ht->p.shrink_decision(ht, tbl->size)) - schedule_work(&ht->run_work); -} - static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, - struct bucket_table *tbl, u32 hash) + struct bucket_table *tbl, + const struct bucket_table *old_tbl, u32 hash) { + bool no_resize_running = tbl == old_tbl; struct rhash_head *head; hash = rht_bucket_index(tbl, hash); @@ -577,8 +554,8 @@ static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); - - rhashtable_probe_expand(ht); + if (no_resize_running && rht_grow_above_75(ht, tbl->size)) + schedule_work(&ht->run_work); } /** @@ -608,7 +585,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) hash = obj_raw_hashfn(ht, rht_obj(ht, obj)); lock_buckets(tbl, old_tbl, hash); - __rhashtable_insert(ht, obj, tbl, hash); + __rhashtable_insert(ht, obj, tbl, old_tbl, hash); unlock_buckets(tbl, old_tbl, hash); rcu_read_unlock(); @@ -690,8 +667,11 @@ found: unlock_buckets(new_tbl, old_tbl, new_hash); if (ret) { + bool no_resize_running = new_tbl == old_tbl; + atomic_dec(&ht->nelems); - rhashtable_probe_shrink(ht); + if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size)) + schedule_work(&ht->run_work); } rcu_read_unlock(); @@ -861,7 +841,7 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht, goto exit; } - __rhashtable_insert(ht, obj, new_tbl, new_hash); + __rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash); exit: unlock_buckets(new_tbl, old_tbl, new_hash); @@ -1123,8 +1103,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (!ht->p.hash_rnd) get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); - if (ht->p.grow_decision || ht->p.shrink_decision) - INIT_WORK(&ht->run_work, rht_deferred_worker); + INIT_WORK(&ht->run_work, rht_deferred_worker); return 0; } @@ -1142,8 +1121,7 @@ void rhashtable_destroy(struct rhashtable *ht) { ht->being_destroyed = true; - if (ht->p.grow_decision || ht->p.shrink_decision) - cancel_work_sync(&ht->run_work); + cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); bucket_table_free(rht_dereference(ht->tbl, ht)); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index f9e9d73..67c7593 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -201,6 +201,7 @@ static int __init test_rht_init(void) .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(int), .hashfn = jhash, + .max_shift = 1, /* we expand/shrink manually here */ .nulls_base = (3U << RHT_BASE_SHIFT), }; int err; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 61e6c40..c82df0a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -192,8 +192,6 @@ static int nft_hash_init(const struct nft_set *set, .key_offset = offsetof(struct nft_hash_elem, key), .key_len = set->klen, .hashfn = jhash, - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, }; return rhashtable_init(priv, ¶ms); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2702673..05919bf 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3126,8 +3126,6 @@ static int __init netlink_proto_init(void) .key_len = sizeof(u32), /* portid */ .hashfn = jhash, .max_shift = 16, /* 64K */ - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, }; if (err != 0) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f73e975..b4d4467 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2364,8 +2364,6 @@ int tipc_sk_rht_init(struct net *net) .hashfn = jhash, .max_shift = 20, /* 1M */ .min_shift = 8, /* 256 */ - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, }; return rhashtable_init(&tn->sk_rht, &rht_params); -- cgit v0.10.2 From 7488c3e3d8384e6a3d71c6a05645b3db8d82d275 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Thu, 26 Feb 2015 00:58:12 +0100 Subject: net: asix: add support for the Sitecom LN-028 USB adapter Just another AX88178-based 10/100/1000 USB-to-Ethernet dongle. This one shows up in lsusb as: "Sitecom Europe B.V. LN-028 Network USB 2.0 Adapter". Signed-off-by: Luca Ceresoli Cc: Francois Romieu Cc: "David S. Miller" Cc: linux-usb@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 37eed4d..0732f9e 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -161,6 +161,7 @@ config USB_NET_AX8817X * Linksys USB200M * Netgear FA120 * Sitecom LN-029 + * Sitecom LN-028 * Intellinet USB 2.0 Ethernet * ST Lab USB 2.0 Ethernet * TrendNet TU2-ET100 diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index bf49792..1173a24 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -979,6 +979,10 @@ static const struct usb_device_id products [] = { USB_DEVICE (0x0df6, 0x0056), .driver_info = (unsigned long) &ax88178_info, }, { + // Sitecom LN-028 "USB 2.0 10/100/1000 Ethernet adapter" + USB_DEVICE (0x0df6, 0x061c), + .driver_info = (unsigned long) &ax88178_info, +}, { // corega FEther USB2-TX USB_DEVICE (0x07aa, 0x0017), .driver_info = (unsigned long) &ax8817x_info, -- cgit v0.10.2 From c30e76a728beb5bbfff0ddeb573e28927853d4b8 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Wed, 25 Feb 2015 13:50:12 -0600 Subject: amd-xgbe: Request IRQs only after driver is fully setup It is possible that the hardware may not have been properly shutdown before this driver gets control, through use by firmware, for example. Until the driver is loaded, interrupts associated with the hardware could go pending. When the IRQs are requested napi support has not been initialized yet, but the ISR will get control and schedule napi processing resulting in a kernel panic because the poll routine has not been set. Adjust the code so that the driver is fully ready to handle and process interrupts as soon as the IRQs are requested. This involves requesting and freeing IRQs during start and stop processing and ordering the napi add and delete calls appropriately. Also adjust the powerup and powerdown routines to match the start and stop routines in regards to the ordering of tasks, including napi related calls. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index b93d440..885b02b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -609,6 +609,68 @@ static void xgbe_napi_disable(struct xgbe_prv_data *pdata, unsigned int del) } } +static int xgbe_request_irqs(struct xgbe_prv_data *pdata) +{ + struct xgbe_channel *channel; + struct net_device *netdev = pdata->netdev; + unsigned int i; + int ret; + + ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0, + netdev->name, pdata); + if (ret) { + netdev_alert(netdev, "error requesting irq %d\n", + pdata->dev_irq); + return ret; + } + + if (!pdata->per_channel_irq) + return 0; + + channel = pdata->channel; + for (i = 0; i < pdata->channel_count; i++, channel++) { + snprintf(channel->dma_irq_name, + sizeof(channel->dma_irq_name) - 1, + "%s-TxRx-%u", netdev_name(netdev), + channel->queue_index); + + ret = devm_request_irq(pdata->dev, channel->dma_irq, + xgbe_dma_isr, 0, + channel->dma_irq_name, channel); + if (ret) { + netdev_alert(netdev, "error requesting irq %d\n", + channel->dma_irq); + goto err_irq; + } + } + + return 0; + +err_irq: + /* Using an unsigned int, 'i' will go to UINT_MAX and exit */ + for (i--, channel--; i < pdata->channel_count; i--, channel--) + devm_free_irq(pdata->dev, channel->dma_irq, channel); + + devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + + return ret; +} + +static void xgbe_free_irqs(struct xgbe_prv_data *pdata) +{ + struct xgbe_channel *channel; + unsigned int i; + + devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + + if (!pdata->per_channel_irq) + return; + + channel = pdata->channel; + for (i = 0; i < pdata->channel_count; i++, channel++) + devm_free_irq(pdata->dev, channel->dma_irq, channel); +} + void xgbe_init_tx_coalesce(struct xgbe_prv_data *pdata) { struct xgbe_hw_if *hw_if = &pdata->hw_if; @@ -810,20 +872,20 @@ int xgbe_powerdown(struct net_device *netdev, unsigned int caller) return -EINVAL; } - phy_stop(pdata->phydev); - spin_lock_irqsave(&pdata->lock, flags); if (caller == XGMAC_DRIVER_CONTEXT) netif_device_detach(netdev); netif_tx_stop_all_queues(netdev); - xgbe_napi_disable(pdata, 0); - /* Powerdown Tx/Rx */ hw_if->powerdown_tx(pdata); hw_if->powerdown_rx(pdata); + xgbe_napi_disable(pdata, 0); + + phy_stop(pdata->phydev); + pdata->power_down = 1; spin_unlock_irqrestore(&pdata->lock, flags); @@ -854,14 +916,14 @@ int xgbe_powerup(struct net_device *netdev, unsigned int caller) phy_start(pdata->phydev); - /* Enable Tx/Rx */ + xgbe_napi_enable(pdata, 0); + hw_if->powerup_tx(pdata); hw_if->powerup_rx(pdata); if (caller == XGMAC_DRIVER_CONTEXT) netif_device_attach(netdev); - xgbe_napi_enable(pdata, 0); netif_tx_start_all_queues(netdev); spin_unlock_irqrestore(&pdata->lock, flags); @@ -875,6 +937,7 @@ static int xgbe_start(struct xgbe_prv_data *pdata) { struct xgbe_hw_if *hw_if = &pdata->hw_if; struct net_device *netdev = pdata->netdev; + int ret; DBGPR("-->xgbe_start\n"); @@ -884,17 +947,31 @@ static int xgbe_start(struct xgbe_prv_data *pdata) phy_start(pdata->phydev); + xgbe_napi_enable(pdata, 1); + + ret = xgbe_request_irqs(pdata); + if (ret) + goto err_napi; + hw_if->enable_tx(pdata); hw_if->enable_rx(pdata); xgbe_init_tx_timers(pdata); - xgbe_napi_enable(pdata, 1); netif_tx_start_all_queues(netdev); DBGPR("<--xgbe_start\n"); return 0; + +err_napi: + xgbe_napi_disable(pdata, 1); + + phy_stop(pdata->phydev); + + hw_if->exit(pdata); + + return ret; } static void xgbe_stop(struct xgbe_prv_data *pdata) @@ -907,16 +984,21 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_stop\n"); - phy_stop(pdata->phydev); - netif_tx_stop_all_queues(netdev); - xgbe_napi_disable(pdata, 1); xgbe_stop_tx_timers(pdata); hw_if->disable_tx(pdata); hw_if->disable_rx(pdata); + xgbe_free_irqs(pdata); + + xgbe_napi_disable(pdata, 1); + + phy_stop(pdata->phydev); + + hw_if->exit(pdata); + channel = pdata->channel; for (i = 0; i < pdata->channel_count; i++, channel++) { if (!channel->tx_ring) @@ -931,10 +1013,6 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) static void xgbe_restart_dev(struct xgbe_prv_data *pdata) { - struct xgbe_channel *channel; - struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned int i; - DBGPR("-->xgbe_restart_dev\n"); /* If not running, "restart" will happen on open */ @@ -942,19 +1020,10 @@ static void xgbe_restart_dev(struct xgbe_prv_data *pdata) return; xgbe_stop(pdata); - synchronize_irq(pdata->dev_irq); - if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) - synchronize_irq(channel->dma_irq); - } xgbe_free_tx_data(pdata); xgbe_free_rx_data(pdata); - /* Issue software reset to device */ - hw_if->exit(pdata); - xgbe_start(pdata); DBGPR("<--xgbe_restart_dev\n"); @@ -1283,10 +1352,7 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata, static int xgbe_open(struct net_device *netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_desc_if *desc_if = &pdata->desc_if; - struct xgbe_channel *channel = NULL; - unsigned int i = 0; int ret; DBGPR("-->xgbe_open\n"); @@ -1329,55 +1395,14 @@ static int xgbe_open(struct net_device *netdev) INIT_WORK(&pdata->restart_work, xgbe_restart); INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp); - /* Request interrupts */ - ret = devm_request_irq(pdata->dev, pdata->dev_irq, xgbe_isr, 0, - netdev->name, pdata); - if (ret) { - netdev_alert(netdev, "error requesting irq %d\n", - pdata->dev_irq); - goto err_rings; - } - - if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) { - snprintf(channel->dma_irq_name, - sizeof(channel->dma_irq_name) - 1, - "%s-TxRx-%u", netdev_name(netdev), - channel->queue_index); - - ret = devm_request_irq(pdata->dev, channel->dma_irq, - xgbe_dma_isr, 0, - channel->dma_irq_name, channel); - if (ret) { - netdev_alert(netdev, - "error requesting irq %d\n", - channel->dma_irq); - goto err_irq; - } - } - } - ret = xgbe_start(pdata); if (ret) - goto err_start; + goto err_rings; DBGPR("<--xgbe_open\n"); return 0; -err_start: - hw_if->exit(pdata); - -err_irq: - if (pdata->per_channel_irq) { - /* Using an unsigned int, 'i' will go to UINT_MAX and exit */ - for (i--, channel--; i < pdata->channel_count; i--, channel--) - devm_free_irq(pdata->dev, channel->dma_irq, channel); - } - - devm_free_irq(pdata->dev, pdata->dev_irq, pdata); - err_rings: desc_if->free_ring_resources(pdata); @@ -1399,30 +1424,16 @@ err_phy_init: static int xgbe_close(struct net_device *netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); - struct xgbe_hw_if *hw_if = &pdata->hw_if; struct xgbe_desc_if *desc_if = &pdata->desc_if; - struct xgbe_channel *channel; - unsigned int i; DBGPR("-->xgbe_close\n"); /* Stop the device */ xgbe_stop(pdata); - /* Issue software reset to device */ - hw_if->exit(pdata); - /* Free the ring descriptors and buffers */ desc_if->free_ring_resources(pdata); - /* Release the interrupts */ - devm_free_irq(pdata->dev, pdata->dev_irq, pdata); - if (pdata->per_channel_irq) { - channel = pdata->channel; - for (i = 0; i < pdata->channel_count; i++, channel++) - devm_free_irq(pdata->dev, channel->dma_irq, channel); - } - /* Free the channel and ring structures */ xgbe_free_channels(pdata); -- cgit v0.10.2 From 5beb5c90c1f54d745da040aa05634a5830ba4a4c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Feb 2015 07:20:34 -0800 Subject: rhashtable: use cond_resched() If a hash table has 128 slots and 16384 elems, expand to 256 slots takes more than one second. For larger sets, a soft lockup is detected. Holding cpu for that long, even in a work queue is a show stopper for non preemptable kernels. cond_resched() at strategic points to allow process scheduler to reschedule us. Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 090641d..b5344ef 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -412,6 +413,7 @@ int rhashtable_expand(struct rhashtable *ht) } } unlock_buckets(new_tbl, old_tbl, new_hash); + cond_resched(); } /* Unzip interleaved hash chains */ @@ -435,6 +437,7 @@ int rhashtable_expand(struct rhashtable *ht) complete = false; unlock_buckets(new_tbl, old_tbl, old_hash); + cond_resched(); } } @@ -493,6 +496,7 @@ int rhashtable_shrink(struct rhashtable *ht) tbl->buckets[new_hash + new_tbl->size]); unlock_buckets(new_tbl, tbl, new_hash); + cond_resched(); } /* Publish the new, valid hash table */ -- cgit v0.10.2 From b70661c70830d5c69aab6844f2d86d2daf124fbd Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 25 Feb 2015 16:31:57 +0100 Subject: net: smc91x: use run-time configuration on all ARM machines The smc91x driver traditionally gets configured at compile-time for whichever hardware it runs on. This no longer works on ARM as we continue to move to building all-in-one kernels. Most ARM configurations with this driver already use run-time configuration through DT or through platform_data, but a few have not been converted yet. I've checked all ARM boards that use this driver in their legacy board files, and converted the ones that were using compile-time configuration in smc91x.h to behave like the other ones and provide the interrupt polarity along with the MMIO configuration (width, stride) at platform device creation time. In particular, these combinations were previously selectable in Kconfig but in fact broken: - sa1100 assabet plus pleb - msm combined with any other armv6/v7 platform - pxa-idp combined with any non-DMA pxa variant - LogicPD PXA270 combined with any other pxa - nomadik combined with any other armv4/v5 platform, e.g. versatile. None of these seem critical enough to warrant a backport to stable, but it would be nice to clean this up for good. Signed-off-by: Arnd Bergmann ---- I would like the patch to get merged through netdev, after Robert and/or Linus have verified it on at least some hardware. There are a few other non-ARM platforms using this driver, I could do the same patch for those if we want to take it further. arch/arm/mach-msm/board-halibut.c | 8 ++++- arch/arm/mach-msm/board-qsd8x50.c | 8 ++++- arch/arm/mach-pxa/idp.c | 5 +++ arch/arm/mach-pxa/lpd270.c | 8 ++++- arch/arm/mach-realview/core.c | 7 ++++ arch/arm/mach-realview/realview_eb.c | 2 +- arch/arm/mach-sa1100/neponset.c | 6 ++++ arch/arm/mach-sa1100/pleb.c | 7 ++++ drivers/net/ethernet/smsc/smc91x.c | 9 +++-- drivers/net/ethernet/smsc/smc91x.h | 114 ++---------------------------------------------------------- 10 files changed, 57 insertions(+), 117 deletions(-) Tested-by: Robert Jarzmik Signed-off-by: David S. Miller diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c index 61bfe58..fc83204 100644 --- a/arch/arm/mach-msm/board-halibut.c +++ b/arch/arm/mach-msm/board-halibut.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -46,15 +47,20 @@ static struct resource smc91x_resources[] = { [1] = { .start = MSM_GPIO_TO_INT(49), .end = MSM_GPIO_TO_INT(49), - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, +}; + static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev.platform_data = &smc91x_platdata, }; static struct platform_device *devices[] __initdata = { diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c index 4c74861..10016a3 100644 --- a/arch/arm/mach-msm/board-qsd8x50.c +++ b/arch/arm/mach-msm/board-qsd8x50.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -49,15 +50,20 @@ static struct resource smc91x_resources[] = { .flags = IORESOURCE_MEM, }, [1] = { - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, +}; + static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev.platform_data = &smc91x_platdata, }; static int __init msm_init_smc91x(void) diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c index 343c4e3..7d8eab8 100644 --- a/arch/arm/mach-pxa/idp.c +++ b/arch/arm/mach-pxa/idp.c @@ -81,11 +81,16 @@ static struct resource smc91x_resources[] = { } }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT, +}; + static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev.platform_data = &smc91x_platdata, }; static void idp_backlight_power(int on) diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index ad777b3..28da319 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -189,15 +190,20 @@ static struct resource smc91x_resources[] = { [1] = { .start = LPD270_ETHERNET_IRQ, .end = LPD270_ETHERNET_IRQ, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE, }, }; +struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT; +}; + static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev.platform_data = &smc91x_platdata, }; static struct resource lpd270_flash_resources[] = { diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 850e506..c309593 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -94,6 +95,10 @@ static struct smsc911x_platform_config smsc911x_config = { .phy_interface = PHY_INTERFACE_MODE_MII, }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT, +}; + static struct platform_device realview_eth_device = { .name = "smsc911x", .id = 0, @@ -107,6 +112,8 @@ int realview_eth_register(const char *name, struct resource *res) realview_eth_device.resource = res; if (strcmp(realview_eth_device.name, "smsc911x") == 0) realview_eth_device.dev.platform_data = &smsc911x_config; + else + realview_eth_device.dev.platform_data = &smc91x_platdata; return platform_device_register(&realview_eth_device); } diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c index 64c88d6..b3869cb 100644 --- a/arch/arm/mach-realview/realview_eb.c +++ b/arch/arm/mach-realview/realview_eb.c @@ -234,7 +234,7 @@ static struct resource realview_eb_eth_resources[] = { [1] = { .start = IRQ_EB_ETH, .end = IRQ_EB_ETH, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE, }, }; diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index 169262e..7b0cd31 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -258,12 +259,17 @@ static int neponset_probe(struct platform_device *dev) 0x02000000, "smc91x-attrib"), { .flags = IORESOURCE_IRQ }, }; + struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_8BIT | SMC91X_IO_SHIFT_2 | SMC91X_NOWAIT, + }; struct platform_device_info smc91x_devinfo = { .parent = &dev->dev, .name = "smc91x", .id = 0, .res = smc91x_resources, .num_res = ARRAY_SIZE(smc91x_resources), + .data = &smc91c_platdata, + .size_data = sizeof(smc91c_platdata), }; int ret, irq; diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index 0912618..696fd0f 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -43,12 +44,18 @@ static struct resource smc91x_resources[] = { #endif }; +static struct smc91x_platdata smc91x_platdata = { + .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, +}; static struct platform_device smc91x_device = { .name = "smc91x", .id = 0, .num_resources = ARRAY_SIZE(smc91x_resources), .resource = smc91x_resources, + .dev = { + .platform_data = &smc91c_platdata, + }, }; static struct platform_device *devices[] __initdata = { diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index fa3f193..209ee1b 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -91,6 +91,10 @@ static const char version[] = #include "smc91x.h" +#if defined(CONFIG_ASSABET_NEPONSET) +#include +#endif + #ifndef SMC_NOWAIT # define SMC_NOWAIT 0 #endif @@ -2355,8 +2359,9 @@ static int smc_drv_probe(struct platform_device *pdev) ret = smc_request_attrib(pdev, ndev); if (ret) goto out_release_io; -#if defined(CONFIG_ASSABET_NEPONSET) && !defined(CONFIG_SA1100_PLEB) - neponset_ncr_set(NCR_ENET_OSC_EN); +#if defined(CONFIG_ASSABET_NEPONSET) + if (machine_is_assabet() && machine_has_neponset()) + neponset_ncr_set(NCR_ENET_OSC_EN); #endif platform_set_drvdata(pdev, ndev); ret = smc_enable_device(pdev); diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h index be67baf..3a18501 100644 --- a/drivers/net/ethernet/smsc/smc91x.h +++ b/drivers/net/ethernet/smsc/smc91x.h @@ -39,14 +39,7 @@ * Define your architecture specific bus configuration parameters here. */ -#if defined(CONFIG_ARCH_LUBBOCK) ||\ - defined(CONFIG_MACH_MAINSTONE) ||\ - defined(CONFIG_MACH_ZYLONITE) ||\ - defined(CONFIG_MACH_LITTLETON) ||\ - defined(CONFIG_MACH_ZYLONITE2) ||\ - defined(CONFIG_ARCH_VIPER) ||\ - defined(CONFIG_MACH_STARGATE2) ||\ - defined(CONFIG_ARCH_VERSATILE) +#if defined(CONFIG_ARM) #include @@ -74,95 +67,8 @@ /* We actually can't write halfwords properly if not word aligned */ static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg) { - if ((machine_is_mainstone() || machine_is_stargate2()) && reg & 2) { - unsigned int v = val << 16; - v |= readl(ioaddr + (reg & ~2)) & 0xffff; - writel(v, ioaddr + (reg & ~2)); - } else { - writew(val, ioaddr + reg); - } -} - -#elif defined(CONFIG_SA1100_PLEB) -/* We can only do 16-bit reads and writes in the static memory space. */ -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 0 -#define SMC_IO_SHIFT 0 -#define SMC_NOWAIT 1 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_insb(a, r, p, l) readsb((a) + (r), p, (l)) -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, (l)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) - -#define SMC_IRQ_FLAGS (-1) - -#elif defined(CONFIG_SA1100_ASSABET) - -#include - -/* We can only do 8-bit reads and writes in the static memory space. */ -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 0 -#define SMC_CAN_USE_32BIT 0 -#define SMC_NOWAIT 1 - -/* The first two address lines aren't connected... */ -#define SMC_IO_SHIFT 2 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_insb(a, r, p, l) readsb((a) + (r), p, (l)) -#define SMC_outsb(a, r, p, l) writesb((a) + (r), p, (l)) -#define SMC_IRQ_FLAGS (-1) /* from resource */ - -#elif defined(CONFIG_MACH_LOGICPD_PXA270) || \ - defined(CONFIG_MACH_NOMADIK_8815NHK) - -#define SMC_CAN_USE_8BIT 0 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 0 -#define SMC_IO_SHIFT 0 -#define SMC_NOWAIT 1 - -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) -#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) - -#elif defined(CONFIG_ARCH_INNOKOM) || \ - defined(CONFIG_ARCH_PXA_IDP) || \ - defined(CONFIG_ARCH_RAMSES) || \ - defined(CONFIG_ARCH_PCM027) - -#define SMC_CAN_USE_8BIT 1 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 1 -#define SMC_IO_SHIFT 0 -#define SMC_NOWAIT 1 -#define SMC_USE_PXA_DMA 1 - -#define SMC_inb(a, r) readb((a) + (r)) -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_inl(a, r) readl((a) + (r)) -#define SMC_outb(v, a, r) writeb(v, (a) + (r)) -#define SMC_outl(v, a, r) writel(v, (a) + (r)) -#define SMC_insl(a, r, p, l) readsl((a) + (r), p, l) -#define SMC_outsl(a, r, p, l) writesl((a) + (r), p, l) -#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) -#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) -#define SMC_IRQ_FLAGS (-1) /* from resource */ - -/* We actually can't write halfwords properly if not word aligned */ -static inline void -SMC_outw(u16 val, void __iomem *ioaddr, int reg) -{ - if (reg & 2) { + if ((machine_is_mainstone() || machine_is_stargate2() || + machine_is_pxa_idp()) && reg & 2) { unsigned int v = val << 16; v |= readl(ioaddr + (reg & ~2)) & 0xffff; writel(v, ioaddr + (reg & ~2)); @@ -237,20 +143,6 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg) #define RPC_LSA_DEFAULT RPC_LED_100_10 #define RPC_LSB_DEFAULT RPC_LED_TX_RX -#elif defined(CONFIG_ARCH_MSM) - -#define SMC_CAN_USE_8BIT 0 -#define SMC_CAN_USE_16BIT 1 -#define SMC_CAN_USE_32BIT 0 -#define SMC_NOWAIT 1 - -#define SMC_inw(a, r) readw((a) + (r)) -#define SMC_outw(v, a, r) writew(v, (a) + (r)) -#define SMC_insw(a, r, p, l) readsw((a) + (r), p, l) -#define SMC_outsw(a, r, p, l) writesw((a) + (r), p, l) - -#define SMC_IRQ_FLAGS IRQF_TRIGGER_HIGH - #elif defined(CONFIG_COLDFIRE) #define SMC_CAN_USE_8BIT 0 -- cgit v0.10.2 From f55ea3d932df06d82f37de5826063caf938c06f2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 26 Feb 2015 19:56:56 +0300 Subject: niu: fix error handling in niu_class_to_ethflow() There is a discrepancy here because the niu_class_to_ethflow() returns zero on failure and one on success but the caller expected zero on success and negative on failure. The problem means that we allow the user to pass classes and flow_types which we don't want. I've looked at it a bit and I don't see it as a very serious bug. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 4b51f90..0c5842a 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -6989,10 +6989,10 @@ static int niu_class_to_ethflow(u64 class, int *flow_type) *flow_type = IP_USER_FLOW; break; default: - return 0; + return -EINVAL; } - return 1; + return 0; } static int niu_ethflow_to_class(int flow_type, u64 *class) @@ -7198,11 +7198,9 @@ static int niu_get_ethtool_tcam_entry(struct niu *np, class = (tp->key[0] & TCAM_V4KEY0_CLASS_CODE) >> TCAM_V4KEY0_CLASS_CODE_SHIFT; ret = niu_class_to_ethflow(class, &fsp->flow_type); - if (ret < 0) { netdev_info(np->dev, "niu%d: niu_class_to_ethflow failed\n", parent->index); - ret = -EINVAL; goto out; } -- cgit v0.10.2 From 5688714977ebefa92e6dad8bd94bffaeaadc303d Mon Sep 17 00:00:00 2001 From: George McCollister Date: Thu, 26 Feb 2015 15:19:30 -0600 Subject: drivers: net: cpsw: Set SECURE for dual_emac ucast Prior to this patch, sending a packet with the source MAC address of one of the CPSW interfaces to one of the CPSW slave ports while it's configured in dual_emac mode would update the port_num field of the VLAN/Unicast Address Table Entry. This would cause it to discard all incoming traffic addressed to that MAC address, essentially rendering the port useless until the ALE table is cleared (by starting and stopping the interface or rebooting.) For example, if eth0 has a MAC address of 90:59:af:8f:43:e9 it will have an ALE table entry: 00 00 00 00 59 90 02 30 e9 43 8f af (VLAN Addr vlan_id=2 unicast type=0 port_num=0 addr=90:59:af:8f:43:e9) If you configure another device with the same MAC address and connect it to the first CPSW slave port and send some traffic the ALE table entry becomes: 04 00 00 00 59 90 02 30 e9 43 8f af (VLAN Addr vlan_id=2 unicast type=0 port_num=1 addr=90:59:af:8f:43:e9) >From this point forward all incoming traffic addressed to 90:59:af:8f:43:e9 will be dropped. Setting the SECURE bit for the VLAN/Unicast address table entry for each interface's MAC address corrects the problem. Signed-off-by: George McCollister Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 7d8dd0d..49b0336 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1103,7 +1103,7 @@ static inline void cpsw_add_dual_emac_def_ale_entries( cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, port_mask, ALE_VLAN, slave->port_vlan, 0); cpsw_ale_add_ucast(priv->ale, priv->mac_addr, - priv->host_port, ALE_VLAN, slave->port_vlan); + priv->host_port, ALE_VLAN | ALE_SECURE, slave->port_vlan); } static void soft_reset_slave(struct cpsw_slave *slave) -- cgit v0.10.2 From 505ce4154ac86c250aa4a84a536dd9fc56479bb5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Feb 2015 16:19:00 -0600 Subject: net: Verify permission to dest_net in newlink When applicable verify that the caller has permision to create a network device in another network namespace. This check is already present when moving a network device between network namespaces in setlink so all that is needed is to duplicate that check in newlink. This change almost backports cleanly, but there are context conflicts as the code that follows was added in v4.0-rc1 Fixes: b51642f6d77b net: Enable a userns root rtnl calls that are safe for unprivilged users Signed-off-by: "Eric W. Biederman" Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1385de0..b237959 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2122,6 +2122,10 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); + err = -EPERM; + if (!netlink_ns_capable(skb, dest_net->user_ns, CAP_NET_ADMIN)) + goto out; + if (tb[IFLA_LINK_NETNSID]) { int id = nla_get_s32(tb[IFLA_LINK_NETNSID]); -- cgit v0.10.2 From 06615bed60c1fb7c37adddb75bdc80da873b5edb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 26 Feb 2015 16:20:07 -0600 Subject: net: Verify permission to link_net in newlink When applicable verify that the caller has permisson to the underlying network namespace for a newly created network device. Similary checks exist for the network namespace a network device will be created in. Fixes: 317f4810e45e ("rtnl: allow to create device with IFLA_LINK_NETNSID set") Signed-off-by: "Eric W. Biederman" Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b237959..2c49355 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2134,6 +2134,9 @@ replay: err = -EINVAL; goto out; } + err = -EPERM; + if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) + goto out; } dev = rtnl_create_link(link_net ? : dest_net, ifname, -- cgit v0.10.2 From 2f1d8b9e8afa5a833d96afcd23abcb8cdf8d83ab Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Feb 2015 18:35:35 -0800 Subject: macvtap: make sure neighbour code can push ethernet header Brian reported crashes using IPv6 traffic with macvtap/veth combo. I tracked the crashes in neigh_hh_output() -> memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD); Neighbour code assumes headroom to push Ethernet header is at least 16 bytes. It appears macvtap has only 14 bytes available on arches where NET_IP_ALIGN is 0 (like x86) Effect is a corruption of 2 bytes right before skb->head, and possible crashes if accessing non existing memory. This fix should also increase IPv4 performance, as paranoid code in ip_finish_output2() wont have to call skb_realloc_headroom() Reported-by: Brian Rak Tested-by: Brian Rak Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index e40fdfc..27ecc5c 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -654,11 +654,14 @@ static void macvtap_skb_to_vnet_hdr(struct macvtap_queue *q, } /* else everything is zero */ } +/* Neighbour code has some assumptions on HH_DATA_MOD alignment */ +#define MACVTAP_RESERVE HH_DATA_OFF(ETH_HLEN) + /* Get packet from user space buffer */ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, struct iov_iter *from, int noblock) { - int good_linear = SKB_MAX_HEAD(NET_IP_ALIGN); + int good_linear = SKB_MAX_HEAD(MACVTAP_RESERVE); struct sk_buff *skb; struct macvlan_dev *vlan; unsigned long total_len = iov_iter_count(from); @@ -722,7 +725,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len); } - skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, + skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen, linear, noblock, &err); if (!skb) goto err; -- cgit v0.10.2 From 4092e6acf5cb16f56154e2dd22d647023dc3d646 Mon Sep 17 00:00:00 2001 From: Jaedon Shin Date: Sat, 28 Feb 2015 11:48:26 +0900 Subject: net: bcmgenet: fix throughtput regression This patch adds bcmgenet_tx_poll for the tx_rings. This can reduce the interrupt load and send xmit in network stack on time. This also separated for the completion of tx_ring16 from bcmgenet_poll. The bcmgenet_tx_reclaim of tx_ring[{0,1,2,3}] operative by an interrupt is to be not more than a certain number TxBDs. It is caused by too slowly reclaiming the transmitted skb. Therefore, performance degradation of xmit after 605ad7f ("tcp: refine TSO autosizing"). Signed-off-by: Jaedon Shin Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index ff83c46b..2874a00 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -971,13 +971,14 @@ static inline void bcmgenet_tx_ring_int_disable(struct bcmgenet_priv *priv, } /* Unlocked version of the reclaim routine */ -static void __bcmgenet_tx_reclaim(struct net_device *dev, - struct bcmgenet_tx_ring *ring) +static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev, + struct bcmgenet_tx_ring *ring) { struct bcmgenet_priv *priv = netdev_priv(dev); int last_tx_cn, last_c_index, num_tx_bds; struct enet_cb *tx_cb_ptr; struct netdev_queue *txq; + unsigned int pkts_compl = 0; unsigned int bds_compl; unsigned int c_index; @@ -1005,6 +1006,7 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev, tx_cb_ptr = ring->cbs + last_c_index; bds_compl = 0; if (tx_cb_ptr->skb) { + pkts_compl++; bds_compl = skb_shinfo(tx_cb_ptr->skb)->nr_frags + 1; dev->stats.tx_bytes += tx_cb_ptr->skb->len; dma_unmap_single(&dev->dev, @@ -1028,23 +1030,45 @@ static void __bcmgenet_tx_reclaim(struct net_device *dev, last_c_index &= (num_tx_bds - 1); } - if (ring->free_bds > (MAX_SKB_FRAGS + 1)) - ring->int_disable(priv, ring); - - if (netif_tx_queue_stopped(txq)) - netif_tx_wake_queue(txq); + if (ring->free_bds > (MAX_SKB_FRAGS + 1)) { + if (netif_tx_queue_stopped(txq)) + netif_tx_wake_queue(txq); + } ring->c_index = c_index; + + return pkts_compl; } -static void bcmgenet_tx_reclaim(struct net_device *dev, +static unsigned int bcmgenet_tx_reclaim(struct net_device *dev, struct bcmgenet_tx_ring *ring) { + unsigned int released; unsigned long flags; spin_lock_irqsave(&ring->lock, flags); - __bcmgenet_tx_reclaim(dev, ring); + released = __bcmgenet_tx_reclaim(dev, ring); spin_unlock_irqrestore(&ring->lock, flags); + + return released; +} + +static int bcmgenet_tx_poll(struct napi_struct *napi, int budget) +{ + struct bcmgenet_tx_ring *ring = + container_of(napi, struct bcmgenet_tx_ring, napi); + unsigned int work_done = 0; + + work_done = bcmgenet_tx_reclaim(ring->priv->dev, ring); + + if (work_done == 0) { + napi_complete(napi); + ring->int_enable(ring->priv, ring); + + return 0; + } + + return budget; } static void bcmgenet_tx_reclaim_all(struct net_device *dev) @@ -1302,10 +1326,8 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) bcmgenet_tdma_ring_writel(priv, ring->index, ring->prod_index, TDMA_PROD_INDEX); - if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) { + if (ring->free_bds <= (MAX_SKB_FRAGS + 1)) netif_tx_stop_queue(txq); - ring->int_enable(priv, ring); - } out: spin_unlock_irqrestore(&ring->lock, flags); @@ -1621,6 +1643,7 @@ static int init_umac(struct bcmgenet_priv *priv) struct device *kdev = &priv->pdev->dev; int ret; u32 reg, cpu_mask_clear; + int index; dev_dbg(&priv->pdev->dev, "bcmgenet: init_umac\n"); @@ -1647,7 +1670,7 @@ static int init_umac(struct bcmgenet_priv *priv) bcmgenet_intr_disable(priv); - cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE; + cpu_mask_clear = UMAC_IRQ_RXDMA_BDONE | UMAC_IRQ_TXDMA_BDONE; dev_dbg(kdev, "%s:Enabling RXDMA_BDONE interrupt\n", __func__); @@ -1674,6 +1697,10 @@ static int init_umac(struct bcmgenet_priv *priv) bcmgenet_intrl2_0_writel(priv, cpu_mask_clear, INTRL2_CPU_MASK_CLEAR); + for (index = 0; index < priv->hw_params->tx_queues; index++) + bcmgenet_intrl2_1_writel(priv, (1 << index), + INTRL2_CPU_MASK_CLEAR); + /* Enable rx/tx engine.*/ dev_dbg(kdev, "done init umac\n"); @@ -1693,6 +1720,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, unsigned int first_bd; spin_lock_init(&ring->lock); + ring->priv = priv; + netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, 64); ring->index = index; if (index == DESC_INDEX) { ring->queue = 0; @@ -1738,6 +1767,17 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, TDMA_WRITE_PTR); bcmgenet_tdma_ring_writel(priv, index, end_ptr * words_per_bd - 1, DMA_END_ADDR); + + napi_enable(&ring->napi); +} + +static void bcmgenet_fini_tx_ring(struct bcmgenet_priv *priv, + unsigned int index) +{ + struct bcmgenet_tx_ring *ring = &priv->tx_rings[index]; + + napi_disable(&ring->napi); + netif_napi_del(&ring->napi); } /* Initialize a RDMA ring */ @@ -1907,7 +1947,7 @@ static int bcmgenet_dma_teardown(struct bcmgenet_priv *priv) return ret; } -static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) +static void __bcmgenet_fini_dma(struct bcmgenet_priv *priv) { int i; @@ -1926,6 +1966,18 @@ static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) kfree(priv->tx_cbs); } +static void bcmgenet_fini_dma(struct bcmgenet_priv *priv) +{ + int i; + + bcmgenet_fini_tx_ring(priv, DESC_INDEX); + + for (i = 0; i < priv->hw_params->tx_queues; i++) + bcmgenet_fini_tx_ring(priv, i); + + __bcmgenet_fini_dma(priv); +} + /* init_edma: Initialize DMA control register */ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) { @@ -1952,7 +2004,7 @@ static int bcmgenet_init_dma(struct bcmgenet_priv *priv) priv->tx_cbs = kcalloc(priv->num_tx_bds, sizeof(struct enet_cb), GFP_KERNEL); if (!priv->tx_cbs) { - bcmgenet_fini_dma(priv); + __bcmgenet_fini_dma(priv); return -ENOMEM; } @@ -1975,9 +2027,6 @@ static int bcmgenet_poll(struct napi_struct *napi, int budget) struct bcmgenet_priv, napi); unsigned int work_done; - /* tx reclaim */ - bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]); - work_done = bcmgenet_desc_rx(priv, budget); /* Advancing our consumer index*/ @@ -2022,28 +2071,34 @@ static void bcmgenet_irq_task(struct work_struct *work) static irqreturn_t bcmgenet_isr1(int irq, void *dev_id) { struct bcmgenet_priv *priv = dev_id; + struct bcmgenet_tx_ring *ring; unsigned int index; /* Save irq status for bottom-half processing. */ priv->irq1_stat = bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_STAT) & - ~priv->int1_mask; + ~bcmgenet_intrl2_1_readl(priv, INTRL2_CPU_MASK_STATUS); /* clear interrupts */ bcmgenet_intrl2_1_writel(priv, priv->irq1_stat, INTRL2_CPU_CLEAR); netif_dbg(priv, intr, priv->dev, "%s: IRQ=0x%x\n", __func__, priv->irq1_stat); + /* Check the MBDONE interrupts. * packet is done, reclaim descriptors */ - if (priv->irq1_stat & 0x0000ffff) { - index = 0; - for (index = 0; index < 16; index++) { - if (priv->irq1_stat & (1 << index)) - bcmgenet_tx_reclaim(priv->dev, - &priv->tx_rings[index]); + for (index = 0; index < priv->hw_params->tx_queues; index++) { + if (!(priv->irq1_stat & BIT(index))) + continue; + + ring = &priv->tx_rings[index]; + + if (likely(napi_schedule_prep(&ring->napi))) { + ring->int_disable(priv, ring); + __napi_schedule(&ring->napi); } } + return IRQ_HANDLED; } @@ -2075,8 +2130,12 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } if (priv->irq0_stat & (UMAC_IRQ_TXDMA_BDONE | UMAC_IRQ_TXDMA_PDONE)) { - /* Tx reclaim */ - bcmgenet_tx_reclaim(priv->dev, &priv->tx_rings[DESC_INDEX]); + struct bcmgenet_tx_ring *ring = &priv->tx_rings[DESC_INDEX]; + + if (likely(napi_schedule_prep(&ring->napi))) { + ring->int_disable(priv, ring); + __napi_schedule(&ring->napi); + } } if (priv->irq0_stat & (UMAC_IRQ_PHY_DET_R | UMAC_IRQ_PHY_DET_F | diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index b36ddec..0d370d1 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -520,6 +520,7 @@ struct bcmgenet_hw_params { struct bcmgenet_tx_ring { spinlock_t lock; /* ring lock */ + struct napi_struct napi; /* NAPI per tx queue */ unsigned int index; /* ring index */ unsigned int queue; /* queue index */ struct enet_cb *cbs; /* tx ring buffer control block*/ @@ -534,6 +535,7 @@ struct bcmgenet_tx_ring { struct bcmgenet_tx_ring *); void (*int_disable)(struct bcmgenet_priv *priv, struct bcmgenet_tx_ring *); + struct bcmgenet_priv *priv; }; /* device context */ -- cgit v0.10.2 From a14c7d15ca91b444e77df08b916befdce77562ab Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 27 Feb 2015 17:16:26 +0100 Subject: sh_eth: Fix lost MAC address on kexec Commit 740c7f31c094703c ("sh_eth: Ensure DMA engines are stopped before freeing buffers") added a call to sh_eth_reset() to the sh_eth_set_ringparam() and sh_eth_close() paths. However, setting the software reset bit(s) in the EDMR register resets the MAC Address Registers to zero. Hence after kexec, the new kernel doesn't detect a valid MAC address and assigns a random MAC address, breaking DHCP. Set the MAC address again after the reset in sh_eth_dev_exit() to fix this. Tested on r8a7740/armadillo (GETHER) and r8a7791/koelsch (FAST_RCAR). Fixes: 740c7f31c094703c ("sh_eth: Ensure DMA engines are stopped before freeing buffers") Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 4da8bd2..654b48d 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1392,6 +1392,9 @@ static void sh_eth_dev_exit(struct net_device *ndev) msleep(2); /* max frame time at 10 Mbps < 1250 us */ sh_eth_get_stats(ndev); sh_eth_reset(ndev); + + /* Set MAC address again */ + update_mac_address(ndev); } /* free Tx skb function */ -- cgit v0.10.2 From cac5e65e8a7ea074f2626d2eaa53aa308452dec4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 27 Feb 2015 09:42:50 -0800 Subject: net: do not use rcu in rtnl_dump_ifinfo() We did a failed attempt in the past to only use rcu in rtnl dump operations (commit e67f88dd12f6 "net: dont hold rtnl mutex during netlink dump callbacks") Now that dumps are holding RTNL anyway, there is no need to also use rcu locking, as it forbids any scheduling ability, like GFP_KERNEL allocations that controlling path should use instead of GFP_ATOMIC whenever possible. This should fix following splat Cong Wang reported : [ INFO: suspicious RCU usage. ] 3.19.0+ #805 Tainted: G W include/linux/rcupdate.h:538 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by ip/771: #0: (rtnl_mutex){+.+.+.}, at: [] netlink_dump+0x21/0x26c #1: (rcu_read_lock){......}, at: [] rcu_read_lock+0x0/0x6e stack backtrace: CPU: 3 PID: 771 Comm: ip Tainted: G W 3.19.0+ #805 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 0000000000000001 ffff8800d51e7718 ffffffff81a27457 0000000029e729e6 ffff8800d6108000 ffff8800d51e7748 ffffffff810b539b ffffffff820013dd 00000000000001c8 0000000000000000 ffff8800d7448088 ffff8800d51e7758 Call Trace: [] dump_stack+0x4c/0x65 [] lockdep_rcu_suspicious+0x107/0x110 [] rcu_preempt_sleep_check+0x45/0x47 [] ___might_sleep+0x1d/0x1cb [] __might_sleep+0x78/0x80 [] idr_alloc+0x45/0xd1 [] ? rcu_read_lock_held+0x3b/0x3d [] ? idr_for_each+0x53/0x101 [] alloc_netid+0x61/0x69 [] __peernet2id+0x79/0x8d [] peernet2id+0x13/0x1f [] rtnl_fill_ifinfo+0xa8d/0xc20 [] ? __lock_is_held+0x39/0x52 [] rtnl_dump_ifinfo+0x149/0x213 [] netlink_dump+0xef/0x26c [] netlink_recvmsg+0x17b/0x2c5 [] __sock_recvmsg+0x4e/0x59 [] sock_recvmsg+0x3f/0x51 [] ___sys_recvmsg+0xf6/0x1d9 [] ? handle_pte_fault+0x6e1/0xd3d [] ? native_sched_clock+0x35/0x37 [] ? sched_clock_local+0x12/0x72 [] ? sched_clock_cpu+0x9e/0xb7 [] ? rcu_read_lock_held+0x3b/0x3d [] ? __fcheck_files+0x4c/0x58 [] ? __fget_light+0x2d/0x52 [] __sys_recvmsg+0x42/0x60 [] SyS_recvmsg+0x12/0x1c Signed-off-by: Eric Dumazet Fixes: 0c7aecd4bde4b7302 ("netns: add rtnl cmd to add and get peer netns ids") Cc: Nicolas Dichtel Reported-by: Cong Wang Signed-off-by: David S. Miller diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c49355..25b4b5d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1300,7 +1300,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) s_h = cb->args[0]; s_idx = cb->args[1]; - rcu_read_lock(); cb->seq = net->dev_base_seq; /* A hack to preserve kernel<->userspace interface. @@ -1322,7 +1321,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { + hlist_for_each_entry(dev, head, index_hlist) { if (idx < s_idx) goto cont; err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, @@ -1344,7 +1343,6 @@ cont: } } out: - rcu_read_unlock(); cb->args[1] = idx; cb->args[0] = h; -- cgit v0.10.2 From 2f5c54ce0d11a527de3544e1b2c904544a2c1dd7 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 27 Feb 2015 13:19:44 +0200 Subject: net: davinci_mdio: add hibernation callbacks Setting a dev_pm_ops suspend_late/resume_early pair but not a set of hibernation functions means those pm functions will not be called upon hibernation. Fix this by using SET_LATE_SYSTEM_SLEEP_PM_OPS, which appropriately assigns the suspend and hibernation handlers and move davinci_mdio_x callbacks under CONFIG_PM_SLEEP to avoid build warnings. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c index 98655b4..c00084d 100644 --- a/drivers/net/ethernet/ti/davinci_mdio.c +++ b/drivers/net/ethernet/ti/davinci_mdio.c @@ -423,6 +423,7 @@ static int davinci_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP static int davinci_mdio_suspend(struct device *dev) { struct davinci_mdio_data *data = dev_get_drvdata(dev); @@ -464,10 +465,10 @@ static int davinci_mdio_resume(struct device *dev) return 0; } +#endif static const struct dev_pm_ops davinci_mdio_pm_ops = { - .suspend_late = davinci_mdio_suspend, - .resume_early = davinci_mdio_resume, + SET_LATE_SYSTEM_SLEEP_PM_OPS(davinci_mdio_suspend, davinci_mdio_resume) }; #if IS_ENABLED(CONFIG_OF) -- cgit v0.10.2 From 8963a50453508a3f605dc2b29be35ee72d55335c Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Fri, 27 Feb 2015 13:19:45 +0200 Subject: net: ti: cpsw: add hibernation callbacks Setting a dev_pm_ops suspend/resume pair but not a set of hibernation functions means those pm functions will not be called upon hibernation. Fix this by using SIMPLE_DEV_PM_OPS, which appropriately assigns the suspend and hibernation handlers and move cpsw_suspend/resume calbacks under CONFIG_PM_SLEEP to avoid build warnings. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 49b0336..a1bbaf6 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2466,6 +2466,7 @@ static int cpsw_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP static int cpsw_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -2518,11 +2519,9 @@ static int cpsw_resume(struct device *dev) } return 0; } +#endif -static const struct dev_pm_ops cpsw_pm_ops = { - .suspend = cpsw_suspend, - .resume = cpsw_resume, -}; +static SIMPLE_DEV_PM_OPS(cpsw_pm_ops, cpsw_suspend, cpsw_resume); static const struct of_device_id cpsw_of_mtable[] = { { .compatible = "ti,cpsw", }, -- cgit v0.10.2 From 00c7eb99a5c4bd09a3f4133f61d3ebae787384c7 Mon Sep 17 00:00:00 2001 From: Yannick Guerrini Date: Fri, 27 Feb 2015 18:12:16 +0100 Subject: qlcnic: Fix trivial typo in comment Change 'Firmare' to 'Firmware' Signed-off-by: Yannick Guerrini Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index fa43176..f221126 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -314,7 +314,7 @@ struct qlcnic_fdt { #define QLCNIC_BRDCFG_START 0x4000 /* board config */ #define QLCNIC_BOOTLD_START 0x10000 /* bootld */ #define QLCNIC_IMAGE_START 0x43000 /* compressed image */ -#define QLCNIC_USER_START 0x3E8000 /* Firmare info */ +#define QLCNIC_USER_START 0x3E8000 /* Firmware info */ #define QLCNIC_FW_VERSION_OFFSET (QLCNIC_USER_START+0x408) #define QLCNIC_FW_SIZE_OFFSET (QLCNIC_USER_START+0x40c) -- cgit v0.10.2 From f7c306880590f04f9417f84361fa4146103d6de8 Mon Sep 17 00:00:00 2001 From: Yannick Guerrini Date: Fri, 27 Feb 2015 18:38:46 +0100 Subject: netxen_nic: Fix trivial typos in comments Change 'mutliple' to 'multiple' Change 'Firmare' to 'Firmware' Signed-off-by: Yannick Guerrini Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h index 6e426ae..0a5e204 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic.h +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic.h @@ -354,7 +354,7 @@ struct cmd_desc_type0 { } __attribute__ ((aligned(64))); -/* Note: sizeof(rcv_desc) should always be a mutliple of 2 */ +/* Note: sizeof(rcv_desc) should always be a multiple of 2 */ struct rcv_desc { __le16 reference_handle; __le16 reserved; @@ -499,7 +499,7 @@ struct uni_data_desc{ #define NETXEN_IMAGE_START 0x43000 /* compressed image */ #define NETXEN_SECONDARY_START 0x200000 /* backup images */ #define NETXEN_PXE_START 0x3E0000 /* PXE boot rom */ -#define NETXEN_USER_START 0x3E8000 /* Firmare info */ +#define NETXEN_USER_START 0x3E8000 /* Firmware info */ #define NETXEN_FIXED_START 0x3F0000 /* backup of crbinit */ #define NETXEN_USER_START_OLD NETXEN_PXE_START /* very old flash */ -- cgit v0.10.2 From b8b01344eb166740f4dc2f5397fd7fe7c814c16a Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Fri, 27 Feb 2015 23:40:24 +0530 Subject: net: smc91c92_cs: Use setup_timer and mod_timer Use timer API functions setup_timer and mod_timer instead of structure assignments as they are standard way to set the timer and to update the expire field of an active timer respectively. This is done using Coccinelle and semantic patch used for this is as follows: // @@ expression x,y,z,a,b; @@ -init_timer (&x); +setup_timer (&x, y, z); +mod_timer (&a, b); -x.function = y; -x.data = z; -x.expires = b; -add_timer(&a); // Signed-off-by: Vaishali Thakkar Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c index 6b33127..3449893 100644 --- a/drivers/net/ethernet/smsc/smc91c92_cs.c +++ b/drivers/net/ethernet/smsc/smc91c92_cs.c @@ -1070,11 +1070,8 @@ static int smc_open(struct net_device *dev) smc->packets_waiting = 0; smc_reset(dev); - init_timer(&smc->media); - smc->media.function = media_check; - smc->media.data = (u_long) dev; - smc->media.expires = jiffies + HZ; - add_timer(&smc->media); + setup_timer(&smc->media, media_check, (u_long)dev); + mod_timer(&smc->media, jiffies + HZ); return 0; } /* smc_open */ -- cgit v0.10.2 From fc4ba63627948bb395cb54f846470d22eda38ac2 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Fri, 27 Feb 2015 23:53:03 +0530 Subject: net: 8390: pcnet_cs: Use setup_timer and mod_timer Use timer API functions setup_timer and mod_timer instead of structure assignments as they are standard way to set the timer and to update the expire field of an active timer respectively. This is done using Coccinelle and semantic patch used for this is as follows: // @@ expression x,y,z,a,b; @@ -init_timer (&x); +setup_timer (&x, y, z); +mod_timer (&a, b); -x.function = y; -x.data = z; -x.expires = b; -add_timer(&a); // Signed-off-by: Vaishali Thakkar Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 9fb7b9d..2777289 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -918,11 +918,8 @@ static int pcnet_open(struct net_device *dev) info->phy_id = info->eth_phy; info->link_status = 0x00; - init_timer(&info->watchdog); - info->watchdog.function = ei_watchdog; - info->watchdog.data = (u_long)dev; - info->watchdog.expires = jiffies + HZ; - add_timer(&info->watchdog); + setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + mod_timer(&info->watchdog, jiffies + HZ); return ei_open(dev); } /* pcnet_open */ -- cgit v0.10.2 From 6753a971bef3d91f25571a5da24abbfd76459114 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Sat, 28 Feb 2015 00:02:34 +0530 Subject: net: 8390: axnet_cs: Use setup_timer and mod_timer Use timer API functions setup_timer and mod_timer instead of structure assignments as they are standard way to set the timer and to update the expire field of an active timer respectively. This is done using Coccinelle and semantic patch used for this is as follows: // @@ expression x,y,z,a,b; @@ -init_timer (&x); +setup_timer (&x, y, z); +mod_timer (&a, b); -x.function = y; -x.data = z; -x.expires = b; -add_timer(&a); // Signed-off-by: Vaishali Thakkar Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c index 7769c05..ec6eac1 100644 --- a/drivers/net/ethernet/8390/axnet_cs.c +++ b/drivers/net/ethernet/8390/axnet_cs.c @@ -484,11 +484,8 @@ static int axnet_open(struct net_device *dev) link->open++; info->link_status = 0x00; - init_timer(&info->watchdog); - info->watchdog.function = ei_watchdog; - info->watchdog.data = (u_long)dev; - info->watchdog.expires = jiffies + HZ; - add_timer(&info->watchdog); + setup_timer(&info->watchdog, ei_watchdog, (u_long)dev); + mod_timer(&info->watchdog, jiffies + HZ); return ax_open(dev); } /* axnet_open */ -- cgit v0.10.2 From ccb36da19b36a77dce926efeb76de0ab57c00ad5 Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Sat, 28 Feb 2015 00:12:34 +0530 Subject: net: stmmac: Use setup_timer and mod_timer Use timer API functions setup_timer and mod_timer instead of structure assignments as they are standard way to set the timer and to update the expire field of an active timer respectively. This is done using Coccinelle and semantic patch used for this is as follows: // @@ expression x,y,z,a,b; @@ -init_timer (&x); +setup_timer (&x, y, z); +mod_timer (&a, b); -x.function = y; -x.data = z; -x.expires = b; -add_timer(&a); // Signed-off-by: Vaishali Thakkar Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 55e89b3..a0ea84f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -310,11 +310,11 @@ bool stmmac_eee_init(struct stmmac_priv *priv) spin_lock_irqsave(&priv->lock, flags); if (!priv->eee_active) { priv->eee_active = 1; - init_timer(&priv->eee_ctrl_timer); - priv->eee_ctrl_timer.function = stmmac_eee_ctrl_timer; - priv->eee_ctrl_timer.data = (unsigned long)priv; - priv->eee_ctrl_timer.expires = STMMAC_LPI_T(eee_timer); - add_timer(&priv->eee_ctrl_timer); + setup_timer(&priv->eee_ctrl_timer, + stmmac_eee_ctrl_timer, + (unsigned long)priv); + mod_timer(&priv->eee_ctrl_timer, + STMMAC_LPI_T(eee_timer)); priv->hw->mac->set_eee_timer(priv->hw, STMMAC_DEFAULT_LIT_LS, -- cgit v0.10.2 From 187d67858bafd16334d150b0fa7cff1f5814c6fb Mon Sep 17 00:00:00 2001 From: Vaishali Thakkar Date: Sat, 28 Feb 2015 00:20:59 +0530 Subject: net: pasemi: Use setup_timer and mod_timer Use timer API functions setup_timer and mod_timer instead of structure assignments as they are standard way to set the timer and to update the expire field of an active timer respectively. This is done using Coccinelle and semantic patch used for this is as follows: // @@ expression x,y,z,a,b; @@ -init_timer (&x); +setup_timer (&x, y, z); +mod_timer (&a, b); -x.function = y; -x.data = z; -x.expires = b; -add_timer(&a); // Signed-off-by: Vaishali Thakkar Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 44e8d7d..57a6e6c 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1239,11 +1239,9 @@ static int pasemi_mac_open(struct net_device *dev) if (mac->phydev) phy_start(mac->phydev); - init_timer(&mac->tx->clean_timer); - mac->tx->clean_timer.function = pasemi_mac_tx_timer; - mac->tx->clean_timer.data = (unsigned long)mac->tx; - mac->tx->clean_timer.expires = jiffies+HZ; - add_timer(&mac->tx->clean_timer); + setup_timer(&mac->tx->clean_timer, pasemi_mac_tx_timer, + (unsigned long)mac->tx); + mod_timer(&mac->tx->clean_timer, jiffies + HZ); return 0; -- cgit v0.10.2 From 56b08fdcf637955d3023d769afd6cdabc526ba22 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 27 Feb 2015 21:26:03 +0100 Subject: net/hsr: Fix NULL pointer dereference and refcnt bugs when deleting a HSR interface. To repeat: $ sudo ip link del hsr0 BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] hsr_del_port+0x15/0xa0 etc... Bug description: As part of the hsr master device destruction, hsr_del_port() is called for each of the hsr ports. At each such call, the master device is updated regarding features and mtu. When the master device is freed before the slave interfaces, master will be NULL in hsr_del_port(), which led to a NULL pointer dereference. Additionally, dev_put() was called on the master device itself in hsr_del_port(), causing a refcnt error. A third bug in the same code path was that the rtnl lock was not taken before hsr_del_port() was called as part of hsr_dev_destroy(). The reporter (Nicolas Dichtel) also said: "hsr_netdev_notify() supposes that the port will always be available when the notification is for an hsr interface. It's wrong. For example, netdev_wait_allrefs() may resend NETDEV_UNREGISTER.". As a precaution against this, a check for port == NULL was added in hsr_dev_notify(). Reported-by: Nicolas Dichtel Fixes: 51f3c605318b056a ("net/hsr: Move slave init to hsr_slave.c.") Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index a138d75..44d2746 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -359,8 +359,11 @@ static void hsr_dev_destroy(struct net_device *hsr_dev) struct hsr_port *port; hsr = netdev_priv(hsr_dev); + + rtnl_lock(); hsr_for_each_port(hsr, port) hsr_del_port(port); + rtnl_unlock(); del_timer_sync(&hsr->prune_timer); del_timer_sync(&hsr->announce_timer); diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index 779d28b..cd37d00 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -36,6 +36,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; /* Not an HSR device */ hsr = netdev_priv(dev); port = hsr_port_get_hsr(hsr, HSR_PT_MASTER); + if (port == NULL) { + /* Resend of notification concerning removed device? */ + return NOTIFY_DONE; + } } else { hsr = port->hsr; } diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index a348dcb..7d37366 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -181,8 +181,10 @@ void hsr_del_port(struct hsr_port *port) list_del_rcu(&port->port_list); if (port != master) { - netdev_update_features(master->dev); - dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + if (master != NULL) { + netdev_update_features(master->dev); + dev_set_mtu(master->dev, hsr_get_max_mtu(hsr)); + } netdev_rx_handler_unregister(port->dev); dev_set_promiscuity(port->dev, -1); } @@ -192,5 +194,7 @@ void hsr_del_port(struct hsr_port *port) */ synchronize_rcu(); - dev_put(port->dev); + + if (port != master) + dev_put(port->dev); } -- cgit v0.10.2 From c03ae533a9c4de83a35105f9bfd7152d916b4680 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Feb 2015 11:51:36 +0100 Subject: rxrpc: terminate retrans loop when sending of skb fails Typo, 'stop' is never set to true. Seems intent is to not attempt to retransmit more packets after sendmsg returns an error. This change is based on code inspection only. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index c6be17a..4040418 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -218,7 +218,8 @@ static void rxrpc_resend(struct rxrpc_call *call) struct rxrpc_header *hdr; struct sk_buff *txb; unsigned long *p_txb, resend_at; - int loop, stop; + bool stop; + int loop; u8 resend; _enter("{%d,%d,%d,%d},", @@ -226,7 +227,7 @@ static void rxrpc_resend(struct rxrpc_call *call) atomic_read(&call->sequence), CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - stop = 0; + stop = false; resend = 0; resend_at = 0; @@ -255,7 +256,7 @@ static void rxrpc_resend(struct rxrpc_call *call) _proto("Tx DATA %%%u { #%d }", ntohl(sp->hdr.serial), ntohl(sp->hdr.seq)); if (rxrpc_send_packet(call->conn->trans, txb) < 0) { - stop = 0; + stop = true; sp->resend_at = jiffies + 3; } else { sp->resend_at = -- cgit v0.10.2 From 765dd3bb44711b4ba36c1e06f9c4b7bfe73ffef7 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 28 Feb 2015 11:51:37 +0100 Subject: rxrpc: don't multiply with HZ twice rxrpc_resend_timeout has an initial value of 4 * HZ; use it as-is. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c index 4040418..e0547f5 100644 --- a/net/rxrpc/ar-ack.c +++ b/net/rxrpc/ar-ack.c @@ -260,7 +260,7 @@ static void rxrpc_resend(struct rxrpc_call *call) sp->resend_at = jiffies + 3; } else { sp->resend_at = - jiffies + rxrpc_resend_timeout * HZ; + jiffies + rxrpc_resend_timeout; } } -- cgit v0.10.2 From f62ba9c14b85a682b64a4c421f91de0bd2aa8538 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 28 Feb 2015 18:09:16 -0800 Subject: net: bcmgenet: fix software maintained statistics Commit 44c8bc3ce39f ("net: bcmgenet: log RX buffer allocation and RX/TX dma failures") added a few software maintained statistics using BCMGENET_STAT_MIB_RX and BCMGENET_STAT_MIB_TX. These statistics are read from the hardware MIB counters, such that bcmgenet_update_mib_counters() was trying to read from a non-existing MIB offset for these counters. Fix this by introducing a special type: BCMGENET_STAT_SOFT, similar to BCMGENET_STAT_NETDEV, such that bcmgenet_get_ethtool_stats will read from the software mib. Fixes: 44c8bc3ce39f ("net: bcmgenet: log RX buffer allocation and RX/TX dma failures") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 2874a00..6befde6 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -487,6 +487,7 @@ enum bcmgenet_stat_type { BCMGENET_STAT_MIB_TX, BCMGENET_STAT_RUNT, BCMGENET_STAT_MISC, + BCMGENET_STAT_SOFT, }; struct bcmgenet_stats { @@ -515,6 +516,7 @@ struct bcmgenet_stats { #define STAT_GENET_MIB_RX(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_MIB_RX) #define STAT_GENET_MIB_TX(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_MIB_TX) #define STAT_GENET_RUNT(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_RUNT) +#define STAT_GENET_SOFT_MIB(str, m) STAT_GENET_MIB(str, m, BCMGENET_STAT_SOFT) #define STAT_GENET_MISC(str, m, offset) { \ .stat_string = str, \ @@ -614,9 +616,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = { UMAC_RBUF_OVFL_CNT), STAT_GENET_MISC("rbuf_err_cnt", mib.rbuf_err_cnt, UMAC_RBUF_ERR_CNT), STAT_GENET_MISC("mdf_err_cnt", mib.mdf_err_cnt, UMAC_MDF_ERR_CNT), - STAT_GENET_MIB_RX("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), - STAT_GENET_MIB_RX("rx_dma_failed", mib.rx_dma_failed), - STAT_GENET_MIB_TX("tx_dma_failed", mib.tx_dma_failed), + STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), + STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed), + STAT_GENET_SOFT_MIB("tx_dma_failed", mib.tx_dma_failed), }; #define BCMGENET_STATS_LEN ARRAY_SIZE(bcmgenet_gstrings_stats) @@ -668,6 +670,7 @@ static void bcmgenet_update_mib_counters(struct bcmgenet_priv *priv) s = &bcmgenet_gstrings_stats[i]; switch (s->type) { case BCMGENET_STAT_NETDEV: + case BCMGENET_STAT_SOFT: continue; case BCMGENET_STAT_MIB_RX: case BCMGENET_STAT_MIB_TX: -- cgit v0.10.2 From 55ff4ea9a853f5ec9a8290c0ccc1d00c97e49f82 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 28 Feb 2015 18:09:17 -0800 Subject: net: systemport: fix software maintained statistics Commit 60b4ea1781fd ("net: systemport: log RX buffer allocation and RX/TX DMA failures") added a few software maintained statistics using BCM_SYSPORT_STAT_MIB_RX and BCM_SYSPORT_STAT_MIB_TX. These statistics are read from the hardware MIB counters, such that bcm_sysport_update_mib_counters() was trying to read from a non-existing MIB offset for these counters. Fix this by introducing a special type: BCM_SYSPORT_STAT_SOFT, similar to BCM_SYSPORT_STAT_NETDEV, such that bcm_sysport_get_ethtool_stats will read from the software mib. Fixes: 60b4ea1781fd ("net: systemport: log RX buffer allocation and RX/TX DMA failures") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 5b308a4..783543a 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -274,9 +274,9 @@ static const struct bcm_sysport_stats bcm_sysport_gstrings_stats[] = { /* RBUF misc statistics */ STAT_RBUF("rbuf_ovflow_cnt", mib.rbuf_ovflow_cnt, RBUF_OVFL_DISC_CNTR), STAT_RBUF("rbuf_err_cnt", mib.rbuf_err_cnt, RBUF_ERR_PKT_CNTR), - STAT_MIB_RX("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), - STAT_MIB_RX("rx_dma_failed", mib.rx_dma_failed), - STAT_MIB_TX("tx_dma_failed", mib.tx_dma_failed), + STAT_MIB_SOFT("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), + STAT_MIB_SOFT("rx_dma_failed", mib.rx_dma_failed), + STAT_MIB_SOFT("tx_dma_failed", mib.tx_dma_failed), }; #define BCM_SYSPORT_STATS_LEN ARRAY_SIZE(bcm_sysport_gstrings_stats) @@ -345,6 +345,7 @@ static void bcm_sysport_update_mib_counters(struct bcm_sysport_priv *priv) s = &bcm_sysport_gstrings_stats[i]; switch (s->type) { case BCM_SYSPORT_STAT_NETDEV: + case BCM_SYSPORT_STAT_SOFT: continue; case BCM_SYSPORT_STAT_MIB_RX: case BCM_SYSPORT_STAT_MIB_TX: diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index fc19417..7e3d87a 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -570,6 +570,7 @@ enum bcm_sysport_stat_type { BCM_SYSPORT_STAT_RUNT, BCM_SYSPORT_STAT_RXCHK, BCM_SYSPORT_STAT_RBUF, + BCM_SYSPORT_STAT_SOFT, }; /* Macros to help define ethtool statistics */ @@ -590,6 +591,7 @@ enum bcm_sysport_stat_type { #define STAT_MIB_RX(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_MIB_RX) #define STAT_MIB_TX(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_MIB_TX) #define STAT_RUNT(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_RUNT) +#define STAT_MIB_SOFT(str, m) STAT_MIB(str, m, BCM_SYSPORT_STAT_SOFT) #define STAT_RXCHK(str, m, ofs) { \ .stat_string = str, \ -- cgit v0.10.2 From f5956fafb00afab474c3886b6297f9b5e7aff722 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 2 Mar 2015 18:22:15 +0200 Subject: net/mlx4_core: Fix wrong mask and error flow for the update-qp command The bit mask for currently supported driver features (MLX4_UPDATE_QP_SUPPORTED_ATTRS) of the update-qp command was defined twice (using enum value and pre-processor define directive) and wrong. The return value of the call to mlx4_update_qp() from within the SRIOV resource-tracker was wrongly voided down. Fix both issues. issue: none Fixes: 09e05c3f78e9 ('net/mlx4: Set vlan stripping policy by the right command') Fixes: ce8d9e0d6746 ('net/mlx4_core: Add UPDATE_QP SRIOV wrapper support') Signed-off-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 2bb8553..eda29db 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -412,7 +412,6 @@ err_icm: EXPORT_SYMBOL_GPL(mlx4_qp_alloc); -#define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 486e3d2..d97ca88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -713,7 +713,7 @@ static int update_vport_qp_param(struct mlx4_dev *dev, struct mlx4_vport_oper_state *vp_oper; struct mlx4_priv *priv; u32 qp_type; - int port; + int port, err = 0; port = (qpc->pri_path.sched_queue & 0x40) ? 2 : 1; priv = mlx4_priv(dev); @@ -738,7 +738,9 @@ static int update_vport_qp_param(struct mlx4_dev *dev, } else { struct mlx4_update_qp_params params = {.flags = 0}; - mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, ¶ms); + err = mlx4_update_qp(dev, qpn, MLX4_UPDATE_QP_VSD, ¶ms); + if (err) + goto out; } } @@ -773,7 +775,8 @@ static int update_vport_qp_param(struct mlx4_dev *dev, qpc->pri_path.feup |= MLX4_FSM_FORCE_ETH_SRC_MAC; qpc->pri_path.grh_mylmc = (0x80 & qpc->pri_path.grh_mylmc) + vp_oper->mac_idx; } - return 0; +out: + return err; } static int mpt_mask(struct mlx4_dev *dev) diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 2bbc62a..551f854 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -427,7 +427,7 @@ struct mlx4_wqe_inline_seg { enum mlx4_update_qp_attr { MLX4_UPDATE_QP_SMAC = 1 << 0, - MLX4_UPDATE_QP_VSD = 1 << 2, + MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 2) - 1 }; -- cgit v0.10.2 From 1037ebbbd262227a91dfdd558159e345d4edf6b7 Mon Sep 17 00:00:00 2001 From: Ido Shamay Date: Mon, 2 Mar 2015 18:22:16 +0200 Subject: net/mlx4_en: Disbale GRO for incoming loopback/selftest packets Packets which are sent from the selftest (ethtool) flow, should not be passed to GRO stack but rather dropped by the driver after validation. To achieve that, we disable GRO for the duration of the selftest. Fixes: dd65beac48a5 ("net/mlx4_en: Extend usage of napi_gro_frags") Reported-by: Carol Soto Signed-off-by: Ido Shamay Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 2d8ee66..a61009f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -81,12 +81,14 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) { u32 loopback_ok = 0; int i; - + bool gro_enabled; priv->loopback_ok = 0; priv->validate_loopback = 1; + gro_enabled = priv->dev->features & NETIF_F_GRO; mlx4_en_update_loopback_state(priv->dev, priv->dev->features); + priv->dev->features &= ~NETIF_F_GRO; /* xmit */ if (mlx4_en_test_loopback_xmit(priv)) { @@ -108,6 +110,10 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) mlx4_en_test_loopback_exit: priv->validate_loopback = 0; + + if (gro_enabled) + priv->dev->features |= NETIF_F_GRO; + mlx4_en_update_loopback_state(priv->dev, priv->dev->features); return !loopback_ok; } -- cgit v0.10.2 From 7d7355f58ba4f9d68d2fb79864bab4ccb618e4e1 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 3 Mar 2015 00:52:00 +0000 Subject: sh_eth: Ensure proper ordering of descriptor active bit write/read When submitting a DMA descriptor, the active bit must be written last. When reading a completed DMA descriptor, the active bit must be read first. Add memory barriers to ensure that this ordering is maintained. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 654b48d..5c212a8 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1410,6 +1410,8 @@ static int sh_eth_txfree(struct net_device *ndev) txdesc = &mdp->tx_ring[entry]; if (txdesc->status & cpu_to_edmac(mdp, TD_TACT)) break; + /* TACT bit must be checked before all the following reads */ + rmb(); /* Free the original skb. */ if (mdp->tx_skbuff[entry]) { dma_unmap_single(&ndev->dev, txdesc->addr, @@ -1447,6 +1449,8 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) limit = boguscnt; rxdesc = &mdp->rx_ring[entry]; while (!(rxdesc->status & cpu_to_edmac(mdp, RD_RACT))) { + /* RACT bit must be checked before all the following reads */ + rmb(); desc_status = edmac_to_cpu(mdp, rxdesc->status); pkt_len = rxdesc->frame_length; @@ -1526,6 +1530,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) skb_checksum_none_assert(skb); rxdesc->addr = dma_addr; } + wmb(); /* RACT bit must be set after all the above writes */ if (entry >= mdp->num_rx_ring - 1) rxdesc->status |= cpu_to_edmac(mdp, RD_RACT | RD_RFP | RD_RDEL); @@ -2195,6 +2200,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) } txdesc->buffer_length = skb->len; + wmb(); /* TACT bit must be set after all the above writes */ if (entry >= mdp->num_tx_ring - 1) txdesc->status |= cpu_to_edmac(mdp, TD_TACT | TD_TDLE); else -- cgit v0.10.2 From 6ded286555c2518be2f2d438f83dfaba3f0100fd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 3 Mar 2015 00:52:08 +0000 Subject: sh_eth: Fix RX recovery on R-Car in case of RX ring underrun In case of RX ring underrun (RDE), we attempt to reset the software descriptor pointers (dirty_rx and cur_rx) to match where the hardware will read the next descriptor from, as that might not be the first dirty descriptor. This relies on reading RDFAR, but that register doesn't exist on all supported chips - specifically, not on the R-Car chips. This will result in unpredictable behaviour on those chips after an RDE. Make this pointer reset conditional and assume that it isn't needed on the R-Car chips. This fix also assumes that RDFAR is never exposed at offset 0 in the memory map - this is currently true, and a subsequent commit will fix the ambiguity between offset 0 and no-offset in the register offset maps. Fixes: 79fba9f51755 ("net: sh_eth: fix the rxdesc pointer when rx ...") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 5c212a8..3309494 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1543,7 +1543,7 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) /* If we don't need to check status, don't. -KDU */ if (!(sh_eth_read(ndev, EDRRR) & EDRRR_R)) { /* fix the values for the next receiving if RDE is set */ - if (intr_status & EESR_RDE) { + if (intr_status & EESR_RDE && mdp->reg_offset[RDFAR] != 0) { u32 count = (sh_eth_read(ndev, RDFAR) - sh_eth_read(ndev, RDLAR)) >> 4; -- cgit v0.10.2 From 9b4a6364a6b3176511956ad186f8dffbe2e60c3e Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 3 Mar 2015 00:52:39 +0000 Subject: Revert "sh_eth: Enable Rx descriptor word 0 shift for r8a7790" This reverts commit fd9af07c3404ac9ecbd0d859563360f51ce1ffde. The hardware manual states that the frame error and multicast bits are copied to bits 9:0 of RD0, not bits 25:16. I've tested that this is true for RFS1 (CRC error), RFS3 (frame too short), RFS4 (frame too long) and RFS8 (multicast). Also adjust a comment to agree with this. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 3309494..3406cda 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -508,7 +508,6 @@ static struct sh_eth_cpu_data r8a779x_data = { .tpauser = 1, .hw_swap = 1, .rmiimode = 1, - .shift_rd0 = 1, }; static void sh_eth_set_rate_sh7724(struct net_device *ndev) @@ -1462,8 +1461,8 @@ static int sh_eth_rx(struct net_device *ndev, u32 intr_status, int *quota) /* In case of almost all GETHER/ETHERs, the Receive Frame State * (RFS) bits in the Receive Descriptor 0 are from bit 9 to - * bit 0. However, in case of the R8A7740, R8A779x, and - * R7S72100 the RFS bits are from bit 25 to bit 16. So, the + * bit 0. However, in case of the R8A7740 and R7S72100 + * the RFS bits are from bit 25 to bit 16. So, the * driver needs right shifting by 16. */ if (mdp->cd->shift_rd0) -- cgit v0.10.2 From dacc73e0cf930e87e2e6a94d29156f1d5776b18f Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 3 Mar 2015 00:53:08 +0000 Subject: sh_eth: Really fix padding of short frames on TX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My previous fix to clear padding of short frames used skb->len as the DMA length, assuming that skb_padto() extended skb->len to include the padding. That isn't the case; we need to use skb_put_padto() instead. (This wasn't immediately obvious because software padding isn't actually needed on the R-Car H2. We could make it conditional on which chip is being driven, but it's probably not worth the effort.) Reported-by: "Violeta Menéndez González" Fixes: 612a17a54b50 ("sh_eth: Fix padding of short frames on TX") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 3406cda..736d5d1 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2181,7 +2181,7 @@ static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev) } spin_unlock_irqrestore(&mdp->lock, flags); - if (skb_padto(skb, ETH_ZLEN)) + if (skb_put_padto(skb, ETH_ZLEN)) return NETDEV_TX_OK; entry = mdp->cur_tx % mdp->num_tx_ring; -- cgit v0.10.2 From acf8dd0a9d0b9e4cdb597c2f74802f79c699e802 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Mon, 2 Mar 2015 18:27:11 +0100 Subject: udp: only allow UFO for packets from SOCK_DGRAM sockets If an over-MTU UDP datagram is sent through a SOCK_RAW socket to a UFO-capable device, ip_ufo_append_data() sets skb->ip_summed to CHECKSUM_PARTIAL unconditionally as all GSO code assumes transport layer checksum is to be computed on segmentation. However, in this case, skb->csum_start and skb->csum_offset are never set as raw socket transmit path bypasses udp_send_skb() where they are usually set. As a result, driver may access invalid memory when trying to calculate the checksum and store the result (as observed in virtio_net driver). Moreover, the very idea of modifying the userspace provided UDP header is IMHO against raw socket semantics (I wasn't able to find a document clearly stating this or the opposite, though). And while allowing CHECKSUM_NONE in the UFO case would be more efficient, it would be a bit too intrusive change just to handle a corner case like this. Therefore disallowing UFO for packets from SOCK_DGRAM seems to be the best option. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d68199d..a7aea20 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -888,7 +888,8 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (sk->sk_type == SOCK_DGRAM)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7deebf1..0a04a37 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1298,7 +1298,8 @@ emsgsize: if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { + (rt->dst.dev->features & NETIF_F_UFO) && + (sk->sk_type == SOCK_DGRAM)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, mtu, flags, rt); -- cgit v0.10.2 From 71e168b151babf4334e2e26c92230a6bda3b1f24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 Mar 2015 13:53:31 +0100 Subject: net: bridge: add compile-time assert for cb struct size make build fail if structure no longer fits into ->cb storage. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller diff --git a/net/bridge/br.c b/net/bridge/br.c index fb57ab6..02c24cf 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -190,6 +190,8 @@ static int __init br_init(void) { int err; + BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + err = stp_proto_register(&br_stp_proto); if (err < 0) { pr_err("bridge: can't register sap for STP\n"); -- cgit v0.10.2 From c77c761fa40e0ebdacb728b0310191ef8dc6902b Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 2 Mar 2015 11:56:12 -0600 Subject: ibmveth: Add function to enable live MAC address changes Add a function that will enable changing the MAC address of an ibmveth interface while it is still running. Signed-off-by: Thomas Falcon Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 21978cc..072426a 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1327,6 +1327,28 @@ static unsigned long ibmveth_get_desired_dma(struct vio_dev *vdev) return ret; } +static int ibmveth_set_mac_addr(struct net_device *dev, void *p) +{ + struct ibmveth_adapter *adapter = netdev_priv(dev); + struct sockaddr *addr = p; + u64 mac_address; + int rc; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + mac_address = ibmveth_encode_mac_addr(addr->sa_data); + rc = h_change_logical_lan_mac(adapter->vdev->unit_address, mac_address); + if (rc) { + netdev_err(adapter->netdev, "h_change_logical_lan_mac failed with rc=%d\n", rc); + return rc; + } + + ether_addr_copy(dev->dev_addr, addr->sa_data); + + return 0; +} + static const struct net_device_ops ibmveth_netdev_ops = { .ndo_open = ibmveth_open, .ndo_stop = ibmveth_close, @@ -1337,7 +1359,7 @@ static const struct net_device_ops ibmveth_netdev_ops = { .ndo_fix_features = ibmveth_fix_features, .ndo_set_features = ibmveth_set_features, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ibmveth_set_mac_addr, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ibmveth_poll_controller, #endif -- cgit v0.10.2 From 0ae93b2cccbcc060899800a6bac7905a7d754448 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 2 Mar 2015 12:03:27 -0800 Subject: gianfar: Reduce logging noise seen due to phy polling if link is down Commit 6ce29b0e2a04 ("gianfar: Avoid unnecessary reg accesses in adjust_link()") eliminates unnecessary calls to adjust_link for phy devices which don't support interrupts and need polling. As part of that work, the 'new_state' local flag, which was used to reduce logging noise on the console, was eliminated. Unfortunately, that means that a 'Link is Down' log message will now be issued continuously if a link is configured as UP, the link state is down, and the associated phy requires polling. This occurs because priv->oldduplex is -1 in this case, which always differs from phydev->duplex. In addition, phydev->speed may also differ from priv->oldspeed. gfar_update_link_state() is therefore called each time a phy is polled, even if the link state did not change. Cc: Claudiu Manoil Signed-off-by: Guenter Roeck Reviewed-by: Claudiu Manoil Signed-off-by: David S. Miller diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 43df788..178e540 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -3162,8 +3162,8 @@ static void adjust_link(struct net_device *dev) struct phy_device *phydev = priv->phydev; if (unlikely(phydev->link != priv->oldlink || - phydev->duplex != priv->oldduplex || - phydev->speed != priv->oldspeed)) + (phydev->link && (phydev->duplex != priv->oldduplex || + phydev->speed != priv->oldspeed)))) gfar_update_link_state(priv); } -- cgit v0.10.2 From f4f8e73850589008095b1da3c8c17cf68bd1c62a Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 2 Mar 2015 18:49:56 -0800 Subject: openvswitch: Fix serialization of non-masked set actions. Set actions consist of a regular OVS_KEY_ATTR_* attribute nested inside of a OVS_ACTION_ATTR_SET action attribute. When converting masked actions back to regular set actions, the inner attribute length was not changed, ie, double the length being serialized. This patch fixes the bug. Fixes: 83d2b9b ("net: openvswitch: Support masked set actions.") Signed-off-by: Joe Stringer Acked-by: Jarno Rajahalme Signed-off-by: David S. Miller diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 216f20b..22b18c1 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2253,14 +2253,20 @@ static int masked_set_action_to_set_action_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); + struct nlattr *nla; size_t key_len = nla_len(ovs_key) / 2; /* Revert the conversion we did from a non-masked set action to * masked set action. */ - if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) + nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!nla) return -EMSGSIZE; + if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } -- cgit v0.10.2