diff options
Diffstat (limited to 'net')
221 files changed, 9200 insertions, 6047 deletions
diff --git a/net/Kconfig b/net/Kconfig index 6f676ab..2ddc904 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -217,6 +217,7 @@ source "net/dns_resolver/Kconfig" source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" +source "net/netlink/Kconfig" config RPS boolean diff --git a/net/atm/lec.h b/net/atm/lec.h index a86aff9..4149db1 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -58,7 +58,7 @@ struct lane2_ops { * field in h_type field. Data follows immediately after header. * 2. LLC Data frames whose total length, including LLC field and data, * but not padding required to meet the minimum data frame length, - * is less than 1536(0x0600) MUST be encoded by placing that length + * is less than ETH_P_802_3_MIN MUST be encoded by placing that length * in the h_type field. The LLC field follows header immediately. * 3. LLC data frames longer than this maximum MUST be encoded by placing * the value 0 in the h_type field. diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 8d8afb1..fa780b7 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -36,6 +36,20 @@ config BATMAN_ADV_DAT mesh networks. If you think that your network does not need this option you can safely remove it and save some space. +config BATMAN_ADV_NC + bool "Network Coding" + depends on BATMAN_ADV + default n + help + This option enables network coding, a mechanism that aims to + increase the overall network throughput by fusing multiple + packets in one transmission. + Note that interfaces controlled by batman-adv must be manually + configured to have promiscuous mode enabled in order to make + network coding work. + If you think that your network does not need this feature you + can safely disable it and save some space. + config BATMAN_ADV_DEBUG bool "B.A.T.M.A.N. debugging" depends on BATMAN_ADV diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index e45e3b4..acbac2a 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -1,5 +1,5 @@ # -# Copyright (C) 2007-2012 B.A.T.M.A.N. contributors: +# Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: # # Marek Lindner, Simon Wunderlich # @@ -30,6 +30,7 @@ batman-adv-y += hard-interface.o batman-adv-y += hash.o batman-adv-y += icmp_socket.o batman-adv-y += main.o +batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o batman-adv-y += ring_buffer.o batman-adv-y += routing.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a5bb0a76..071f288 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -27,6 +27,7 @@ #include "hard-interface.h" #include "send.h" #include "bat_algo.h" +#include "network-coding.h" static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, @@ -1185,6 +1186,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (!orig_neigh_node) goto out; + /* Update nc_nodes of the originator */ + batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node, + batadv_ogm_packet, is_single_hop_neigh); + orig_neigh_router = batadv_orig_node_get_router(orig_neigh_node); /* drop packet if sender is not a direct neighbor and if we diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 6ae8651..f186a55 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -32,6 +32,7 @@ #include "icmp_socket.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" +#include "network-coding.h" static struct dentry *batadv_debugfs; @@ -310,6 +311,14 @@ struct batadv_debuginfo { const struct file_operations fops; }; +#ifdef CONFIG_BATMAN_ADV_NC +static int batadv_nc_nodes_open(struct inode *inode, struct file *file) +{ + struct net_device *net_dev = (struct net_device *)inode->i_private; + return single_open(file, batadv_nc_nodes_seq_print_text, net_dev); +} +#endif + #define BATADV_DEBUGINFO(_name, _mode, _open) \ struct batadv_debuginfo batadv_debuginfo_##_name = { \ .attr = { .name = __stringify(_name), \ @@ -348,6 +357,9 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open); static BATADV_DEBUGINFO(transtable_local, S_IRUGO, batadv_transtable_local_open); static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open); +#ifdef CONFIG_BATMAN_ADV_NC +static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open); +#endif static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { &batadv_debuginfo_originators, @@ -362,6 +374,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = { #endif &batadv_debuginfo_transtable_local, &batadv_debuginfo_vis_data, +#ifdef CONFIG_BATMAN_ADV_NC + &batadv_debuginfo_nc_nodes, +#endif NULL, }; @@ -431,6 +446,9 @@ int batadv_debugfs_add_meshif(struct net_device *dev) } } + if (batadv_nc_init_debugfs(bat_priv) < 0) + goto rem_attr; + return 0; rem_attr: debugfs_remove_recursive(bat_priv->debug_dir); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index d54188a..8e15d96 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -816,7 +816,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, bool ret = false; struct batadv_dat_entry *dat_entry = NULL; struct sk_buff *skb_new; - struct batadv_hard_iface *primary_if = NULL; if (!atomic_read(&bat_priv->distributed_arp_table)) goto out; @@ -838,22 +837,18 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst); if (dat_entry) { - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - primary_if->soft_iface, ip_dst, hw_src, + bat_priv->soft_iface, ip_dst, hw_src, dat_entry->mac_addr, hw_src); if (!skb_new) goto out; skb_reset_mac_header(skb_new); skb_new->protocol = eth_type_trans(skb_new, - primary_if->soft_iface); + bat_priv->soft_iface); bat_priv->stats.rx_packets++; bat_priv->stats.rx_bytes += skb->len + ETH_HLEN; - primary_if->soft_iface->last_rx = jiffies; + bat_priv->soft_iface->last_rx = jiffies; netif_rx(skb_new); batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); @@ -866,8 +861,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, out: if (dat_entry) batadv_dat_entry_free_ref(dat_entry); - if (primary_if) - batadv_hardif_free_ref(primary_if); return ret; } @@ -887,7 +880,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, __be32 ip_src, ip_dst; uint8_t *hw_src; struct sk_buff *skb_new; - struct batadv_hard_iface *primary_if = NULL; struct batadv_dat_entry *dat_entry = NULL; bool ret = false; int err; @@ -912,12 +904,8 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!dat_entry) goto out; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src, - primary_if->soft_iface, ip_dst, hw_src, + bat_priv->soft_iface, ip_dst, hw_src, dat_entry->mac_addr, hw_src); if (!skb_new) @@ -941,8 +929,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, out: if (dat_entry) batadv_dat_entry_free_ref(dat_entry); - if (primary_if) - batadv_hardif_free_ref(primary_if); if (ret) kfree_skb(skb); return ret; diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 34f99a4..f105219 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -500,7 +500,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) rcu_read_unlock(); if (gw_count == 0) - seq_printf(seq, "No gateways in range ...\n"); + seq_puts(seq, "No gateways in range ...\n"); out: if (primary_if) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 368219e..522243a 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -307,11 +307,35 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) batadv_update_min_mtu(hard_iface->soft_iface); } +/** + * batadv_master_del_slave - remove hard_iface from the current master interface + * @slave: the interface enslaved in another master + * @master: the master from which slave has to be removed + * + * Invoke ndo_del_slave on master passing slave as argument. In this way slave + * is free'd and master can correctly change its internal state. + * Return 0 on success, a negative value representing the error otherwise + */ +static int batadv_master_del_slave(struct batadv_hard_iface *slave, + struct net_device *master) +{ + int ret; + + if (!master) + return 0; + + ret = -EBUSY; + if (master->netdev_ops->ndo_del_slave) + ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev); + + return ret; +} + int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, const char *iface_name) { struct batadv_priv *bat_priv; - struct net_device *soft_iface; + struct net_device *soft_iface, *master; __be16 ethertype = __constant_htons(ETH_P_BATMAN); int ret; @@ -321,11 +345,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, if (!atomic_inc_not_zero(&hard_iface->refcount)) goto out; - /* hard-interface is part of a bridge */ - if (hard_iface->net_dev->priv_flags & IFF_BRIDGE_PORT) - pr_err("You are about to enable batman-adv on '%s' which already is part of a bridge. Unless you know exactly what you are doing this is probably wrong and won't work the way you think it would.\n", - hard_iface->net_dev->name); - soft_iface = dev_get_by_name(&init_net, iface_name); if (!soft_iface) { @@ -347,12 +366,24 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, goto err_dev; } + /* check if the interface is enslaved in another virtual one and + * in that case unlink it first + */ + master = netdev_master_upper_dev_get(hard_iface->net_dev); + ret = batadv_master_del_slave(hard_iface, master); + if (ret) + goto err_dev; + hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); + ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface); + if (ret) + goto err_dev; + ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface); if (ret < 0) - goto err_dev; + goto err_upper; hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; @@ -362,7 +393,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv->bat_algo_ops->bat_iface_disable(hard_iface); bat_priv->num_ifaces--; hard_iface->if_status = BATADV_IF_NOT_IN_USE; - goto err_dev; + goto err_upper; } hard_iface->batman_adv_ptype.type = ethertype; @@ -401,14 +432,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, out: return 0; +err_upper: + netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface); err_dev: + hard_iface->soft_iface = NULL; dev_put(soft_iface); err: batadv_hardif_free_ref(hard_iface); return ret; } -void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) +void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, + enum batadv_hard_if_cleanup autodel) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if = NULL; @@ -446,9 +481,10 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface) dev_put(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (!bat_priv->num_ifaces) - batadv_softif_destroy(hard_iface->soft_iface); + if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO) + batadv_softif_destroy_sysfs(hard_iface->soft_iface); + netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface); hard_iface->soft_iface = NULL; batadv_hardif_free_ref(hard_iface); @@ -533,7 +569,8 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; @@ -563,6 +600,11 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; + if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) { + batadv_sysfs_add_meshif(net_dev); + return NOTIFY_DONE; + } + hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && event == NETDEV_REGISTER) hard_iface = batadv_hardif_add_interface(net_dev); diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h index 308437d..4989288 100644 --- a/net/batman-adv/hard-interface.h +++ b/net/batman-adv/hard-interface.h @@ -29,13 +29,24 @@ enum batadv_hard_if_state { BATADV_IF_I_WANT_YOU, }; +/** + * enum batadv_hard_if_cleanup - Cleanup modi for soft_iface after slave removal + * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface + * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed + */ +enum batadv_hard_if_cleanup { + BATADV_IF_CLEANUP_KEEP, + BATADV_IF_CLEANUP_AUTO, +}; + extern struct notifier_block batadv_hard_if_notifier; struct batadv_hard_iface* batadv_hardif_get_by_netdev(const struct net_device *net_dev); int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, const char *iface_name); -void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface); +void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, + enum batadv_hard_if_cleanup autodel); void batadv_hardif_remove_interfaces(void); int batadv_hardif_min_mtu(struct net_device *soft_iface); void batadv_update_min_mtu(struct net_device *soft_iface); diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 0488d70..6277735 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -35,6 +35,7 @@ #include "vis.h" #include "hash.h" #include "bat_algo.h" +#include "network-coding.h" /* List manipulations on hardif_list have to be rtnl_lock()'ed, @@ -70,6 +71,7 @@ static int __init batadv_init(void) batadv_debugfs_init(); register_netdevice_notifier(&batadv_hard_if_notifier); + rtnl_link_register(&batadv_link_ops); pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n", BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); @@ -80,6 +82,7 @@ static int __init batadv_init(void) static void __exit batadv_exit(void) { batadv_debugfs_destroy(); + rtnl_link_unregister(&batadv_link_ops); unregister_netdevice_notifier(&batadv_hard_if_notifier); batadv_hardif_remove_interfaces(); @@ -135,6 +138,10 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; + ret = batadv_nc_init(bat_priv); + if (ret < 0) + goto err; + atomic_set(&bat_priv->gw.reselect, 0); atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE); @@ -157,6 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_gw_node_purge(bat_priv); batadv_originator_free(bat_priv); + batadv_nc_free(bat_priv); batadv_tt_free(bat_priv); @@ -411,7 +419,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; - seq_printf(seq, "Available routing algorithms:\n"); + seq_puts(seq, "Available routing algorithms:\n"); hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { seq_printf(seq, "%s\n", bat_algo_ops->name); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index ced08b9..f90f5bc 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.1.0" +#define BATADV_SOURCE_VERSION "2013.2.0" #endif /* B.A.T.M.A.N. parameters */ @@ -105,6 +105,8 @@ #define BATADV_RESET_PROTECTION_MS 30000 #define BATADV_EXPECTED_SEQNO_RANGE 65536 +#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */ + enum batadv_mesh_state { BATADV_MESH_INACTIVE, BATADV_MESH_ACTIVE, @@ -150,6 +152,7 @@ enum batadv_uev_type { #include <linux/percpu.h> #include <linux/slab.h> #include <net/sock.h> /* struct sock */ +#include <net/rtnetlink.h> #include <linux/jiffies.h> #include <linux/seq_file.h> #include "types.h" @@ -185,6 +188,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr); * @BATADV_DBG_TT: translation table messages * @BATADV_DBG_BLA: bridge loop avoidance messages * @BATADV_DBG_DAT: ARP snooping and DAT related messages + * @BATADV_DBG_NC: network coding related messages * @BATADV_DBG_ALL: the union of all the above log levels */ enum batadv_dbg_level { @@ -193,7 +197,8 @@ enum batadv_dbg_level { BATADV_DBG_TT = BIT(2), BATADV_DBG_BLA = BIT(3), BATADV_DBG_DAT = BIT(4), - BATADV_DBG_ALL = 31, + BATADV_DBG_NC = BIT(5), + BATADV_DBG_ALL = 63, }; #ifdef CONFIG_BATMAN_ADV_DEBUG @@ -298,4 +303,10 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv, return sum; } +/* Define a macro to reach the control buffer of the skb. The members of the + * control buffer are defined in struct batadv_skb_cb in types.h. + * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h. + */ +#define BATADV_SKB_CB(__skb) ((struct batadv_skb_cb *)&((__skb)->cb[0])) + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c new file mode 100644 index 0000000..6b9a544 --- /dev/null +++ b/net/batman-adv/network-coding.c @@ -0,0 +1,1821 @@ +/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll, Jeppe Ledet-Pedersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#include <linux/debugfs.h> + +#include "main.h" +#include "hash.h" +#include "network-coding.h" +#include "send.h" +#include "originator.h" +#include "hard-interface.h" +#include "routing.h" + +static struct lock_class_key batadv_nc_coding_hash_lock_class_key; +static struct lock_class_key batadv_nc_decoding_hash_lock_class_key; + +static void batadv_nc_worker(struct work_struct *work); +static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); + +/** + * batadv_nc_start_timer - initialise the nc periodic worker + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_nc_start_timer(struct batadv_priv *bat_priv) +{ + queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work, + msecs_to_jiffies(10)); +} + +/** + * batadv_nc_init - initialise coding hash table and start house keeping + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_nc_init(struct batadv_priv *bat_priv) +{ + bat_priv->nc.timestamp_fwd_flush = jiffies; + bat_priv->nc.timestamp_sniffed_purge = jiffies; + + if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash) + return 0; + + bat_priv->nc.coding_hash = batadv_hash_new(128); + if (!bat_priv->nc.coding_hash) + goto err; + + batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + &batadv_nc_coding_hash_lock_class_key); + + bat_priv->nc.decoding_hash = batadv_hash_new(128); + if (!bat_priv->nc.decoding_hash) + goto err; + + batadv_hash_set_lock_class(bat_priv->nc.coding_hash, + &batadv_nc_decoding_hash_lock_class_key); + + /* Register our packet type */ + if (batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet) < 0) + goto err; + + INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); + batadv_nc_start_timer(bat_priv); + + return 0; + +err: + return -ENOMEM; +} + +/** + * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) +{ + atomic_set(&bat_priv->network_coding, 1); + bat_priv->nc.min_tq = 200; + bat_priv->nc.max_fwd_delay = 10; + bat_priv->nc.max_buffer_time = 200; +} + +/** + * batadv_nc_init_orig - initialise the nc fields of an orig_node + * @orig_node: the orig_node which is going to be initialised + */ +void batadv_nc_init_orig(struct batadv_orig_node *orig_node) +{ + INIT_LIST_HEAD(&orig_node->in_coding_list); + INIT_LIST_HEAD(&orig_node->out_coding_list); + spin_lock_init(&orig_node->in_coding_list_lock); + spin_lock_init(&orig_node->out_coding_list_lock); +} + +/** + * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove + * its refcount on the orig_node + * @rcu: rcu pointer of the nc node + */ +static void batadv_nc_node_free_rcu(struct rcu_head *rcu) +{ + struct batadv_nc_node *nc_node; + + nc_node = container_of(rcu, struct batadv_nc_node, rcu); + batadv_orig_node_free_ref(nc_node->orig_node); + kfree(nc_node); +} + +/** + * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly + * frees it + * @nc_node: the nc node to free + */ +static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node) +{ + if (atomic_dec_and_test(&nc_node->refcount)) + call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu); +} + +/** + * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly + * frees it + * @nc_path: the nc node to free + */ +static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path) +{ + if (atomic_dec_and_test(&nc_path->refcount)) + kfree_rcu(nc_path, rcu); +} + +/** + * batadv_nc_packet_free - frees nc packet + * @nc_packet: the nc packet to free + */ +static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet) +{ + if (nc_packet->skb) + kfree_skb(nc_packet->skb); + + batadv_nc_path_free_ref(nc_packet->nc_path); + kfree(nc_packet); +} + +/** + * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged + * @bat_priv: the bat priv with all the soft interface information + * @nc_node: the nc node to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, + struct batadv_nc_node *nc_node) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT); +} + +/** + * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + /* purge the path when no packets has been added for 10 times the + * max_fwd_delay time + */ + return batadv_has_timed_out(nc_path->last_valid, + bat_priv->nc.max_fwd_delay * 10); +} + +/** + * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path to check + * + * Returns true if the entry has to be purged now, false otherwise + */ +static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path) +{ + if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) + return true; + + /* purge the path when no packets has been added for 10 times the + * max_buffer time + */ + return batadv_has_timed_out(nc_path->last_valid, + bat_priv->nc.max_buffer_time*10); +} + +/** + * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale + * entries + * @bat_priv: the bat priv with all the soft interface information + * @list: list of nc nodes + * @lock: nc node list lock + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. This function takes the nc node as argument and has to return + * a boolean value: true if the entry has to be deleted, false + * otherwise + */ +static void +batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv, + struct list_head *list, + spinlock_t *lock, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + struct batadv_nc_node *nc_node, *nc_node_tmp; + + /* For each nc_node in list */ + spin_lock_bh(lock); + list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) { + /* if an helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(bat_priv, nc_node)) + continue; + + batadv_dbg(BATADV_DBG_NC, bat_priv, + "Removing nc_node %pM -> %pM\n", + nc_node->addr, nc_node->orig_node->orig); + list_del_rcu(&nc_node->list); + batadv_nc_node_free_ref(nc_node); + } + spin_unlock_bh(lock); +} + +/** + * batadv_nc_purge_orig - purges all nc node data attached of the given + * originator + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig_node with the nc node entries to be purged + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. This function takes the nc node as argument and has to return + * a boolean value: true is the entry has to be deleted, false + * otherwise + */ +void batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + /* Check ingoing nc_node's of this orig_node */ + batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list, + &orig_node->in_coding_list_lock, + to_purge); + + /* Check outgoing nc_node's of this orig_node */ + batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list, + &orig_node->out_coding_list_lock, + to_purge); +} + +/** + * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they + * have timed out nc nodes + * @bat_priv: the bat priv with all the soft interface information + */ +static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + uint32_t i; + + if (!hash) + return; + + /* For each orig_node */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) + batadv_nc_purge_orig(bat_priv, orig_node, + batadv_nc_to_purge_nc_node); + rcu_read_unlock(); + } +} + +/** + * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove + * unused ones + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the nc paths to check + * @to_purge: function in charge to decide whether an entry has to be purged or + * not. This function takes the nc node as argument and has to return + * a boolean value: true is the entry has to be deleted, false + * otherwise + */ +static void batadv_nc_purge_paths(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_path *)) +{ + struct hlist_head *head; + struct hlist_node *node_tmp; + struct batadv_nc_path *nc_path; + spinlock_t *lock; /* Protects lists in hash */ + uint32_t i; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + lock = &hash->list_locks[i]; + + /* For each nc_path in this bin */ + spin_lock_bh(lock); + hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) { + /* if an helper function has been passed as parameter, + * ask it if the entry has to be purged or not + */ + if (to_purge && !to_purge(bat_priv, nc_path)) + continue; + + /* purging an non-empty nc_path should never happen, but + * is observed under high CPU load. Delay the purging + * until next iteration to allow the packet_list to be + * emptied first. + */ + if (!unlikely(list_empty(&nc_path->packet_list))) { + net_ratelimited_function(printk, + KERN_WARNING + "Skipping free of non-empty nc_path (%pM -> %pM)!\n", + nc_path->prev_hop, + nc_path->next_hop); + continue; + } + + /* nc_path is unused, so remove it */ + batadv_dbg(BATADV_DBG_NC, bat_priv, + "Remove nc_path %pM -> %pM\n", + nc_path->prev_hop, nc_path->next_hop); + hlist_del_rcu(&nc_path->hash_entry); + batadv_nc_path_free_ref(nc_path); + } + spin_unlock_bh(lock); + } +} + +/** + * batadv_nc_hash_key_gen - computes the nc_path hash key + * @key: buffer to hold the final hash key + * @src: source ethernet mac address going into the hash key + * @dst: destination ethernet mac address going into the hash key + */ +static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src, + const char *dst) +{ + memcpy(key->prev_hop, src, sizeof(key->prev_hop)); + memcpy(key->next_hop, dst, sizeof(key->next_hop)); +} + +/** + * batadv_nc_hash_choose - compute the hash value for an nc path + * @data: data to hash + * @size: size of the hash table + * + * Returns the selected index in the hash table for the given data. + */ +static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size) +{ + const struct batadv_nc_path *nc_path = data; + uint32_t hash = 0; + + hash = batadv_hash_bytes(hash, &nc_path->prev_hop, + sizeof(nc_path->prev_hop)); + hash = batadv_hash_bytes(hash, &nc_path->next_hop, + sizeof(nc_path->next_hop)); + + hash += (hash << 3); + hash ^= (hash >> 11); + hash += (hash << 15); + + return hash % size; +} + +/** + * batadv_nc_hash_compare - comparing function used in the network coding hash + * tables + * @node: node in the local table + * @data2: second object to compare the node to + * + * Returns 1 if the two entry are the same, 0 otherwise + */ +static int batadv_nc_hash_compare(const struct hlist_node *node, + const void *data2) +{ + const struct batadv_nc_path *nc_path1, *nc_path2; + + nc_path1 = container_of(node, struct batadv_nc_path, hash_entry); + nc_path2 = data2; + + /* Return 1 if the two keys are identical */ + if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop, + sizeof(nc_path1->prev_hop)) != 0) + return 0; + + if (memcmp(nc_path1->next_hop, nc_path2->next_hop, + sizeof(nc_path1->next_hop)) != 0) + return 0; + + return 1; +} + +/** + * batadv_nc_hash_find - search for an existing nc path and return it + * @hash: hash table containing the nc path + * @data: search key + * + * Returns the nc_path if found, NULL otherwise. + */ +static struct batadv_nc_path * +batadv_nc_hash_find(struct batadv_hashtable *hash, + void *data) +{ + struct hlist_head *head; + struct batadv_nc_path *nc_path, *nc_path_tmp = NULL; + int index; + + if (!hash) + return NULL; + + index = batadv_nc_hash_choose(data, hash->size); + head = &hash->table[index]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, head, hash_entry) { + if (!batadv_nc_hash_compare(&nc_path->hash_entry, data)) + continue; + + if (!atomic_inc_not_zero(&nc_path->refcount)) + continue; + + nc_path_tmp = nc_path; + break; + } + rcu_read_unlock(); + + return nc_path_tmp; +} + +/** + * batadv_nc_send_packet - send non-coded packet and free nc_packet struct + * @nc_packet: the nc packet to send + */ +static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) +{ + batadv_send_skb_packet(nc_packet->skb, + nc_packet->neigh_node->if_incoming, + nc_packet->nc_path->next_hop); + nc_packet->skb = NULL; + batadv_nc_packet_free(nc_packet); +} + +/** + * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet. + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path the packet belongs to + * @nc_packet: the nc packet to be checked + * + * Checks whether the given sniffed (overheard) nc_packet has hit its buffering + * timeout. If so, the packet is no longer kept and the entry deleted from the + * queue. Has to be called with the appropriate locks. + * + * Returns false as soon as the entry in the fifo queue has not been timed out + * yet and true otherwise. + */ +static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path, + struct batadv_nc_packet *nc_packet) +{ + unsigned long timeout = bat_priv->nc.max_buffer_time; + bool res = false; + + /* Packets are added to tail, so the remaining packets did not time + * out and we can stop processing the current queue + */ + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE && + !batadv_has_timed_out(nc_packet->timestamp, timeout)) + goto out; + + /* purge nc packet */ + list_del(&nc_packet->list); + batadv_nc_packet_free(nc_packet); + + res = true; + +out: + return res; +} + +/** + * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet. + * @bat_priv: the bat priv with all the soft interface information + * @nc_path: the nc path the packet belongs to + * @nc_packet: the nc packet to be checked + * + * Checks whether the given nc packet has hit its forward timeout. If so, the + * packet is no longer delayed, immediately sent and the entry deleted from the + * queue. Has to be called with the appropriate locks. + * + * Returns false as soon as the entry in the fifo queue has not been timed out + * yet and true otherwise. + */ +static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, + struct batadv_nc_path *nc_path, + struct batadv_nc_packet *nc_packet) +{ + unsigned long timeout = bat_priv->nc.max_fwd_delay; + + /* Packets are added to tail, so the remaining packets did not time + * out and we can stop processing the current queue + */ + if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE && + !batadv_has_timed_out(nc_packet->timestamp, timeout)) + return false; + + /* Send packet */ + batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); + batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, + nc_packet->skb->len + ETH_HLEN); + list_del(&nc_packet->list); + batadv_nc_send_packet(nc_packet); + + return true; +} + +/** + * batadv_nc_process_nc_paths - traverse given nc packet pool and free timed out + * nc packets + * @bat_priv: the bat priv with all the soft interface information + * @hash: to be processed hash table + * @process_fn: Function called to process given nc packet. Should return true + * to encourage this function to proceed with the next packet. + * Otherwise the rest of the current queue is skipped. + */ +static void +batadv_nc_process_nc_paths(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + bool (*process_fn)(struct batadv_priv *, + struct batadv_nc_path *, + struct batadv_nc_packet *)) +{ + struct hlist_head *head; + struct batadv_nc_packet *nc_packet, *nc_packet_tmp; + struct batadv_nc_path *nc_path; + bool ret; + int i; + + if (!hash) + return; + + /* Loop hash table bins */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + /* Loop coding paths */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, head, hash_entry) { + /* Loop packets */ + spin_lock_bh(&nc_path->packet_list_lock); + list_for_each_entry_safe(nc_packet, nc_packet_tmp, + &nc_path->packet_list, list) { + ret = process_fn(bat_priv, nc_path, nc_packet); + if (!ret) + break; + } + spin_unlock_bh(&nc_path->packet_list_lock); + } + rcu_read_unlock(); + } +} + +/** + * batadv_nc_worker - periodic task for house keeping related to network coding + * @work: kernel work struct + */ +static void batadv_nc_worker(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct batadv_priv_nc *priv_nc; + struct batadv_priv *bat_priv; + unsigned long timeout; + + delayed_work = container_of(work, struct delayed_work, work); + priv_nc = container_of(delayed_work, struct batadv_priv_nc, work); + bat_priv = container_of(priv_nc, struct batadv_priv, nc); + + batadv_nc_purge_orig_hash(bat_priv); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, + batadv_nc_to_purge_nc_path_coding); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, + batadv_nc_to_purge_nc_path_decoding); + + timeout = bat_priv->nc.max_fwd_delay; + + if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) { + batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash, + batadv_nc_fwd_flush); + bat_priv->nc.timestamp_fwd_flush = jiffies; + } + + if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge, + bat_priv->nc.max_buffer_time)) { + batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash, + batadv_nc_sniffed_purge); + bat_priv->nc.timestamp_sniffed_purge = jiffies; + } + + /* Schedule a new check */ + batadv_nc_start_timer(bat_priv); +} + +/** + * batadv_can_nc_with_orig - checks whether the given orig node is suitable for + * coding or not + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: neighboring orig node which may be used as nc candidate + * @ogm_packet: incoming ogm packet also used for the checks + * + * Returns true if: + * 1) The OGM must have the most recent sequence number. + * 2) The TTL must be decremented by one and only one. + * 3) The OGM must be received from the first hop from orig_node. + * 4) The TQ value of the OGM must be above bat_priv->nc.min_tq. + */ +static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_ogm_packet *ogm_packet) +{ + if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) + return false; + if (orig_node->last_ttl != ogm_packet->header.ttl + 1) + return false; + if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) + return false; + if (ogm_packet->tq < bat_priv->nc.min_tq) + return false; + + return true; +} + +/** + * batadv_nc_find_nc_node - search for an existing nc node and return it + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found, NULL otherwise. + */ +static struct batadv_nc_node +*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) +{ + struct batadv_nc_node *nc_node, *nc_node_out = NULL; + struct list_head *list; + + if (in_coding) + list = &orig_neigh_node->in_coding_list; + else + list = &orig_neigh_node->out_coding_list; + + /* Traverse list of nc_nodes to orig_node */ + rcu_read_lock(); + list_for_each_entry_rcu(nc_node, list, list) { + if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) + continue; + + if (!atomic_inc_not_zero(&nc_node->refcount)) + continue; + + /* Found a match */ + nc_node_out = nc_node; + break; + } + rcu_read_unlock(); + + return nc_node_out; +} + +/** + * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was + * not found + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found or created, NULL in case of an error. + */ +static struct batadv_nc_node +*batadv_nc_get_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + bool in_coding) +{ + struct batadv_nc_node *nc_node; + spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ + struct list_head *list; + + /* Check if nc_node is already added */ + nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); + + /* Node found */ + if (nc_node) + return nc_node; + + nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); + if (!nc_node) + return NULL; + + if (!atomic_inc_not_zero(&orig_neigh_node->refcount)) + goto free; + + /* Initialize nc_node */ + INIT_LIST_HEAD(&nc_node->list); + memcpy(nc_node->addr, orig_node->orig, ETH_ALEN); + nc_node->orig_node = orig_neigh_node; + atomic_set(&nc_node->refcount, 2); + + /* Select ingoing or outgoing coding node */ + if (in_coding) { + lock = &orig_neigh_node->in_coding_list_lock; + list = &orig_neigh_node->in_coding_list; + } else { + lock = &orig_neigh_node->out_coding_list_lock; + list = &orig_neigh_node->out_coding_list; + } + + batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", + nc_node->addr, nc_node->orig_node->orig); + + /* Add nc_node to orig_node */ + spin_lock_bh(lock); + list_add_tail_rcu(&nc_node->list, list); + spin_unlock_bh(lock); + + return nc_node; + +free: + kfree(nc_node); + return NULL; +} + +/** + * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node structs + * (best called on incoming OGMs) + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + * (can be equal to orig_node) + * @ogm_packet: incoming ogm packet + * @is_single_hop_neigh: orig_node is a single hop neighbor + */ +void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh) +{ + struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* accept ogms from 'good' neighbors and single hop neighbors */ + if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) && + !is_single_hop_neigh) + goto out; + + /* Add orig_node as in_nc_node on hop */ + in_nc_node = batadv_nc_get_nc_node(bat_priv, orig_node, + orig_neigh_node, true); + if (!in_nc_node) + goto out; + + in_nc_node->last_seen = jiffies; + + /* Add hop as out_nc_node on orig_node */ + out_nc_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node, + orig_node, false); + if (!out_nc_node) + goto out; + + out_nc_node->last_seen = jiffies; + +out: + if (in_nc_node) + batadv_nc_node_free_ref(in_nc_node); + if (out_nc_node) + batadv_nc_node_free_ref(out_nc_node); +} + +/** + * batadv_nc_get_path - get existing nc_path or allocate a new one + * @bat_priv: the bat priv with all the soft interface information + * @hash: hash table containing the nc path + * @src: ethernet source address - first half of the nc path search key + * @dst: ethernet destination address - second half of the nc path search key + * + * Returns pointer to nc_path if the path was found or created, returns NULL + * on error. + */ +static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, + struct batadv_hashtable *hash, + uint8_t *src, + uint8_t *dst) +{ + int hash_added; + struct batadv_nc_path *nc_path, nc_path_key; + + batadv_nc_hash_key_gen(&nc_path_key, src, dst); + + /* Search for existing nc_path */ + nc_path = batadv_nc_hash_find(hash, (void *)&nc_path_key); + + if (nc_path) { + /* Set timestamp to delay removal of nc_path */ + nc_path->last_valid = jiffies; + return nc_path; + } + + /* No existing nc_path was found; create a new */ + nc_path = kzalloc(sizeof(*nc_path), GFP_ATOMIC); + + if (!nc_path) + return NULL; + + /* Initialize nc_path */ + INIT_LIST_HEAD(&nc_path->packet_list); + spin_lock_init(&nc_path->packet_list_lock); + atomic_set(&nc_path->refcount, 2); + nc_path->last_valid = jiffies; + memcpy(nc_path->next_hop, dst, ETH_ALEN); + memcpy(nc_path->prev_hop, src, ETH_ALEN); + + batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n", + nc_path->prev_hop, + nc_path->next_hop); + + /* Add nc_path to hash table */ + hash_added = batadv_hash_add(hash, batadv_nc_hash_compare, + batadv_nc_hash_choose, &nc_path_key, + &nc_path->hash_entry); + + if (hash_added < 0) { + kfree(nc_path); + return NULL; + } + + return nc_path; +} + +/** + * batadv_nc_random_weight_tq - scale the receivers TQ-value to avoid unfair + * selection of a receiver with slightly lower TQ than the other + * @tq: to be weighted tq value + */ +static uint8_t batadv_nc_random_weight_tq(uint8_t tq) +{ + uint8_t rand_val, rand_tq; + + get_random_bytes(&rand_val, sizeof(rand_val)); + + /* randomize the estimated packet loss (max TQ - estimated TQ) */ + rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq); + + /* normalize the randomized packet loss */ + rand_tq /= BATADV_TQ_MAX_VALUE; + + /* convert to (randomized) estimated tq again */ + return BATADV_TQ_MAX_VALUE - rand_tq; +} + +/** + * batadv_nc_memxor - XOR destination with source + * @dst: byte array to XOR into + * @src: byte array to XOR from + * @len: length of destination array + */ +static void batadv_nc_memxor(char *dst, const char *src, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < len; ++i) + dst[i] ^= src[i]; +} + +/** + * batadv_nc_code_packets - code a received unicast_packet with an nc packet + * into a coded_packet and send it + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to forward + * @ethhdr: pointer to the ethernet header inside the skb + * @nc_packet: structure containing the packet to the skb can be coded with + * @neigh_node: next hop to forward packet to + * + * Returns true if both packets are consumed, false otherwise. + */ +static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, + struct sk_buff *skb, + struct ethhdr *ethhdr, + struct batadv_nc_packet *nc_packet, + struct batadv_neigh_node *neigh_node) +{ + uint8_t tq_weighted_neigh, tq_weighted_coding; + struct sk_buff *skb_dest, *skb_src; + struct batadv_unicast_packet *packet1; + struct batadv_unicast_packet *packet2; + struct batadv_coded_packet *coded_packet; + struct batadv_neigh_node *neigh_tmp, *router_neigh; + struct batadv_neigh_node *router_coding = NULL; + uint8_t *first_source, *first_dest, *second_source, *second_dest; + __be32 packet_id1, packet_id2; + size_t count; + bool res = false; + int coding_len; + int unicast_size = sizeof(*packet1); + int coded_size = sizeof(*coded_packet); + int header_add = coded_size - unicast_size; + + router_neigh = batadv_orig_node_get_router(neigh_node->orig_node); + if (!router_neigh) + goto out; + + neigh_tmp = nc_packet->neigh_node; + router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node); + if (!router_coding) + goto out; + + tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg); + tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg); + + /* Select one destination for the MAC-header dst-field based on + * weighted TQ-values. + */ + if (tq_weighted_neigh >= tq_weighted_coding) { + /* Destination from nc_packet is selected for MAC-header */ + first_dest = nc_packet->nc_path->next_hop; + first_source = nc_packet->nc_path->prev_hop; + second_dest = neigh_node->addr; + second_source = ethhdr->h_source; + packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data; + packet2 = (struct batadv_unicast_packet *)skb->data; + packet_id1 = nc_packet->packet_id; + packet_id2 = batadv_skb_crc32(skb, + skb->data + sizeof(*packet2)); + } else { + /* Destination for skb is selected for MAC-header */ + first_dest = neigh_node->addr; + first_source = ethhdr->h_source; + second_dest = nc_packet->nc_path->next_hop; + second_source = nc_packet->nc_path->prev_hop; + packet1 = (struct batadv_unicast_packet *)skb->data; + packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data; + packet_id1 = batadv_skb_crc32(skb, + skb->data + sizeof(*packet1)); + packet_id2 = nc_packet->packet_id; + } + + /* Instead of zero padding the smallest data buffer, we + * code into the largest. + */ + if (skb->len <= nc_packet->skb->len) { + skb_dest = nc_packet->skb; + skb_src = skb; + } else { + skb_dest = skb; + skb_src = nc_packet->skb; + } + + /* coding_len is used when decoding the packet shorter packet */ + coding_len = skb_src->len - unicast_size; + + if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0) + goto out; + + skb_push(skb_dest, header_add); + + coded_packet = (struct batadv_coded_packet *)skb_dest->data; + skb_reset_mac_header(skb_dest); + + coded_packet->header.packet_type = BATADV_CODED; + coded_packet->header.version = BATADV_COMPAT_VERSION; + coded_packet->header.ttl = packet1->header.ttl; + + /* Info about first unicast packet */ + memcpy(coded_packet->first_source, first_source, ETH_ALEN); + memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN); + coded_packet->first_crc = packet_id1; + coded_packet->first_ttvn = packet1->ttvn; + + /* Info about second unicast packet */ + memcpy(coded_packet->second_dest, second_dest, ETH_ALEN); + memcpy(coded_packet->second_source, second_source, ETH_ALEN); + memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN); + coded_packet->second_crc = packet_id2; + coded_packet->second_ttl = packet2->header.ttl; + coded_packet->second_ttvn = packet2->ttvn; + coded_packet->coded_len = htons(coding_len); + + /* This is where the magic happens: Code skb_src into skb_dest */ + batadv_nc_memxor(skb_dest->data + coded_size, + skb_src->data + unicast_size, coding_len); + + /* Update counters accordingly */ + if (BATADV_SKB_CB(skb_src)->decoded && + BATADV_SKB_CB(skb_dest)->decoded) { + /* Both packets are recoded */ + count = skb_src->len + ETH_HLEN; + count += skb_dest->len + ETH_HLEN; + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count); + } else if (!BATADV_SKB_CB(skb_src)->decoded && + !BATADV_SKB_CB(skb_dest)->decoded) { + /* Both packets are newly coded */ + count = skb_src->len + ETH_HLEN; + count += skb_dest->len + ETH_HLEN; + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count); + } else if (BATADV_SKB_CB(skb_src)->decoded && + !BATADV_SKB_CB(skb_dest)->decoded) { + /* skb_src recoded and skb_dest is newly coded */ + batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, + skb_src->len + ETH_HLEN); + batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, + skb_dest->len + ETH_HLEN); + } else if (!BATADV_SKB_CB(skb_src)->decoded && + BATADV_SKB_CB(skb_dest)->decoded) { + /* skb_src is newly coded and skb_dest is recoded */ + batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, + skb_src->len + ETH_HLEN); + batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, + skb_dest->len + ETH_HLEN); + } + + /* skb_src is now coded into skb_dest, so free it */ + kfree_skb(skb_src); + + /* avoid duplicate free of skb from nc_packet */ + nc_packet->skb = NULL; + batadv_nc_packet_free(nc_packet); + + /* Send the coded packet and return true */ + batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest); + res = true; +out: + if (router_neigh) + batadv_neigh_node_free_ref(router_neigh); + if (router_coding) + batadv_neigh_node_free_ref(router_coding); + return res; +} + +/** + * batadv_nc_skb_coding_possible - true if a decoded skb is available at dst. + * @skb: data skb to forward + * @dst: destination mac address of the other skb to code with + * @src: source mac address of skb + * + * Whenever we network code a packet we have to check whether we received it in + * a network coded form. If so, we may not be able to use it for coding because + * some neighbors may also have received (overheard) the packet in the network + * coded form without being able to decode it. It is hard to know which of the + * neighboring nodes was able to decode the packet, therefore we can only + * re-code the packet if the source of the previous encoded packet is involved. + * Since the source encoded the packet we can be certain it has all necessary + * decode information. + * + * Returns true if coding of a decoded packet is allowed. + */ +static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, + uint8_t *dst, uint8_t *src) +{ + if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) + return false; + else + return true; +} + +/** + * batadv_nc_path_search - Find the coding path matching in_nc_node and + * out_nc_node to retrieve a buffered packet that can be used for coding. + * @bat_priv: the bat priv with all the soft interface information + * @in_nc_node: pointer to skb next hop's neighbor nc node + * @out_nc_node: pointer to skb source's neighbor nc node + * @skb: data skb to forward + * @eth_dst: next hop mac address of skb + * + * Returns true if coding of a decoded skb is allowed. + */ +static struct batadv_nc_packet * +batadv_nc_path_search(struct batadv_priv *bat_priv, + struct batadv_nc_node *in_nc_node, + struct batadv_nc_node *out_nc_node, + struct sk_buff *skb, + uint8_t *eth_dst) +{ + struct batadv_nc_path *nc_path, nc_path_key; + struct batadv_nc_packet *nc_packet_out = NULL; + struct batadv_nc_packet *nc_packet, *nc_packet_tmp; + struct batadv_hashtable *hash = bat_priv->nc.coding_hash; + int idx; + + if (!hash) + return NULL; + + /* Create almost path key */ + batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr, + out_nc_node->addr); + idx = batadv_nc_hash_choose(&nc_path_key, hash->size); + + /* Check for coding opportunities in this nc_path */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) { + if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr)) + continue; + + if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr)) + continue; + + spin_lock_bh(&nc_path->packet_list_lock); + if (list_empty(&nc_path->packet_list)) { + spin_unlock_bh(&nc_path->packet_list_lock); + continue; + } + + list_for_each_entry_safe(nc_packet, nc_packet_tmp, + &nc_path->packet_list, list) { + if (!batadv_nc_skb_coding_possible(nc_packet->skb, + eth_dst, + in_nc_node->addr)) + continue; + + /* Coding opportunity is found! */ + list_del(&nc_packet->list); + nc_packet_out = nc_packet; + break; + } + + spin_unlock_bh(&nc_path->packet_list_lock); + break; + } + rcu_read_unlock(); + + return nc_packet_out; +} + +/** + * batadv_nc_skb_src_search - Loops through the list of neighoring nodes of the + * skb's sender (may be equal to the originator). + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to forward + * @eth_dst: next hop mac address of skb + * @eth_src: source mac address of skb + * @in_nc_node: pointer to skb next hop's neighbor nc node + * + * Returns an nc packet if a suitable coding packet was found, NULL otherwise. + */ +static struct batadv_nc_packet * +batadv_nc_skb_src_search(struct batadv_priv *bat_priv, + struct sk_buff *skb, + uint8_t *eth_dst, + uint8_t *eth_src, + struct batadv_nc_node *in_nc_node) +{ + struct batadv_orig_node *orig_node; + struct batadv_nc_node *out_nc_node; + struct batadv_nc_packet *nc_packet = NULL; + + orig_node = batadv_orig_hash_find(bat_priv, eth_src); + if (!orig_node) + return NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(out_nc_node, + &orig_node->out_coding_list, list) { + /* Check if the skb is decoded and if recoding is possible */ + if (!batadv_nc_skb_coding_possible(skb, + out_nc_node->addr, eth_src)) + continue; + + /* Search for an opportunity in this nc_path */ + nc_packet = batadv_nc_path_search(bat_priv, in_nc_node, + out_nc_node, skb, eth_dst); + if (nc_packet) + break; + } + rcu_read_unlock(); + + batadv_orig_node_free_ref(orig_node); + return nc_packet; +} + +/** + * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the + * unicast skb before it is stored for use in later decoding + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to store + * @eth_dst_new: new destination mac address of skb + */ +static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, + struct sk_buff *skb, + uint8_t *eth_dst_new) +{ + struct ethhdr *ethhdr; + + /* Copy skb header to change the mac header */ + skb = pskb_copy(skb, GFP_ATOMIC); + if (!skb) + return; + + /* Set the mac header as if we actually sent the packet uncoded */ + ethhdr = (struct ethhdr *)skb_mac_header(skb); + memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); + memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); + + /* Set data pointer to MAC header to mimic packets from our tx path */ + skb_push(skb, ETH_HLEN); + + /* Add the packet to the decoding packet pool */ + batadv_nc_skb_store_for_decoding(bat_priv, skb); + + /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free + * our ref + */ + kfree_skb(skb); +} + +/** + * batadv_nc_skb_dst_search - Loops through list of neighboring nodes to dst. + * @skb: data skb to forward + * @neigh_node: next hop to forward packet to + * @ethhdr: pointer to the ethernet header inside the skb + * + * Loops through list of neighboring nodes the next hop has a good connection to + * (receives OGMs with a sufficient quality). We need to find a neighbor of our + * next hop that potentially sent a packet which our next hop also received + * (overheard) and has stored for later decoding. + * + * Returns true if the skb was consumed (encoded packet sent) or false otherwise + */ +static bool batadv_nc_skb_dst_search(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct batadv_priv *bat_priv = netdev_priv(netdev); + struct batadv_orig_node *orig_node = neigh_node->orig_node; + struct batadv_nc_node *nc_node; + struct batadv_nc_packet *nc_packet = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) { + /* Search for coding opportunity with this in_nc_node */ + nc_packet = batadv_nc_skb_src_search(bat_priv, skb, + neigh_node->addr, + ethhdr->h_source, nc_node); + + /* Opportunity was found, so stop searching */ + if (nc_packet) + break; + } + rcu_read_unlock(); + + if (!nc_packet) + return false; + + /* Save packets for later decoding */ + batadv_nc_skb_store_before_coding(bat_priv, skb, + neigh_node->addr); + batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb, + nc_packet->neigh_node->addr); + + /* Code and send packets */ + if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet, + neigh_node)) + return true; + + /* out of mem ? Coding failed - we have to free the buffered packet + * to avoid memleaks. The skb passed as argument will be dealt with + * by the calling function. + */ + batadv_nc_send_packet(nc_packet); + return false; +} + +/** + * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding + * @skb: skb to add to path + * @nc_path: path to add skb to + * @neigh_node: next hop to forward packet to + * @packet_id: checksum to identify packet + * + * Returns true if the packet was buffered or false in case of an error. + */ +static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, + struct batadv_nc_path *nc_path, + struct batadv_neigh_node *neigh_node, + __be32 packet_id) +{ + struct batadv_nc_packet *nc_packet; + + nc_packet = kzalloc(sizeof(*nc_packet), GFP_ATOMIC); + if (!nc_packet) + return false; + + /* Initialize nc_packet */ + nc_packet->timestamp = jiffies; + nc_packet->packet_id = packet_id; + nc_packet->skb = skb; + nc_packet->neigh_node = neigh_node; + nc_packet->nc_path = nc_path; + + /* Add coding packet to list */ + spin_lock_bh(&nc_path->packet_list_lock); + list_add_tail(&nc_packet->list, &nc_path->packet_list); + spin_unlock_bh(&nc_path->packet_list_lock); + + return true; +} + +/** + * batadv_nc_skb_forward - try to code a packet or add it to the coding packet + * buffer + * @skb: data skb to forward + * @neigh_node: next hop to forward packet to + * @ethhdr: pointer to the ethernet header inside the skb + * + * Returns true if the skb was consumed (encoded packet sent) or false otherwise + */ +bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + const struct net_device *netdev = neigh_node->if_incoming->soft_iface; + struct batadv_priv *bat_priv = netdev_priv(netdev); + struct batadv_unicast_packet *packet; + struct batadv_nc_path *nc_path; + __be32 packet_id; + u8 *payload; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* We only handle unicast packets */ + payload = skb_network_header(skb); + packet = (struct batadv_unicast_packet *)payload; + if (packet->header.packet_type != BATADV_UNICAST) + goto out; + + /* Try to find a coding opportunity and send the skb if one is found */ + if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr)) + return true; + + /* Find or create a nc_path for this src-dst pair */ + nc_path = batadv_nc_get_path(bat_priv, + bat_priv->nc.coding_hash, + ethhdr->h_source, + neigh_node->addr); + + if (!nc_path) + goto out; + + /* Add skb to nc_path */ + packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); + if (!batadv_nc_skb_add_to_path(skb, nc_path, neigh_node, packet_id)) + goto free_nc_path; + + /* Packet is consumed */ + return true; + +free_nc_path: + batadv_nc_path_free_ref(nc_path); +out: + /* Packet is not consumed */ + return false; +} + +/** + * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used + * when decoding coded packets + * @bat_priv: the bat priv with all the soft interface information + * @skb: data skb to store + */ +void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct batadv_unicast_packet *packet; + struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + __be32 packet_id; + u8 *payload; + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + goto out; + + /* Check for supported packet type */ + payload = skb_network_header(skb); + packet = (struct batadv_unicast_packet *)payload; + if (packet->header.packet_type != BATADV_UNICAST) + goto out; + + /* Find existing nc_path or create a new */ + nc_path = batadv_nc_get_path(bat_priv, + bat_priv->nc.decoding_hash, + ethhdr->h_source, + ethhdr->h_dest); + + if (!nc_path) + goto out; + + /* Clone skb and adjust skb->data to point at batman header */ + skb = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!skb)) + goto free_nc_path; + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto free_skb; + + if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN))) + goto free_skb; + + /* Add skb to nc_path */ + packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); + if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id)) + goto free_skb; + + batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER); + return; + +free_skb: + kfree_skb(skb); +free_nc_path: + batadv_nc_path_free_ref(nc_path); +out: + return; +} + +/** + * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet + * should be saved in the decoding buffer and, if so, store it there + * @bat_priv: the bat priv with all the soft interface information + * @skb: unicast skb to store + */ +void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + + if (batadv_is_my_mac(ethhdr->h_dest)) + return; + + /* Set data pointer to MAC header to mimic packets from our tx path */ + skb_push(skb, ETH_HLEN); + + batadv_nc_skb_store_for_decoding(bat_priv, skb); +} + +/** + * batadv_nc_skb_decode_packet - decode given skb using the decode data stored + * in nc_packet + * @skb: unicast skb to decode + * @nc_packet: decode data needed to decode the skb + * + * Returns pointer to decoded unicast packet if the packet was decoded or NULL + * in case of an error. + */ +static struct batadv_unicast_packet * +batadv_nc_skb_decode_packet(struct sk_buff *skb, + struct batadv_nc_packet *nc_packet) +{ + const int h_size = sizeof(struct batadv_unicast_packet); + const int h_diff = sizeof(struct batadv_coded_packet) - h_size; + struct batadv_unicast_packet *unicast_packet; + struct batadv_coded_packet coded_packet_tmp; + struct ethhdr *ethhdr, ethhdr_tmp; + uint8_t *orig_dest, ttl, ttvn; + unsigned int coding_len; + + /* Save headers temporarily */ + memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp)); + memcpy(ðhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp)); + + if (skb_cow(skb, 0) < 0) + return NULL; + + if (unlikely(!skb_pull_rcsum(skb, h_diff))) + return NULL; + + /* Data points to batman header, so set mac header 14 bytes before + * and network to data + */ + skb_set_mac_header(skb, -ETH_HLEN); + skb_reset_network_header(skb); + + /* Reconstruct original mac header */ + ethhdr = (struct ethhdr *)skb_mac_header(skb); + memcpy(ethhdr, ðhdr_tmp, sizeof(*ethhdr)); + + /* Select the correct unicast header information based on the location + * of our mac address in the coded_packet header + */ + if (batadv_is_my_mac(coded_packet_tmp.second_dest)) { + /* If we are the second destination the packet was overheard, + * so the Ethernet address must be copied to h_dest and + * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST + */ + memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN); + skb->pkt_type = PACKET_HOST; + + orig_dest = coded_packet_tmp.second_orig_dest; + ttl = coded_packet_tmp.second_ttl; + ttvn = coded_packet_tmp.second_ttvn; + } else { + orig_dest = coded_packet_tmp.first_orig_dest; + ttl = coded_packet_tmp.header.ttl; + ttvn = coded_packet_tmp.first_ttvn; + } + + coding_len = ntohs(coded_packet_tmp.coded_len); + + if (coding_len > skb->len) + return NULL; + + /* Here the magic is reversed: + * extract the missing packet from the received coded packet + */ + batadv_nc_memxor(skb->data + h_size, + nc_packet->skb->data + h_size, + coding_len); + + /* Resize decoded skb if decoded with larger packet */ + if (nc_packet->skb->len > coding_len + h_size) + pskb_trim_rcsum(skb, coding_len + h_size); + + /* Create decoded unicast packet */ + unicast_packet = (struct batadv_unicast_packet *)skb->data; + unicast_packet->header.packet_type = BATADV_UNICAST; + unicast_packet->header.version = BATADV_COMPAT_VERSION; + unicast_packet->header.ttl = ttl; + memcpy(unicast_packet->dest, orig_dest, ETH_ALEN); + unicast_packet->ttvn = ttvn; + + batadv_nc_packet_free(nc_packet); + return unicast_packet; +} + +/** + * batadv_nc_find_decoding_packet - search through buffered decoding data to + * find the data needed to decode the coded packet + * @bat_priv: the bat priv with all the soft interface information + * @ethhdr: pointer to the ethernet header inside the coded packet + * @coded: coded packet we try to find decode data for + * + * Returns pointer to nc packet if the needed data was found or NULL otherwise. + */ +static struct batadv_nc_packet * +batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, + struct ethhdr *ethhdr, + struct batadv_coded_packet *coded) +{ + struct batadv_hashtable *hash = bat_priv->nc.decoding_hash; + struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL; + struct batadv_nc_path *nc_path, nc_path_key; + uint8_t *dest, *source; + __be32 packet_id; + int index; + + if (!hash) + return NULL; + + /* Select the correct packet id based on the location of our mac-addr */ + dest = ethhdr->h_source; + if (!batadv_is_my_mac(coded->second_dest)) { + source = coded->second_source; + packet_id = coded->second_crc; + } else { + source = coded->first_source; + packet_id = coded->first_crc; + } + + batadv_nc_hash_key_gen(&nc_path_key, source, dest); + index = batadv_nc_hash_choose(&nc_path_key, hash->size); + + /* Search for matching coding path */ + rcu_read_lock(); + hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) { + /* Find matching nc_packet */ + spin_lock_bh(&nc_path->packet_list_lock); + list_for_each_entry(tmp_nc_packet, + &nc_path->packet_list, list) { + if (packet_id == tmp_nc_packet->packet_id) { + list_del(&tmp_nc_packet->list); + + nc_packet = tmp_nc_packet; + break; + } + } + spin_unlock_bh(&nc_path->packet_list_lock); + + if (nc_packet) + break; + } + rcu_read_unlock(); + + if (!nc_packet) + batadv_dbg(BATADV_DBG_NC, bat_priv, + "No decoding packet found for %u\n", packet_id); + + return nc_packet; +} + +/** + * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the + * resulting unicast packet + * @skb: incoming coded packet + * @recv_if: pointer to interface this packet was received on + */ +static int batadv_nc_recv_coded_packet(struct sk_buff *skb, + struct batadv_hard_iface *recv_if) +{ + struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); + struct batadv_unicast_packet *unicast_packet; + struct batadv_coded_packet *coded_packet; + struct batadv_nc_packet *nc_packet; + struct ethhdr *ethhdr; + int hdr_size = sizeof(*coded_packet); + + /* Check if network coding is enabled */ + if (!atomic_read(&bat_priv->network_coding)) + return NET_RX_DROP; + + /* Make sure we can access (and remove) header */ + if (unlikely(!pskb_may_pull(skb, hdr_size))) + return NET_RX_DROP; + + coded_packet = (struct batadv_coded_packet *)skb->data; + ethhdr = (struct ethhdr *)skb_mac_header(skb); + + /* Verify frame is destined for us */ + if (!batadv_is_my_mac(ethhdr->h_dest) && + !batadv_is_my_mac(coded_packet->second_dest)) + return NET_RX_DROP; + + /* Update stat counter */ + if (batadv_is_my_mac(coded_packet->second_dest)) + batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED); + + nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr, + coded_packet); + if (!nc_packet) { + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); + return NET_RX_DROP; + } + + /* Make skb's linear, because decoding accesses the entire buffer */ + if (skb_linearize(skb) < 0) + goto free_nc_packet; + + if (skb_linearize(nc_packet->skb) < 0) + goto free_nc_packet; + + /* Decode the packet */ + unicast_packet = batadv_nc_skb_decode_packet(skb, nc_packet); + if (!unicast_packet) { + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); + goto free_nc_packet; + } + + /* Mark packet as decoded to do correct recoding when forwarding */ + BATADV_SKB_CB(skb)->decoded = true; + batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE); + batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES, + skb->len + ETH_HLEN); + return batadv_recv_unicast_packet(skb, recv_if); + +free_nc_packet: + batadv_nc_packet_free(nc_packet); + return NET_RX_DROP; +} + +/** + * batadv_nc_free - clean up network coding memory + * @bat_priv: the bat priv with all the soft interface information + */ +void batadv_nc_free(struct batadv_priv *bat_priv) +{ + batadv_recv_handler_unregister(BATADV_CODED); + cancel_delayed_work_sync(&bat_priv->nc.work); + + batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); + batadv_hash_destroy(bat_priv->nc.coding_hash); + batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL); + batadv_hash_destroy(bat_priv->nc.decoding_hash); +} + +/** + * batadv_nc_nodes_seq_print_text - print the nc node information + * @seq: seq file to print on + * @offset: not used + */ +int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) +{ + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct batadv_hard_iface *primary_if; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + struct batadv_nc_node *nc_node; + int i; + + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + goto out; + + /* Traverse list of originators */ + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + /* For each orig_node in this bin */ + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + seq_printf(seq, "Node: %pM\n", orig_node->orig); + + seq_puts(seq, " Ingoing: "); + /* For each in_nc_node to this orig_node */ + list_for_each_entry_rcu(nc_node, + &orig_node->in_coding_list, + list) + seq_printf(seq, "%pM ", + nc_node->addr); + seq_puts(seq, "\n"); + + seq_puts(seq, " Outgoing: "); + /* For out_nc_node to this orig_node */ + list_for_each_entry_rcu(nc_node, + &orig_node->out_coding_list, + list) + seq_printf(seq, "%pM ", + nc_node->addr); + seq_puts(seq, "\n\n"); + } + rcu_read_unlock(); + } + +out: + if (primary_if) + batadv_hardif_free_ref(primary_if); + return 0; +} + +/** + * batadv_nc_init_debugfs - create nc folder and related files in debugfs + * @bat_priv: the bat priv with all the soft interface information + */ +int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) +{ + struct dentry *nc_dir, *file; + + nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir); + if (!nc_dir) + goto out; + + file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.min_tq); + if (!file) + goto out; + + file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.max_fwd_delay); + if (!file) + goto out; + + file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir, + &bat_priv->nc.max_buffer_time); + if (!file) + goto out; + + return 0; + +out: + return -ENOMEM; +} diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h new file mode 100644 index 0000000..4fa6d0c --- /dev/null +++ b/net/batman-adv/network-coding.h @@ -0,0 +1,123 @@ +/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll, Jeppe Ledet-Pedersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_ +#define _NET_BATMAN_ADV_NETWORK_CODING_H_ + +#ifdef CONFIG_BATMAN_ADV_NC + +int batadv_nc_init(struct batadv_priv *bat_priv); +void batadv_nc_free(struct batadv_priv *bat_priv); +void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh); +void batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)); +void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); +void batadv_nc_init_orig(struct batadv_orig_node *orig_node); +bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr); +void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb); +void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb); +int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset); +int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); + +#else /* ifdef CONFIG_BATMAN_ADV_NC */ + +static inline int batadv_nc_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_nc_free(struct batadv_priv *bat_priv) +{ + return; +} + +static inline void +batadv_nc_update_nc_node(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + struct batadv_orig_node *orig_neigh_node, + struct batadv_ogm_packet *ogm_packet, + int is_single_hop_neigh) +{ + return; +} + +static inline void +batadv_nc_purge_orig(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node, + bool (*to_purge)(struct batadv_priv *, + struct batadv_nc_node *)) +{ + return; +} + +static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) +{ + return; +} + +static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) +{ + return; +} + +static inline bool batadv_nc_skb_forward(struct sk_buff *skb, + struct batadv_neigh_node *neigh_node, + struct ethhdr *ethhdr) +{ + return false; +} + +static inline void +batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return; +} + +static inline void +batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, + struct sk_buff *skb) +{ + return; +} + +static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq, + void *offset) +{ + return 0; +} + +static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) +{ + return 0; +} + +#endif /* ifdef CONFIG_BATMAN_ADV_NC */ + +#endif /* _NET_BATMAN_ADV_NETWORK_CODING_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 96fb80b..2f34525 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -28,6 +28,7 @@ #include "unicast.h" #include "soft-interface.h" #include "bridge_loop_avoidance.h" +#include "network-coding.h" /* hash class keys */ static struct lock_class_key batadv_orig_hash_lock_class_key; @@ -142,6 +143,9 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu) spin_unlock_bh(&orig_node->neigh_list_lock); + /* Free nc_nodes */ + batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); + batadv_frag_list_free(&orig_node->frag_list); batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, "originator timed out"); @@ -219,6 +223,8 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, spin_lock_init(&orig_node->neigh_list_lock); spin_lock_init(&orig_node->tt_buff_lock); + batadv_nc_init_orig(orig_node); + /* extra reference for return */ atomic_set(&orig_node->refcount, 2); @@ -459,7 +465,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) neigh_node_tmp->tq_avg); } - seq_printf(seq, "\n"); + seq_puts(seq, "\n"); batman_count++; next: @@ -469,7 +475,7 @@ next: } if (batman_count == 0) - seq_printf(seq, "No batman nodes in range ...\n"); + seq_puts(seq, "No batman nodes in range ...\n"); out: if (primary_if) diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index ed0aa89..a51ccfc 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -30,6 +30,7 @@ enum batadv_packettype { BATADV_TT_QUERY = 0x07, BATADV_ROAM_ADV = 0x08, BATADV_UNICAST_4ADDR = 0x09, + BATADV_CODED = 0x0a, }; /** @@ -278,4 +279,36 @@ struct batadv_tt_change { uint8_t addr[ETH_ALEN]; } __packed; +/** + * struct batadv_coded_packet - network coded packet + * @header: common batman packet header and ttl of first included packet + * @reserved: Align following fields to 2-byte boundaries + * @first_source: original source of first included packet + * @first_orig_dest: original destinal of first included packet + * @first_crc: checksum of first included packet + * @first_ttvn: tt-version number of first included packet + * @second_ttl: ttl of second packet + * @second_dest: second receiver of this coded packet + * @second_source: original source of second included packet + * @second_orig_dest: original destination of second included packet + * @second_crc: checksum of second included packet + * @second_ttvn: tt version number of second included packet + * @coded_len: length of network coded part of the payload + */ +struct batadv_coded_packet { + struct batadv_header header; + uint8_t first_ttvn; + /* uint8_t first_dest[ETH_ALEN]; - saved in mac header destination */ + uint8_t first_source[ETH_ALEN]; + uint8_t first_orig_dest[ETH_ALEN]; + __be32 first_crc; + uint8_t second_ttl; + uint8_t second_ttvn; + uint8_t second_dest[ETH_ALEN]; + uint8_t second_source[ETH_ALEN]; + uint8_t second_orig_dest[ETH_ALEN]; + __be32 second_crc; + __be16 coded_len; +}; + #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 5ee21ce..8f88967 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -29,6 +29,7 @@ #include "unicast.h" #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" +#include "network-coding.h" static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); @@ -548,27 +549,37 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig, return router; } +/** + * batadv_check_unicast_packet - Check for malformed unicast packets + * @skb: packet to check + * @hdr_size: size of header to pull + * + * Check for short header and bad addresses in given packet. Returns negative + * value when check fails and 0 otherwise. The negative value depends on the + * reason: -ENODATA for bad header, -EBADR for broadcast destination or source, + * and -EREMOTE for non-local (other host) destination. + */ static int batadv_check_unicast_packet(struct sk_buff *skb, int hdr_size) { struct ethhdr *ethhdr; /* drop packet if it has not necessary minimum size */ if (unlikely(!pskb_may_pull(skb, hdr_size))) - return -1; + return -ENODATA; ethhdr = (struct ethhdr *)skb_mac_header(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) - return -1; + return -EBADR; /* packet with broadcast sender address */ if (is_broadcast_ether_addr(ethhdr->h_source)) - return -1; + return -EBADR; /* not for me */ if (!batadv_is_my_mac(ethhdr->h_dest)) - return -1; + return -EREMOTE; return 0; } @@ -850,15 +861,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet->header.ttl--; - /* Update stats counter */ - batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); - batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, - skb->len + ETH_HLEN); - - /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) + /* network code packet if possible */ + if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { + ret = NET_RX_SUCCESS; + } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { ret = NET_RX_SUCCESS; + /* Update stats counter */ + batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); + batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, + skb->len + ETH_HLEN); + } + out: if (neigh_node) batadv_neigh_node_free_ref(neigh_node); @@ -1033,7 +1047,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_unicast_4addr_packet *unicast_4addr_packet; uint8_t *orig_addr; struct batadv_orig_node *orig_node = NULL; - int hdr_size = sizeof(*unicast_packet); + int check, hdr_size = sizeof(*unicast_packet); bool is4addr; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -1044,7 +1058,16 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (is4addr) hdr_size = sizeof(*unicast_4addr_packet); - if (batadv_check_unicast_packet(skb, hdr_size) < 0) + /* function returns -EREMOTE for promiscuous packets */ + check = batadv_check_unicast_packet(skb, hdr_size); + + /* Even though the packet is not for us, we might save it to use for + * decoding a later received coded packet + */ + if (check == -EREMOTE) + batadv_nc_skb_store_sniffed_unicast(bat_priv, skb); + + if (check < 0) return NET_RX_DROP; if (!batadv_check_unicast_ttvn(bat_priv, skb)) diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index a67cffd..263cfd1 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -27,6 +27,7 @@ #include "vis.h" #include "gateway_common.h" #include "originator.h" +#include "network-coding.h" #include <linux/if_ether.h> @@ -39,6 +40,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const uint8_t *dst_addr) { + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct ethhdr *ethhdr; if (hard_iface->if_status != BATADV_IF_ACTIVE) @@ -70,6 +72,9 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb->dev = hard_iface->net_dev; + /* Save a clone of the skb to use when decoding coded packets */ + batadv_nc_skb_store_for_decoding(bat_priv, skb); + /* dev_queue_xmit() returns a negative result on error. However on * congestion and traffic shaping, it drops and returns NET_XMIT_DROP * (which is > 0). This will not be treated as an error. diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 2711e87..403b8c4 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -37,6 +37,7 @@ #include <linux/if_ether.h> #include "unicast.h" #include "bridge_loop_avoidance.h" +#include "network-coding.h" static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); @@ -401,55 +402,6 @@ static void batadv_set_lockdep_class(struct net_device *dev) } /** - * batadv_softif_init - Late stage initialization of soft interface - * @dev: registered network device to modify - * - * Returns error code on failures - */ -static int batadv_softif_init(struct net_device *dev) -{ - batadv_set_lockdep_class(dev); - - return 0; -} - -static const struct net_device_ops batadv_netdev_ops = { - .ndo_init = batadv_softif_init, - .ndo_open = batadv_interface_open, - .ndo_stop = batadv_interface_release, - .ndo_get_stats = batadv_interface_stats, - .ndo_set_mac_address = batadv_interface_set_mac_addr, - .ndo_change_mtu = batadv_interface_change_mtu, - .ndo_start_xmit = batadv_interface_tx, - .ndo_validate_addr = eth_validate_addr -}; - -static void batadv_interface_setup(struct net_device *dev) -{ - struct batadv_priv *priv = netdev_priv(dev); - - ether_setup(dev); - - dev->netdev_ops = &batadv_netdev_ops; - dev->destructor = free_netdev; - dev->tx_queue_len = 0; - - /* can't call min_mtu, because the needed variables - * have not been initialized yet - */ - dev->mtu = ETH_DATA_LEN; - /* reserve more space in the skbuff for our header */ - dev->hard_header_len = BATADV_HEADER_LEN; - - /* generate random address */ - eth_hw_addr_random(dev); - - SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); - - memset(priv, 0, sizeof(*priv)); -} - -/** * batadv_softif_destroy_finish - cleans up the remains of a softif * @work: work queue item * @@ -465,7 +417,6 @@ static void batadv_softif_destroy_finish(struct work_struct *work) cleanup_work); soft_iface = bat_priv->soft_iface; - batadv_debugfs_del_meshif(soft_iface); batadv_sysfs_del_meshif(soft_iface); rtnl_lock(); @@ -473,21 +424,22 @@ static void batadv_softif_destroy_finish(struct work_struct *work) rtnl_unlock(); } -struct net_device *batadv_softif_create(const char *name) +/** + * batadv_softif_init_late - late stage initialization of soft interface + * @dev: registered network device to modify + * + * Returns error code on failures + */ +static int batadv_softif_init_late(struct net_device *dev) { - struct net_device *soft_iface; struct batadv_priv *bat_priv; int ret; size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM; - soft_iface = alloc_netdev(sizeof(*bat_priv), name, - batadv_interface_setup); - - if (!soft_iface) - goto out; + batadv_set_lockdep_class(dev); - bat_priv = netdev_priv(soft_iface); - bat_priv->soft_iface = soft_iface; + bat_priv = netdev_priv(dev); + bat_priv->soft_iface = dev; INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as @@ -495,14 +447,7 @@ struct net_device *batadv_softif_create(const char *name) */ bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t)); if (!bat_priv->bat_counters) - goto free_soft_iface; - - ret = register_netdevice(soft_iface); - if (ret < 0) { - pr_err("Unable to register the batman interface '%s': %i\n", - name, ret); - goto free_bat_counters; - } + return -ENOMEM; atomic_set(&bat_priv->aggregated_ogms, 1); atomic_set(&bat_priv->bonding, 0); @@ -540,49 +485,189 @@ struct net_device *batadv_softif_create(const char *name) bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; - ret = batadv_algo_select(bat_priv, batadv_routing_algo); - if (ret < 0) - goto unreg_soft_iface; + batadv_nc_init_bat_priv(bat_priv); - ret = batadv_sysfs_add_meshif(soft_iface); + ret = batadv_algo_select(bat_priv, batadv_routing_algo); if (ret < 0) - goto unreg_soft_iface; + goto free_bat_counters; - ret = batadv_debugfs_add_meshif(soft_iface); + ret = batadv_debugfs_add_meshif(dev); if (ret < 0) - goto unreg_sysfs; + goto free_bat_counters; - ret = batadv_mesh_init(soft_iface); + ret = batadv_mesh_init(dev); if (ret < 0) goto unreg_debugfs; - return soft_iface; + return 0; unreg_debugfs: - batadv_debugfs_del_meshif(soft_iface); -unreg_sysfs: - batadv_sysfs_del_meshif(soft_iface); -unreg_soft_iface: - free_percpu(bat_priv->bat_counters); - unregister_netdevice(soft_iface); - return NULL; - + batadv_debugfs_del_meshif(dev); free_bat_counters: free_percpu(bat_priv->bat_counters); -free_soft_iface: - free_netdev(soft_iface); + + return ret; +} + +/** + * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface + * @dev: batadv_soft_interface used as master interface + * @slave_dev: net_device which should become the slave interface + * + * Return 0 if successful or error otherwise. + */ +static int batadv_softif_slave_add(struct net_device *dev, + struct net_device *slave_dev) +{ + struct batadv_hard_iface *hard_iface; + int ret = -EINVAL; + + hard_iface = batadv_hardif_get_by_netdev(slave_dev); + if (!hard_iface || hard_iface->soft_iface != NULL) + goto out; + + ret = batadv_hardif_enable_interface(hard_iface, dev->name); + out: - return NULL; + if (hard_iface) + batadv_hardif_free_ref(hard_iface); + return ret; } -void batadv_softif_destroy(struct net_device *soft_iface) +/** + * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface + * @dev: batadv_soft_interface used as master interface + * @slave_dev: net_device which should be removed from the master interface + * + * Return 0 if successful or error otherwise. + */ +static int batadv_softif_slave_del(struct net_device *dev, + struct net_device *slave_dev) +{ + struct batadv_hard_iface *hard_iface; + int ret = -EINVAL; + + hard_iface = batadv_hardif_get_by_netdev(slave_dev); + + if (!hard_iface || hard_iface->soft_iface != dev) + goto out; + + batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP); + ret = 0; + +out: + if (hard_iface) + batadv_hardif_free_ref(hard_iface); + return ret; +} + +static const struct net_device_ops batadv_netdev_ops = { + .ndo_init = batadv_softif_init_late, + .ndo_open = batadv_interface_open, + .ndo_stop = batadv_interface_release, + .ndo_get_stats = batadv_interface_stats, + .ndo_set_mac_address = batadv_interface_set_mac_addr, + .ndo_change_mtu = batadv_interface_change_mtu, + .ndo_start_xmit = batadv_interface_tx, + .ndo_validate_addr = eth_validate_addr, + .ndo_add_slave = batadv_softif_slave_add, + .ndo_del_slave = batadv_softif_slave_del, +}; + +/** + * batadv_softif_free - Deconstructor of batadv_soft_interface + * @dev: Device to cleanup and remove + */ +static void batadv_softif_free(struct net_device *dev) +{ + batadv_debugfs_del_meshif(dev); + batadv_mesh_free(dev); + free_netdev(dev); +} + +/** + * batadv_softif_init_early - early stage initialization of soft interface + * @dev: registered network device to modify + */ +static void batadv_softif_init_early(struct net_device *dev) +{ + struct batadv_priv *priv = netdev_priv(dev); + + ether_setup(dev); + + dev->netdev_ops = &batadv_netdev_ops; + dev->destructor = batadv_softif_free; + dev->tx_queue_len = 0; + + /* can't call min_mtu, because the needed variables + * have not been initialized yet + */ + dev->mtu = ETH_DATA_LEN; + /* reserve more space in the skbuff for our header */ + dev->hard_header_len = BATADV_HEADER_LEN; + + /* generate random address */ + eth_hw_addr_random(dev); + + SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + + memset(priv, 0, sizeof(*priv)); +} + +struct net_device *batadv_softif_create(const char *name) +{ + struct net_device *soft_iface; + int ret; + + soft_iface = alloc_netdev(sizeof(struct batadv_priv), name, + batadv_softif_init_early); + if (!soft_iface) + return NULL; + + soft_iface->rtnl_link_ops = &batadv_link_ops; + + ret = register_netdevice(soft_iface); + if (ret < 0) { + pr_err("Unable to register the batman interface '%s': %i\n", + name, ret); + free_netdev(soft_iface); + return NULL; + } + + return soft_iface; +} + +/** + * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs + * @soft_iface: the to-be-removed batman-adv interface + */ +void batadv_softif_destroy_sysfs(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); - batadv_mesh_free(soft_iface); queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); } +/** + * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink + * @soft_iface: the to-be-removed batman-adv interface + * @head: list pointer + */ +static void batadv_softif_destroy_netlink(struct net_device *soft_iface, + struct list_head *head) +{ + struct batadv_hard_iface *hard_iface; + + list_for_each_entry(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface == soft_iface) + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_KEEP); + } + + batadv_sysfs_del_meshif(soft_iface); + unregister_netdevice_queue(soft_iface, head); +} + int batadv_softif_is_valid(const struct net_device *net_dev) { if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) @@ -591,6 +676,13 @@ int batadv_softif_is_valid(const struct net_device *net_dev) return 0; } +struct rtnl_link_ops batadv_link_ops __read_mostly = { + .kind = "batadv", + .priv_size = sizeof(struct batadv_priv), + .setup = batadv_softif_init_early, + .dellink = batadv_softif_destroy_netlink, +}; + /* ethtool */ static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { @@ -662,6 +754,17 @@ static const struct { { "dat_put_rx" }, { "dat_cached_reply_tx" }, #endif +#ifdef CONFIG_BATMAN_ADV_NC + { "nc_code" }, + { "nc_code_bytes" }, + { "nc_recode" }, + { "nc_recode_bytes" }, + { "nc_buffer" }, + { "nc_decode" }, + { "nc_decode_bytes" }, + { "nc_decode_failed" }, + { "nc_sniffed" }, +#endif }; static void batadv_get_strings(struct net_device *dev, uint32_t stringset, diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 43182e5..2f2472c 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -25,7 +25,8 @@ void batadv_interface_rx(struct net_device *soft_iface, struct sk_buff *skb, struct batadv_hard_iface *recv_if, int hdr_size, struct batadv_orig_node *orig_node); struct net_device *batadv_softif_create(const char *name); -void batadv_softif_destroy(struct net_device *soft_iface); +void batadv_softif_destroy_sysfs(struct net_device *soft_iface); int batadv_softif_is_valid(const struct net_device *net_dev); +extern struct rtnl_link_ops batadv_link_ops; #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index afbba31..15a22ef 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -442,6 +442,9 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, #ifdef CONFIG_BATMAN_ADV_DEBUG BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL); #endif +#ifdef CONFIG_BATMAN_ADV_NC +BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL); +#endif static struct batadv_attribute *batadv_mesh_attrs[] = { &batadv_attr_aggregated_ogms, @@ -464,6 +467,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = { #ifdef CONFIG_BATMAN_ADV_DEBUG &batadv_attr_log_level, #endif +#ifdef CONFIG_BATMAN_ADV_NC + &batadv_attr_network_coding, +#endif NULL, }; @@ -582,13 +588,15 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, } if (status_tmp == BATADV_IF_NOT_IN_USE) { - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); goto unlock; } /* if the interface already is in use */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) - batadv_hardif_disable_interface(hard_iface); + batadv_hardif_disable_interface(hard_iface, + BATADV_IF_CLEANUP_AUTO); ret = batadv_hardif_enable_interface(hard_iface, buff); @@ -688,15 +696,10 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type, enum batadv_uev_action action, const char *data) { int ret = -ENOMEM; - struct batadv_hard_iface *primary_if; struct kobject *bat_kobj; char *uevent_env[4] = { NULL, NULL, NULL, NULL }; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - - bat_kobj = &primary_if->soft_iface->dev.kobj; + bat_kobj = &bat_priv->soft_iface->dev.kobj; uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) + strlen(batadv_uev_type_str[type]) + 1, @@ -732,9 +735,6 @@ out: kfree(uevent_env[1]); kfree(uevent_env[2]); - if (primary_if) - batadv_hardif_free_ref(primary_if); - if (ret) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 98a66a0..9322320 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -385,25 +385,19 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv, int *packet_buff_len, int min_packet_len) { - struct batadv_hard_iface *primary_if; int req_len; - primary_if = batadv_primary_if_get_selected(bat_priv); - req_len = min_packet_len; req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes)); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented */ - if ((!primary_if) || (req_len > primary_if->soft_iface->mtu)) + if (req_len > bat_priv->soft_iface->mtu) req_len = min_packet_len; batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len, min_packet_len, req_len); - - if (primary_if) - batadv_hardif_free_ref(primary_if); } static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, @@ -908,7 +902,7 @@ out_remove: /* remove address from local hash if present */ local_flags = batadv_tt_local_remove(bat_priv, tt_addr, "global tt received", - !!(flags & BATADV_TT_CLIENT_ROAM)); + flags & BATADV_TT_CLIENT_ROAM); tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI; if (!(flags & BATADV_TT_CLIENT_ROAM)) @@ -1580,7 +1574,7 @@ static int batadv_tt_global_valid(const void *entry_ptr, static struct sk_buff * batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, struct batadv_hashtable *hash, - struct batadv_hard_iface *primary_if, + struct batadv_priv *bat_priv, int (*valid_cb)(const void *, const void *), void *cb_data) { @@ -1594,8 +1588,8 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, uint32_t i; size_t len; - if (tt_query_size + tt_len > primary_if->soft_iface->mtu) { - tt_len = primary_if->soft_iface->mtu - tt_query_size; + if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) { + tt_len = bat_priv->soft_iface->mtu - tt_query_size; tt_len -= tt_len % sizeof(struct batadv_tt_change); } tt_tot = tt_len / sizeof(struct batadv_tt_change); @@ -1715,7 +1709,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, { struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; - struct batadv_hard_iface *primary_if = NULL; uint8_t orig_ttvn, req_ttvn, ttvn; int ret = false; unsigned char *tt_buff; @@ -1740,10 +1733,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, if (!res_dst_orig_node) goto out; - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; - orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_request->ttvn; @@ -1791,7 +1780,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, skb = batadv_tt_response_fill_table(tt_len, ttvn, bat_priv->tt.global_hash, - primary_if, + bat_priv, batadv_tt_global_valid, req_dst_orig_node); if (!skb) @@ -1828,8 +1817,6 @@ out: batadv_orig_node_free_ref(res_dst_orig_node); if (req_dst_orig_node) batadv_orig_node_free_ref(req_dst_orig_node); - if (primary_if) - batadv_hardif_free_ref(primary_if); if (!ret) kfree_skb(skb); return ret; @@ -1907,7 +1894,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, skb = batadv_tt_response_fill_table(tt_len, ttvn, bat_priv->tt.local_hash, - primary_if, + bat_priv, batadv_tt_local_valid_entry, NULL); if (!skb) @@ -2528,7 +2515,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, if (!tt_global_entry) goto out; - ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM); + ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; batadv_tt_global_entry_free_ref(tt_global_entry); out: return ret; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 4cd87a0..aba8364 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -128,6 +128,10 @@ struct batadv_hard_iface { * @bond_list: list of bonding candidates * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner + * @in_coding_list: list of nodes this orig can hear + * @out_coding_list: list of nodes that can hear this orig + * @in_coding_list_lock: protects in_coding_list + * @out_coding_list_lock: protects out_coding_list */ struct batadv_orig_node { uint8_t orig[ETH_ALEN]; @@ -171,6 +175,12 @@ struct batadv_orig_node { struct list_head bond_list; atomic_t refcount; struct rcu_head rcu; +#ifdef CONFIG_BATMAN_ADV_NC + struct list_head in_coding_list; + struct list_head out_coding_list; + spinlock_t in_coding_list_lock; /* Protects in_coding_list */ + spinlock_t out_coding_list_lock; /* Protects out_coding_list */ +#endif }; /** @@ -265,6 +275,17 @@ struct batadv_bcast_duplist_entry { * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet * counter + * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter + * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter + * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter + * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter + * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding + * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter + * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter + * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet + * counter + * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc + * mode. * @BATADV_CNT_NUM: number of traffic counters */ enum batadv_counters { @@ -292,6 +313,17 @@ enum batadv_counters { BATADV_CNT_DAT_PUT_RX, BATADV_CNT_DAT_CACHED_REPLY_TX, #endif +#ifdef CONFIG_BATMAN_ADV_NC + BATADV_CNT_NC_CODE, + BATADV_CNT_NC_CODE_BYTES, + BATADV_CNT_NC_RECODE, + BATADV_CNT_NC_RECODE_BYTES, + BATADV_CNT_NC_BUFFER, + BATADV_CNT_NC_DECODE, + BATADV_CNT_NC_DECODE_BYTES, + BATADV_CNT_NC_DECODE_FAILED, + BATADV_CNT_NC_SNIFFED, +#endif BATADV_CNT_NUM, }; @@ -428,6 +460,35 @@ struct batadv_priv_dat { #endif /** + * struct batadv_priv_nc - per mesh interface network coding private data + * @work: work queue callback item for cleanup + * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs + * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq + * @max_fwd_delay: maximum packet forward delay to allow coding of packets + * @max_buffer_time: buffer time for sniffed packets used to decoding + * @timestamp_fwd_flush: timestamp of last forward packet queue flush + * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge + * @coding_hash: Hash table used to buffer skbs while waiting for another + * incoming skb to code it with. Skbs are added to the buffer just before being + * forwarded in routing.c + * @decoding_hash: Hash table used to buffer skbs that might be needed to decode + * a received coded skb. The buffer is used for 1) skbs arriving on the + * soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs + * forwarded by batman-adv. + */ +struct batadv_priv_nc { + struct delayed_work work; + struct dentry *debug_dir; + u8 min_tq; + u32 max_fwd_delay; + u32 max_buffer_time; + unsigned long timestamp_fwd_flush; + unsigned long timestamp_sniffed_purge; + struct batadv_hashtable *coding_hash; + struct batadv_hashtable *decoding_hash; +}; + +/** * struct batadv_priv - per mesh interface data * @mesh_state: current status of the mesh (inactive/active/deactivating) * @soft_iface: net device which holds this struct as private data @@ -470,6 +531,8 @@ struct batadv_priv_dat { * @tt: translation table data * @vis: vis data * @dat: distributed arp table data + * @network_coding: bool indicating whether network coding is enabled + * @batadv_priv_nc: network coding data */ struct batadv_priv { atomic_t mesh_state; @@ -522,6 +585,10 @@ struct batadv_priv { #ifdef CONFIG_BATMAN_ADV_DAT struct batadv_priv_dat dat; #endif +#ifdef CONFIG_BATMAN_ADV_NC + atomic_t network_coding; + struct batadv_priv_nc nc; +#endif /* CONFIG_BATMAN_ADV_NC */ }; /** @@ -702,6 +769,75 @@ struct batadv_tt_roam_node { }; /** + * struct batadv_nc_node - network coding node + * @list: next and prev pointer for the list handling + * @addr: the node's mac address + * @refcount: number of contexts the object is used by + * @rcu: struct used for freeing in an RCU-safe manner + * @orig_node: pointer to corresponding orig node struct + * @last_seen: timestamp of last ogm received from this node + */ +struct batadv_nc_node { + struct list_head list; + uint8_t addr[ETH_ALEN]; + atomic_t refcount; + struct rcu_head rcu; + struct batadv_orig_node *orig_node; + unsigned long last_seen; +}; + +/** + * struct batadv_nc_path - network coding path + * @hash_entry: next and prev pointer for the list handling + * @rcu: struct used for freeing in an RCU-safe manner + * @refcount: number of contexts the object is used by + * @packet_list: list of buffered packets for this path + * @packet_list_lock: access lock for packet list + * @next_hop: next hop (destination) of path + * @prev_hop: previous hop (source) of path + * @last_valid: timestamp for last validation of path + */ +struct batadv_nc_path { + struct hlist_node hash_entry; + struct rcu_head rcu; + atomic_t refcount; + struct list_head packet_list; + spinlock_t packet_list_lock; /* Protects packet_list */ + uint8_t next_hop[ETH_ALEN]; + uint8_t prev_hop[ETH_ALEN]; + unsigned long last_valid; +}; + +/** + * struct batadv_nc_packet - network coding packet used when coding and + * decoding packets + * @list: next and prev pointer for the list handling + * @packet_id: crc32 checksum of skb data + * @timestamp: field containing the info when the packet was added to path + * @neigh_node: pointer to original next hop neighbor of skb + * @skb: skb which can be encoded or used for decoding + * @nc_path: pointer to path this nc packet is attached to + */ +struct batadv_nc_packet { + struct list_head list; + __be32 packet_id; + unsigned long timestamp; + struct batadv_neigh_node *neigh_node; + struct sk_buff *skb; + struct batadv_nc_path *nc_path; +}; + +/** + * batadv_skb_cb - control buffer structure used to store private data relevant + * to batman-adv in the skb->cb buffer in skbs. + * @decoded: Marks a skb as decoded, which is checked when searching for coding + * opportunities in network-coding.c + */ +struct batadv_skb_cb { + bool decoded; +}; + +/** * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded * @list: list node for batadv_socket_client::queue_list * @send_time: execution time for delayed_work (packet sending) diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 50e079f..0bb3b59 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -122,7 +122,7 @@ batadv_frag_search_packet(struct list_head *head, { struct batadv_frag_packet_list_entry *tfp; struct batadv_unicast_frag_packet *tmp_up = NULL; - int is_head_tmp, is_head; + bool is_head_tmp, is_head; uint16_t search_seqno; if (up->flags & BATADV_UNI_FRAG_HEAD) @@ -130,7 +130,7 @@ batadv_frag_search_packet(struct list_head *head, else search_seqno = ntohs(up->seqno)-1; - is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); + is_head = up->flags & BATADV_UNI_FRAG_HEAD; list_for_each_entry(tfp, head, list) { if (!tfp->skb) @@ -142,7 +142,7 @@ batadv_frag_search_packet(struct list_head *head, tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data; if (tfp->seqno == search_seqno) { - is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD); + is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD; if (is_head_tmp != is_head) return tfp; else diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index c053244..962ccf3 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -149,7 +149,7 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq, hlist_for_each_entry(entry, if_list, list) { if (entry->primary) - seq_printf(seq, "PRIMARY, "); + seq_puts(seq, "PRIMARY, "); else seq_printf(seq, "SEC %pM, ", entry->addr); } @@ -207,7 +207,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq, if (batadv_compare_eth(entry->addr, packet->vis_orig)) batadv_vis_data_read_prim_sec(seq, list); - seq_printf(seq, "\n"); + seq_puts(seq, "\n"); } } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index d3ee69b..fea778e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -422,7 +422,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index e58c8b3..4b488ec 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb) struct ethhdr *eh = (void *) skb->data; u16 proto = ntohs(eh->h_proto); - if (proto >= 1536) + if (proto >= ETH_P_802_3_MIN) return proto; if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF)) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index bab338e..c581f12 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -161,9 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (!pv) return; - for (vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid); - vid < BR_VLAN_BITMAP_LEN; - vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { + for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { f = __br_fdb_get(br, br->dev->dev_addr, vid); if (f && f->is_local && !f->dst) fdb_delete(br, f); @@ -724,13 +722,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ - vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); - while (vid < BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); if (err) goto out; - vid = find_next_bit(pv->vlan_bitmap, - BR_VLAN_BITMAP_LEN, vid+1); } } @@ -815,11 +810,8 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], * vlan on this port. */ err = -ENOENT; - vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); - while (vid < BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { err &= __br_fdb_delete(p, addr, vid); - vid = find_next_bit(pv->vlan_bitmap, - BR_VLAN_BITMAP_LEN, vid+1); } } out: diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ef1b914..f17fcb3 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -148,7 +148,6 @@ static void del_nbp(struct net_bridge_port *p) dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); - synchronize_net(); netdev_upper_dev_unlink(dev, br->dev); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index ee79f3f..19942e3 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -382,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, return ret; } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct br_mdb_entry *entry; @@ -458,7 +458,7 @@ unlock: return err; } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net_device *dev; struct br_mdb_entry *entry; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 923fbea..81f2389 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1369,7 +1369,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br, return -EINVAL; if (iph->protocol != IPPROTO_IGMP) { - if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) + if (!ipv4_is_local_multicast(iph->daddr)) BR_INPUT_SKB_CB(skb)->mrouters_only = 1; return 0; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 299fc5f..8e3abf5 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -136,10 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; pvid = br_get_pvid(pv); - for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); - vid < BR_VLAN_BITMAP_LEN; - vid = find_next_bit(pv->vlan_bitmap, - BR_VLAN_BITMAP_LEN, vid+1)) { + for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { vinfo.vid = vid; vinfo.flags = 0; if (vid == pvid) @@ -355,17 +352,14 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) /* Change state and parameters on port. */ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { - struct ifinfomsg *ifm; struct nlattr *protinfo; struct nlattr *afspec; struct net_bridge_port *p; struct nlattr *tb[IFLA_BRPORT_MAX + 1]; - int err; - - ifm = nlmsg_data(nlh); + int err = 0; - protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!protinfo && !afspec) return 0; @@ -373,7 +367,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) /* We want to accept dev as bridge itself if the AF_SPEC * is set to see if someone is setting vlan info on the brigde */ - if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec)) + if (!p && !afspec) return -EINVAL; if (p && protinfo) { @@ -414,14 +408,11 @@ out: /* Delete port information */ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) { - struct ifinfomsg *ifm; struct nlattr *afspec; struct net_bridge_port *p; int err; - ifm = nlmsg_data(nlh); - - afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC); + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!afspec) return 0; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 3bf43f7..961d870 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -35,7 +35,7 @@ #include <linux/skbuff.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netdevice.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> @@ -134,7 +134,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, else copy_len = uloginfo->cprange; - size = NLMSG_SPACE(sizeof(*pm) + copy_len); + size = nlmsg_total_size(sizeof(*pm) + copy_len); if (size > nlbufsiz) { pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz); return; @@ -319,12 +319,11 @@ static void __exit ebt_ulog_fini(void) for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; del_timer(&ub->timer); - spin_lock_bh(&ub->lock); + if (ub->skb) { kfree_skb(ub->skb); ub->skb = NULL; } - spin_unlock_bh(&ub->lock); } netlink_kernel_release(ebtulognl); } diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 40d8258..70f656c 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { net->xt.broute_table = ebt_register_table(net, &broute_table); - if (IS_ERR(net->xt.broute_table)) - return PTR_ERR(net->xt.broute_table); - return 0; + return PTR_RET(net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8d493c9..3d110c4 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, ethproto = h->h_proto; if (e->bitmask & EBT_802_3) { - if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO)) + if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO)) return 1; } else if (!(e->bitmask & EBT_NOPROTO) && FWINV2(e->ethproto != ethproto, EBT_IPROTO)) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 21760f0..df6d56d 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -301,10 +301,11 @@ static void dev_flowctrl(struct net_device *dev, int on) } void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, - struct cflayer *link_support, int head_room, - struct cflayer **layer, int (**rcv_func)( - struct sk_buff *, struct net_device *, - struct packet_type *, struct net_device *)) + struct cflayer *link_support, int head_room, + struct cflayer **layer, + int (**rcv_func)(struct sk_buff *, struct net_device *, + struct packet_type *, + struct net_device *)) { struct caif_device_entry *caifd; enum cfcnfg_phy_preference pref; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 095259f..1d337e0 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -197,8 +197,8 @@ static void cfsk_put(struct cflayer *layr) /* Packet Control Callback function called from CAIF */ static void caif_ctrl_cb(struct cflayer *layr, - enum caif_ctrlcmd flow, - int phyid) + enum caif_ctrlcmd flow, + int phyid) { struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); switch (flow) { @@ -274,7 +274,7 @@ static void caif_check_flow_release(struct sock *sk) * changed locking, address handling and added MSG_TRUNC. */ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *m, size_t len, int flags) + struct msghdr *m, size_t len, int flags) { struct sock *sk = sock->sk; @@ -346,8 +346,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo) * changed locking calls, changed address handling. */ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size, - int flags) + struct msghdr *msg, size_t size, + int flags) { struct sock *sk = sock->sk; int copied = 0; @@ -462,7 +462,7 @@ out: * CAIF flow-on and sock_writable. */ static long caif_wait_for_flow_on(struct caifsock *cf_sk, - int wait_writeable, long timeo, int *err) + int wait_writeable, long timeo, int *err) { struct sock *sk = &cf_sk->sk; DEFINE_WAIT(wait); @@ -516,7 +516,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -591,7 +591,7 @@ err: * and other minor adaptations. */ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -670,7 +670,7 @@ out_err: } static int setsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, unsigned int ol) + int lvl, int opt, char __user *ov, unsigned int ol) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -932,7 +932,7 @@ static int caif_release(struct socket *sock) /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ static unsigned int caif_poll(struct file *file, - struct socket *sock, poll_table *wait) + struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; unsigned int mask; @@ -1022,7 +1022,7 @@ static void caif_sock_destructor(struct sock *sk) } static int caif_create(struct net *net, struct socket *sock, int protocol, - int kern) + int kern) { struct sock *sk = NULL; struct caifsock *cf_sk = NULL; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index ef8ebaa..d76278d 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -75,7 +75,7 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { if (layr->up && layr->up->ctrlcmd) layr->up->ctrlcmd(layr->up, ctrl, layr->id); @@ -121,7 +121,7 @@ static struct packet_type caif_usb_type __read_mostly = { }; static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *arg) { struct net_device *dev = arg; struct caif_dev_common common; diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index f1dbddb..246ac3a 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -61,11 +61,11 @@ struct cfcnfg { }; static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, - enum cfctrl_srv serv, u8 phyid, - struct cflayer *adapt_layer); + enum cfctrl_srv serv, u8 phyid, + struct cflayer *adapt_layer); static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id); static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *adapt_layer); + struct cflayer *adapt_layer); static void cfctrl_resp_func(void); static void cfctrl_enum_resp(void); @@ -131,7 +131,7 @@ static void cfctrl_resp_func(void) } static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg, - u8 phyid) + u8 phyid) { struct cfcnfg_phyinfo *phy; @@ -216,8 +216,8 @@ static const int protohead[CFCTRL_SRV_MASK] = { static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, - struct caif_connect_request *s, - struct cfctrl_link_param *l) + struct caif_connect_request *s, + struct cfctrl_link_param *l) { struct dev_info *dev_info; enum cfcnfg_phy_preference pref; @@ -301,8 +301,7 @@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, int caif_connect_client(struct net *net, struct caif_connect_request *conn_req, struct cflayer *adap_layer, int *ifindex, - int *proto_head, - int *proto_tail) + int *proto_head, int *proto_tail) { struct cflayer *frml; struct cfcnfg_phyinfo *phy; @@ -364,7 +363,7 @@ unlock: EXPORT_SYMBOL(caif_connect_client); static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, - struct cflayer *adapt_layer) + struct cflayer *adapt_layer) { if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL) adapt_layer->ctrlcmd(adapt_layer, @@ -526,7 +525,7 @@ out_err: EXPORT_SYMBOL(cfcnfg_add_phy_layer); int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, - bool up) + bool up) { struct cfcnfg_phyinfo *phyinfo; diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index a376ec1..9cd057c 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -20,12 +20,12 @@ #ifdef CAIF_NO_LOOP static int handle_loop(struct cfctrl *ctrl, - int cmd, struct cfpkt *pkt){ + int cmd, struct cfpkt *pkt){ return -1; } #else static int handle_loop(struct cfctrl *ctrl, - int cmd, struct cfpkt *pkt); + int cmd, struct cfpkt *pkt); #endif static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt); static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, @@ -72,7 +72,7 @@ void cfctrl_remove(struct cflayer *layer) } static bool param_eq(const struct cfctrl_link_param *p1, - const struct cfctrl_link_param *p2) + const struct cfctrl_link_param *p2) { bool eq = p1->linktype == p2->linktype && @@ -197,8 +197,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid) } int cfctrl_linkup_request(struct cflayer *layer, - struct cfctrl_link_param *param, - struct cflayer *user_layer) + struct cfctrl_link_param *param, + struct cflayer *user_layer) { struct cfctrl *cfctrl = container_obj(layer); u32 tmp32; @@ -301,7 +301,7 @@ int cfctrl_linkup_request(struct cflayer *layer, } int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, - struct cflayer *client) + struct cflayer *client) { int ret; struct cfpkt *pkt; @@ -555,7 +555,7 @@ error: } static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfctrl *this = container_obj(layr); switch (ctrl) { diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 0a7df7e..204c5e2 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -28,7 +28,7 @@ struct cffrml { static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt); static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); static u32 cffrml_rcv_error; static u32 cffrml_rcv_checsum_error; @@ -167,7 +167,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { if (layr->up && layr->up->ctrlcmd) layr->up->ctrlcmd(layr->up, ctrl, layr->id); diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 94b0861..154d9f8 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -42,7 +42,7 @@ struct cfmuxl { static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); static struct cflayer *get_up(struct cfmuxl *muxl, u16 id); struct cflayer *cfmuxl_create(void) @@ -244,7 +244,7 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt) } static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfmuxl *muxl = container_obj(layr); struct cflayer *layer; diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 863dedd..e8f9c14 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -266,8 +266,8 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt) } inline u16 cfpkt_iterate(struct cfpkt *pkt, - u16 (*iter_func)(u16, void *, u16), - u16 data) + u16 (*iter_func)(u16, void *, u16), + u16 data) { /* * Don't care about the performance hit of linearizing, @@ -307,8 +307,8 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len) } struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, - struct cfpkt *addpkt, - u16 expectlen) + struct cfpkt *addpkt, + u16 expectlen) { struct sk_buff *dst = pkt_to_skb(dstpkt); struct sk_buff *add = pkt_to_skb(addpkt); diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index 2b563ad..db51830c 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -43,7 +43,7 @@ static void cfrfml_release(struct cflayer *layer) } struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, - int mtu_size) + int mtu_size) { int tmp; struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); @@ -69,7 +69,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, } static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead, - struct cfpkt *pkt, int *err) + struct cfpkt *pkt, int *err) { struct cfpkt *tmppkt; *err = -EPROTO; diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 8e68b97..147c232 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -29,7 +29,7 @@ struct cfserl { static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt); static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt); static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid); + int phyid); struct cflayer *cfserl_create(int instance, bool use_stx) { @@ -182,7 +182,7 @@ static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt) } static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { layr->up->ctrlcmd(layr->up, ctrl, phyid); } diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index ba217e9..95f7f5e 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -25,7 +25,7 @@ #define container_obj(layr) container_of(layr, struct cfsrvl, layer) static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, - int phyid) + int phyid) { struct cfsrvl *service = container_obj(layr); @@ -158,10 +158,9 @@ static void cfsrvl_release(struct cflayer *layer) } void cfsrvl_init(struct cfsrvl *service, - u8 channel_id, - struct dev_info *dev_info, - bool supports_flowctrl - ) + u8 channel_id, + struct dev_info *dev_info, + bool supports_flowctrl) { caif_assert(offsetof(struct cfsrvl, layer) == 0); service->open = false; @@ -207,8 +206,8 @@ void caif_free_client(struct cflayer *adap_layer) EXPORT_SYMBOL(caif_free_client); void caif_client_register_refcnt(struct cflayer *adapt_layer, - void (*hold)(struct cflayer *lyr), - void (*put)(struct cflayer *lyr)) + void (*hold)(struct cflayer *lyr), + void (*put)(struct cflayer *lyr)) { struct cfsrvl *service; diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index e597733..26a4e4e 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -167,7 +167,7 @@ static void chnl_put(struct cflayer *lyr) } static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, - int phyid) + int phyid) { struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); pr_debug("NET flowctrl func called flow: %s\n", @@ -443,7 +443,7 @@ nla_put_failure: } static void caif_netlink_parms(struct nlattr *data[], - struct caif_connect_request *conn_req) + struct caif_connect_request *conn_req) { if (!data) { pr_warn("no params data found\n"); @@ -488,7 +488,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev, } static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) + struct nlattr *data[]) { struct chnl_net *caifdev; ASSERT_RTNL(); diff --git a/net/can/af_can.c b/net/can/af_can.c index c48e522..c4e5085 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -525,7 +525,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, d = find_dev_rcv_lists(dev); if (!d) { - printk(KERN_ERR "BUG: receive list not found for " + pr_err("BUG: receive list not found for " "dev %s, id %03X, mask %03X\n", DNAME(dev), can_id, mask); goto out; @@ -546,16 +546,13 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask, } /* - * Check for bugs in CAN protocol implementations: - * If no matching list item was found, the list cursor variable next - * will be NULL, while r will point to the last item of the list. + * Check for bugs in CAN protocol implementations using af_can.c: + * 'r' will be NULL if no matching list item was found for removal. */ if (!r) { - printk(KERN_ERR "BUG: receive list entry not found for " - "dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); - r = NULL; + WARN(1, "BUG: receive list entry not found for dev %s, " + "id %03X, mask %03X\n", DNAME(dev), can_id, mask); goto out; } @@ -749,8 +746,7 @@ int can_proto_register(const struct can_proto *cp) int err = 0; if (proto < 0 || proto >= CAN_NPROTO) { - printk(KERN_ERR "can: protocol number %d out of range\n", - proto); + pr_err("can: protocol number %d out of range\n", proto); return -EINVAL; } @@ -761,8 +757,7 @@ int can_proto_register(const struct can_proto *cp) mutex_lock(&proto_tab_lock); if (proto_tab[proto]) { - printk(KERN_ERR "can: protocol %d already registered\n", - proto); + pr_err("can: protocol %d already registered\n", proto); err = -EBUSY; } else RCU_INIT_POINTER(proto_tab[proto], cp); @@ -816,11 +811,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, /* create new dev_rcv_lists for this device */ d = kzalloc(sizeof(*d), GFP_KERNEL); - if (!d) { - printk(KERN_ERR - "can: allocation of receive list failed\n"); + if (!d) return NOTIFY_DONE; - } BUG_ON(dev->ml_priv); dev->ml_priv = d; @@ -838,8 +830,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, dev->ml_priv = NULL; } } else - printk(KERN_ERR "can: notifier: receive list not " - "found for dev %s\n", dev->name); + pr_err("can: notifier: receive list not found for dev " + "%s\n", dev->name); spin_unlock(&can_rcvlists_lock); @@ -927,7 +919,7 @@ static __exit void can_exit(void) /* remove created dev_rcv_lists from still registered CAN devices */ rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv){ + if (dev->type == ARPHRD_CAN && dev->ml_priv) { struct dev_rcv_lists *d = dev->ml_priv; diff --git a/net/can/gw.c b/net/can/gw.c index 2d117dc..2dc619d 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -778,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, return 0; } -static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct rtcanmsg *r; struct cgw_job *gwj; @@ -868,7 +867,7 @@ static void cgw_remove_all_jobs(void) } } -static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh) { struct cgw_job *gwj = NULL; struct hlist_node *nx; diff --git a/net/core/datagram.c b/net/core/datagram.c index 368f9c3..ebba65d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -749,7 +749,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/core/dev.c b/net/core/dev.c index 13e6447..63e2533 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2206,16 +2206,8 @@ out: } EXPORT_SYMBOL(skb_checksum_help); -/** - * skb_mac_gso_segment - mac layer segmentation handler. - * @skb: buffer to segment - * @features: features for the output path (see dev->features) - */ -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, - netdev_features_t features) +__be16 skb_network_protocol(struct sk_buff *skb) { - struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_offload *ptype; __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; @@ -2223,13 +2215,31 @@ struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, struct vlan_hdr *vh; if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) - return ERR_PTR(-EINVAL); + return 0; vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; } + return type; +} + +/** + * skb_mac_gso_segment - mac layer segmentation handler. + * @skb: buffer to segment + * @features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_offload *ptype; + __be16 type = skb_network_protocol(skb); + + if (unlikely(!type)) + return ERR_PTR(-EINVAL); + __skb_pull(skb, skb->mac_len); rcu_read_lock(); @@ -2396,24 +2406,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) -{ - return ((features & NETIF_F_GEN_CSUM) || - ((features & NETIF_F_V4_CSUM) && - protocol == htons(ETH_P_IP)) || - ((features & NETIF_F_V6_CSUM) && - protocol == htons(ETH_P_IPV6)) || - ((features & NETIF_F_FCOE_CRC) && - protocol == htons(ETH_P_FCOE))); -} - static netdev_features_t harmonize_features(struct sk_buff *skb, __be16 protocol, netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && !can_checksum_protocol(features, protocol)) { features &= ~NETIF_F_ALL_CSUM; - features &= ~NETIF_F_SG; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; } @@ -2588,6 +2586,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) */ if (shinfo->gso_size) { unsigned int hdr_len; + u16 gso_segs = shinfo->gso_segs; /* mac layer + network layer */ hdr_len = skb_transport_header(skb) - skb_mac_header(skb); @@ -2597,7 +2596,12 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len += tcp_hdrlen(skb); else hdr_len += sizeof(struct udphdr); - qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; + + if (shinfo->gso_type & SKB_GSO_DODGY) + gso_segs = DIV_ROUND_UP(skb->len - hdr_len, + shinfo->gso_size); + + qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } } @@ -3325,7 +3329,7 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register); * netdev_rx_handler_unregister - unregister receive handler * @dev: device to unregister a handler from * - * Unregister a receive hander from a device. + * Unregister a receive handler from a device. * * The caller must hold the rtnl_mutex. */ @@ -4062,6 +4066,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->gro_list = NULL; napi->skb = NULL; napi->poll = poll; + if (weight > NAPI_POLL_WEIGHT) + pr_err_once("netif_napi_add() called with weight %d on device %s\n", + weight, dev->name); napi->weight = weight; list_add(&napi->dev_list, &dev->napi_list); napi->dev = dev; @@ -4923,20 +4930,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } - /* Fix illegal SG+CSUM combinations. */ - if ((features & NETIF_F_SG) && - !(features & NETIF_F_ALL_CSUM)) { - netdev_dbg(dev, - "Dropping NETIF_F_SG since no checksum feature.\n"); - features &= ~NETIF_F_SG; - } - /* TSO requires that SG is present as well. */ if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) { netdev_dbg(dev, "Dropping TSO features since no SG feature.\n"); features &= ~NETIF_F_ALL_TSO; } + if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IP_CSUM)) { + netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO; + features &= ~NETIF_F_TSO_ECN; + } + + if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && + !(features & NETIF_F_IPV6_CSUM)) { + netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); + features &= ~NETIF_F_TSO6; + } + /* TSO ECN requires that TSO is present as well. */ if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN) features &= ~NETIF_F_TSO_ECN; @@ -5204,6 +5216,10 @@ int register_netdevice(struct net_device *dev) */ dev->vlan_features |= NETIF_F_HIGHDMA; + /* Make NETIF_F_SG inheritable to tunnel devices. + */ + dev->hw_enc_features |= NETIF_F_SG; + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3e9b2c3..adc1351 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -78,6 +78,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", + [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 58a4ba2..d5a9f8e 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -266,7 +266,7 @@ errout: return err; } -static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -415,7 +415,7 @@ errout: return err; } -static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct fib_rule_hdr *frh = nlmsg_data(nlh); diff --git a/net/core/filter.c b/net/core/filter.c index 2e20b55..dad2a17 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -348,6 +348,9 @@ load_b: case BPF_S_ANC_VLAN_TAG_PRESENT: A = !!vlan_tx_tag_present(skb); continue; + case BPF_S_ANC_PAY_OFFSET: + A = __skb_get_poff(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(ALU_XOR_X); ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); + ANCILLARY(PAY_OFFSET); } /* ancillary operation unknown or unsupported */ @@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, diff --git a/net/core/flow.c b/net/core/flow.c index 2bfd081..7102f16 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -323,6 +323,24 @@ static void flow_cache_flush_tasklet(unsigned long data) complete(&info->completion); } +/* + * Return whether a cpu needs flushing. Conservatively, we assume + * the presence of any entries means the core may require flushing, + * since the flow_cache_ops.check() function may assume it's running + * on the same core as the per-cpu cache component. + */ +static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) +{ + struct flow_cache_percpu *fcp; + int i; + + fcp = per_cpu_ptr(fc->percpu, cpu); + for (i = 0; i < flow_cache_hash_size(fc); i++) + if (!hlist_empty(&fcp->hash_table[i])) + return 0; + return 1; +} + static void flow_cache_flush_per_cpu(void *data) { struct flow_flush_info *info = data; @@ -337,22 +355,40 @@ void flow_cache_flush(void) { struct flow_flush_info info; static DEFINE_MUTEX(flow_flush_sem); + cpumask_var_t mask; + int i, self; + + /* Track which cpus need flushing to avoid disturbing all cores. */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return; + cpumask_clear(mask); /* Don't want cpus going down or up during this. */ get_online_cpus(); mutex_lock(&flow_flush_sem); info.cache = &flow_cache_global; - atomic_set(&info.cpuleft, num_online_cpus()); + for_each_online_cpu(i) + if (!flow_cache_percpu_empty(info.cache, i)) + cpumask_set_cpu(i, mask); + atomic_set(&info.cpuleft, cpumask_weight(mask)); + if (atomic_read(&info.cpuleft) == 0) + goto done; + init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 0); - flow_cache_flush_tasklet((unsigned long)&info); + self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); + on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); + if (self) + flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); wait_for_completion(&info.completion); + +done: mutex_unlock(&flow_flush_sem); put_online_cpus(); + free_cpumask_var(mask); } static void flow_cache_flush_task(struct work_struct *work) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e187bf0..00ee068 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,6 +5,10 @@ #include <linux/if_vlan.h> #include <net/ip.h> #include <net/ipv6.h> +#include <linux/igmp.h> +#include <linux/icmp.h> +#include <linux/sctp.h> +#include <linux/dccp.h> #include <linux/if_tunnel.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> @@ -119,6 +123,17 @@ ipv6: nhoff += 4; if (hdr->flags & GRE_SEQ) nhoff += 4; + if (proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = skb_header_pointer(skb, nhoff, + sizeof(_eth), &_eth); + if (!eth) + return false; + proto = eth->h_proto; + nhoff += sizeof(*eth); + } goto again; } break; @@ -217,6 +232,59 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, } EXPORT_SYMBOL(__skb_tx_hash); +/* __skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 __skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + u32 poff = 0; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + poff += keys.thoff; + switch (keys.ip_proto) { + case IPPROTO_TCP: { + const struct tcphdr *tcph; + struct tcphdr _tcph; + + tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + if (!tcph) + return poff; + + poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4); + break; + } + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + poff += sizeof(struct udphdr); + break; + /* For the rest, we do not really care about header + * extensions at this point for now. + */ + case IPPROTO_ICMP: + poff += sizeof(struct icmphdr); + break; + case IPPROTO_ICMPV6: + poff += sizeof(struct icmp6hdr); + break; + case IPPROTO_IGMP: + poff += sizeof(struct igmphdr); + break; + case IPPROTO_DCCP: + poff += sizeof(struct dccp_hdr); + break; + case IPPROTO_SCTP: + poff += sizeof(struct sctphdr); + break; + } + + return poff; +} + static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3863b8f..c72a646 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1613,7 +1613,7 @@ int neigh_table_clear(struct neigh_table *tbl) } EXPORT_SYMBOL(neigh_table_clear); -static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1677,7 +1677,7 @@ out: return err; } -static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -1955,7 +1955,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_LOCKTIME] = { .type = NLA_U64 }, }; -static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct neigh_table *tbl; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 3174f19..569d355 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -271,7 +271,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v) else seq_printf(seq, "%04x", ntohs(pt->type)); - seq_printf(seq, " %-8s %pF\n", + seq_printf(seq, " %-8s %pf\n", pt->dev ? pt->dev->name : "", pt->func); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fa32899..a3a17ae 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -47,7 +47,7 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; -static struct srcu_struct netpoll_srcu; +DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 #define NETPOLL_RX_ENABLED 1 @@ -1212,7 +1212,6 @@ EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); - init_srcu_struct(&netpoll_srcu); return 0; } core_initcall(netpoll_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b65441d..589d0ab 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -517,32 +517,6 @@ out: return err; } -static const int rtm_min[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), - [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), - [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), - [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), - [RTM_FAM(RTM_NEWACTION)] = NLMSG_LENGTH(sizeof(struct tcamsg)), - [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), - [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), -}; - -static const int rta_max[RTM_NR_FAMILIES] = -{ - [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, - [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, - [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, - [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, - [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, - [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, - [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, -}; - int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo) { struct sock *rtnl = net->rtnl; @@ -1539,7 +1513,7 @@ errout: return err; } -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -1580,7 +1554,7 @@ errout: return err; } -static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1711,7 +1685,7 @@ static int rtnl_group_changelink(struct net *net, int group, return 0; } -static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; @@ -1866,7 +1840,7 @@ out: } } -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -1957,8 +1931,11 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (rtnl_msg_handlers[idx] == NULL || rtnl_msg_handlers[idx][type].dumpit == NULL) continue; - if (idx > s_idx) + if (idx > s_idx) { memset(&cb->args[0], 0, sizeof(cb->args)); + cb->prev_seq = 0; + cb->seq = 0; + } if (rtnl_msg_handlers[idx][type].dumpit(skb, cb)) break; } @@ -2051,7 +2028,39 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } -static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +/** + * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry + */ +int ndo_dflt_fdb_add(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, + u16 flags) +{ + int err = -EINVAL; + + /* If aging addresses are supported device will need to + * implement its own handler for this. + */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return err; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_add_excl(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_add_excl(dev, addr); + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_add); + +static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2082,7 +2091,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } addr = nla_data(tb[NDA_LLADDR]); - if (!is_valid_ether_addr(addr)) { + if (is_zero_ether_addr(addr)) { pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); return -EINVAL; } @@ -2103,10 +2112,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { - err = dev->netdev_ops->ndo_fdb_add(ndm, tb, - dev, addr, - nlh->nlmsg_flags); + if ((ndm->ndm_flags & NTF_SELF)) { + if (dev->netdev_ops->ndo_fdb_add) + err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); + else + err = ndo_dflt_fdb_add(ndm, tb, dev, addr, + nlh->nlmsg_flags); if (!err) { rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); @@ -2117,7 +2129,36 @@ out: return err; } -static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +/** + * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry + */ +int ndo_dflt_fdb_del(struct ndmsg *ndm, + struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr) +{ + int err = -EOPNOTSUPP; + + /* If aging addresses are supported device will need to + * implement its own handler for this. + */ + if (ndm->ndm_state & NUD_PERMANENT) { + pr_info("%s: FDB only supports static addresses\n", dev->name); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + + return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_del); + +static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; @@ -2174,8 +2215,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } /* Embedded bridge, macvlan, and any other device support */ - if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { - err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + if (ndm->ndm_flags & NTF_SELF) { + if (dev->netdev_ops->ndo_fdb_del) + err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); + else + err = ndo_dflt_fdb_del(ndm, tb, dev, addr); if (!err) { rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2220,7 +2264,7 @@ skip: * @dev: netdevice * * Default netdevice operation to dump the existing unicast address list. - * Returns zero on success. + * Returns number of addresses from list put in skb. */ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -2260,6 +2304,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) if (dev->netdev_ops->ndo_fdb_dump) idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); + else + idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); } rcu_read_unlock(); @@ -2411,8 +2457,7 @@ errout: return err; } -static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2482,8 +2527,7 @@ out: return err; } -static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; @@ -2553,10 +2597,6 @@ out: return err; } -/* Protected by RTNL sempahore. */ -static struct rtattr **rta_buf; -static int rtattr_max; - /* Process one rtnetlink message. */ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -2564,7 +2604,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); rtnl_doit_func doit; int sz_idx, kind; - int min_len; int family; int type; int err; @@ -2576,10 +2615,10 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) type -= RTM_BASE; /* All the messages must have at least 1 byte length */ - if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) + if (nlmsg_len(nlh) < sizeof(struct rtgenmsg)) return 0; - family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; + family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family; sz_idx = type>>2; kind = type&3; @@ -2612,32 +2651,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; } - memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); - - min_len = rtm_min[sz_idx]; - if (nlh->nlmsg_len < min_len) - return -EINVAL; - - if (nlh->nlmsg_len > min_len) { - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); - - while (RTA_OK(attr, attrlen)) { - unsigned int flavor = attr->rta_type & NLA_TYPE_MASK; - if (flavor) { - if (flavor > rta_max[sz_idx]) - return -EINVAL; - rta_buf[flavor-1] = attr; - } - attr = RTA_NEXT(attr, attrlen); - } - } - doit = rtnl_get_doit(family, type); if (doit == NULL) return -EOPNOTSUPP; - return doit(skb, nlh, (void *)&rta_buf[0]); + return doit(skb, nlh); } static void rtnetlink_rcv(struct sk_buff *skb) @@ -2707,16 +2725,6 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { - int i; - - rtattr_max = 0; - for (i = 0; i < ARRAY_SIZE(rta_max); i++) - if (rta_max[i] > rtattr_max) - rtattr_max = rta_max[i]; - rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL); - if (!rta_buf) - panic("rtnetlink_init: cannot allocate rta_buf\n"); - if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 33245ef..ba64614 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -673,6 +673,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; new->inner_transport_header = old->inner_transport_header; new->inner_network_header = old->inner_network_header; + new->inner_mac_header = old->inner_mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; @@ -867,6 +868,18 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) } EXPORT_SYMBOL(skb_clone); +static void skb_headers_offset_update(struct sk_buff *skb, int off) +{ + /* {transport,network,mac}_header and tail are relative to skb->head */ + skb->transport_header += off; + skb->network_header += off; + if (skb_mac_header_was_set(skb)) + skb->mac_header += off; + skb->inner_transport_header += off; + skb->inner_network_header += off; + skb->inner_mac_header += off; +} + static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { #ifndef NET_SKBUFF_DATA_USES_OFFSET @@ -879,13 +892,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) __copy_skb_header(new, old); #ifndef NET_SKBUFF_DATA_USES_OFFSET - /* {transport,network,mac}_header are relative to skb->head */ - new->transport_header += offset; - new->network_header += offset; - if (skb_mac_header_was_set(new)) - new->mac_header += offset; - new->inner_transport_header += offset; - new->inner_network_header += offset; + skb_headers_offset_update(new, offset); #endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -1077,14 +1084,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, #else skb->end = skb->head + size; #endif - /* {transport,network,mac}_header and tail are relative to skb->head */ skb->tail += off; - skb->transport_header += off; - skb->network_header += off; - if (skb_mac_header_was_set(skb)) - skb->mac_header += off; - skb->inner_transport_header += off; - skb->inner_network_header += off; + skb_headers_offset_update(skb, off); /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1180,12 +1181,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, if (n->ip_summed == CHECKSUM_PARTIAL) n->csum_start += off; #ifdef NET_SKBUFF_DATA_USES_OFFSET - n->transport_header += off; - n->network_header += off; - if (skb_mac_header_was_set(skb)) - n->mac_header += off; - n->inner_transport_header += off; - n->inner_network_header += off; + skb_headers_offset_update(n, off); #endif return n; @@ -2741,12 +2737,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) unsigned int tnl_hlen = skb_tnl_header_len(skb); unsigned int headroom; unsigned int len; + __be16 proto; + bool csum; int sg = !!(features & NETIF_F_SG); int nfrags = skb_shinfo(skb)->nr_frags; int err = -ENOMEM; int i = 0; int pos; + proto = skb_network_protocol(skb); + if (unlikely(!proto)) + return ERR_PTR(-EINVAL); + + csum = !!can_checksum_protocol(features, proto); __skb_push(skb, doffset); headroom = skb_headroom(skb); pos = skb_headlen(skb); @@ -2884,6 +2887,12 @@ skip_fraglist: nskb->data_len = len - hsize; nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; + + if (!csum) { + nskb->csum = skb_checksum(nskb, doffset, + nskb->len - doffset, 0); + nskb->ip_summed = CHECKSUM_NONE; + } } while ((offset += len) < skb->len); return segs; @@ -3361,6 +3370,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_headroom(skb) + start; skb->csum_offset = off; + skb_set_transport_header(skb, start); return true; } EXPORT_SYMBOL_GPL(skb_partial_csum_set); diff --git a/net/core/sock.c b/net/core/sock.c index b261a79..2ff5f36 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -907,6 +907,10 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_NOFCS, valbool); break; + case SO_SELECT_ERR_QUEUE: + sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_FILTER_LOCKED); break; + case SO_SELECT_ERR_QUEUE: + v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); + break; + default: return -ENOPROTOOPT; } @@ -1298,7 +1306,6 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) module_put(owner); } -#ifdef CONFIG_CGROUPS #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) void sock_update_classid(struct sock *sk, struct task_struct *task) { @@ -1321,7 +1328,6 @@ void sock_update_netprioidx(struct sock *sk, struct task_struct *task) } EXPORT_SYMBOL_GPL(sock_update_netprioidx); #endif -#endif /** * sk_alloc - All socket objects are allocated here diff --git a/net/core/utils.c b/net/core/utils.c index e3487e46..3c7f5b5 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/jiffies.h> #include <linux/kernel.h> +#include <linux/ctype.h> #include <linux/inet.h> #include <linux/mm.h> #include <linux/net.h> @@ -348,9 +349,7 @@ int mac_pton(const char *s, u8 *mac) /* Don't dirty result unless string is valid MAC. */ for (i = 0; i < ETH_ALEN; i++) { - if (!strchr("0123456789abcdefABCDEF", s[i * 3])) - return 0; - if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1])) + if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1])) return 0; if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':') return 0; diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c index 1d9eb7c..4f72fc4 100644 --- a/net/dcb/dcbevent.c +++ b/net/dcb/dcbevent.c @@ -20,6 +20,7 @@ #include <linux/rtnetlink.h> #include <linux/notifier.h> #include <linux/export.h> +#include <net/dcbevent.h> static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 21291f1..40d5829 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1658,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = { [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, }; -static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct net_device *netdev; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4f9f5eb..ebc54fe 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; @@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v4_send_response(sk, req, NULL)) + if (dccp_v4_send_response(sk, req)) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e05981..9c61f9c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -213,8 +213,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, - struct request_values *rv_unused) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) { struct inet6_request_sock *ireq6 = inet6_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; - if (dccp_v6_send_response(sk, req, NULL)) + if (dccp_v6_send_response(sk, req)) goto drop_and_free; inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index c8da116..7d91970 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = { .len = IFNAMSIZ - 1 }, }; -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -607,7 +607,7 @@ errout: return err; } -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index e36614e..57dc159 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi return NULL; } -__le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type) +static int dn_fib_count_nhs(const struct nlattr *attr) { - while(RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(__le16*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - - return 0; -} - -static int dn_fib_count_nhs(struct rtattr *rta) -{ - int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhs = 0, nhlen = nla_len(attr); while(nhlen >= (int)sizeof(struct rtnexthop)) { if ((nhlen -= nhp->rtnh_len) < 0) @@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta) return nhs; } -static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, + const struct rtmsg *r) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); + struct rtnexthop *nhp = nla_data(attr); + int nhlen = nla_len(attr); change_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -187,7 +176,10 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons nh->nh_weight = nhp->rtnh_hops + 1; if (attrlen) { - nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0; } nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); @@ -268,7 +260,8 @@ out: } -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp) +struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[], + const struct nlmsghdr *nlh, int *errp) { int err; struct dn_fib_info *fi = NULL; @@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta if (dn_fib_props[r->rtm_type].scope > r->rtm_scope) goto err_inval; - if (rta->rta_mp) { - nhs = dn_fib_count_nhs(rta->rta_mp); - if (nhs == 0) - goto err_inval; - } + if (attrs[RTA_MULTIPATH] && + (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) + goto err_inval; fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL); err = -ENOBUFS; @@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta fi->fib_protocol = r->rtm_protocol; fi->fib_nhs = nhs; fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); - while(RTA_OK(attr, attrlen)) { - unsigned int flavour = attr->rta_type; + if (attrs[RTA_PRIORITY]) + fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]); + + if (attrs[RTA_METRICS]) { + struct nlattr *attr; + int rem; - if (flavour) { - if (flavour > RTAX_MAX) + nla_for_each_nested(attr, attrs[RTA_METRICS], rem) { + int type = nla_type(attr); + + if (type) { + if (type > RTAX_MAX || nla_len(attr) < 4) goto err_inval; - fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr); + + fi->fib_metrics[type-1] = nla_get_u32(attr); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 2); - if (rta->rta_mp) { - if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0) + if (attrs[RTA_PREFSRC]) + fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]); + + if (attrs[RTA_MULTIPATH]) { + if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + + if (attrs[RTA_OIF] && + fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF])) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2)) + + if (attrs[RTA_GATEWAY] && + fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY])) goto err_inval; } else { struct dn_fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 2); + + if (attrs[RTA_OIF]) + nh->nh_oif = nla_get_u32(attrs[RTA_OIF]); + + if (attrs[RTA_GATEWAY]) + nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); + nh->nh_flags = r->rtm_flags; nh->nh_weight = 1; } if (r->rtm_type == RTN_NAT) { - if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) + if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF]) goto err_inval; - memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2); + + fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); goto link_it; } if (dn_fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH]) goto err_inval; + goto link_it; } @@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 2)) + if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] || + fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST])) if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -486,39 +489,21 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res) spin_unlock_bh(&dn_fib_multipath_lock); } - -static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) -{ - int i; - - for(i = 1; i <= RTA_MAX; i++) { - struct rtattr *attr = rta[i-1]; - if (attr) { - if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS && - i != RTA_TABLE) - rta[i-1] = (struct rtattr *)RTA_DATA(attr); - } - } - - return 0; -} - -static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table) { - if (rta[RTA_TABLE - 1]) - table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]); + if (attrs[RTA_TABLE]) + table = nla_get_u32(attrs[RTA_TABLE]); return table; } -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -526,22 +511,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); - if (tb) - return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0); + if (!tb) + return -ESRCH; - return -ESRCH; + return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb)); } -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct dn_fib_table *tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct rtmsg *r = nlmsg_data(nlh); + struct nlattr *attrs[RTA_MAX+1]; + int err; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -549,14 +536,15 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void * if (!net_eq(net, &init_net)) return -EINVAL; - if (dn_fib_check_attr(r, rta)) - return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); - if (tb) - return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); + tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1); + if (!tb) + return -ENOBUFS; - return -ENOBUFS; + return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb)); } static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) @@ -566,10 +554,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad struct nlmsghdr nlh; struct rtmsg rtm; } req; - struct dn_kern_rta rta; + struct { + struct nlattr hdr; + __le16 dst; + } dst_attr = { + .dst = dst, + }; + struct { + struct nlattr hdr; + __le16 prefsrc; + } prefsrc_attr = { + .prefsrc = ifa->ifa_local, + }; + struct { + struct nlattr hdr; + u32 oif; + } oif_attr = { + .oif = ifa->ifa_dev->dev->ifindex, + }; + struct nlattr *attrs[RTA_MAX+1] = { + [RTA_DST] = (struct nlattr *) &dst_attr, + [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr, + [RTA_OIF] = (struct nlattr *) &oif_attr, + }; memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); if (type == RTN_UNICAST) tb = dn_fib_get_table(RT_MIN_TABLE, 1); @@ -591,14 +600,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); req.rtm.rtm_type = type; - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; - if (cmd == RTM_NEWROUTE) - tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL); else - tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL); } static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5ac0e15..fe32388 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1613,23 +1613,41 @@ errout: return -EMSGSIZE; } +const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { + [RTA_DST] = { .type = NLA_U16 }, + [RTA_SRC] = { .type = NLA_U16 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U16 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U16 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .type = NLA_NESTED }, + [RTA_TABLE] = { .type = NLA_U32 }, + [RTA_MARK] = { .type = NLA_U32 }, +}; + /* * This is called by both endnodes and routers now. */ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); - struct rtattr **rta = arg; struct rtmsg *rtm = nlmsg_data(nlh); struct dn_route *rt = NULL; struct dn_skb_cb *cb; int err; struct sk_buff *skb; struct flowidn fld; + struct nlattr *tb[RTA_MAX+1]; if (!net_eq(net, &init_net)) return -EINVAL; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy); + if (err < 0) + return err; + memset(&fld, 0, sizeof(fld)); fld.flowidn_proto = DNPROTO_NSP; @@ -1639,12 +1657,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void skb_reset_mac_header(skb); cb = DN_SKB_CB(skb); - if (rta[RTA_SRC-1]) - memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_IIF-1]) - memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); + if (tb[RTA_SRC]) + fld.saddr = nla_get_le16(tb[RTA_SRC]); + + if (tb[RTA_DST]) + fld.daddr = nla_get_le16(tb[RTA_DST]); + + if (tb[RTA_IIF]) + fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]); if (fld.flowidn_iif) { struct net_device *dev; @@ -1669,10 +1689,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (!err && -rt->dst.error) err = rt->dst.error; } else { - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fld.flowidn_oif = oif; + if (tb[RTA_OIF]) + fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]); + err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 6c2445b..b15c1d1 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -19,7 +19,7 @@ #include <linux/sockios.h> #include <linux/init.h> #include <linux/skbuff.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/rtnetlink.h> #include <linux/proc_fs.h> #include <linux/netdevice.h> @@ -224,26 +224,27 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z) } -static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi) +static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi) { struct rtnexthop *nhp; int nhlen; - if (rta->rta_priority && *rta->rta_priority != fi->fib_priority) + if (attrs[RTA_PRIORITY] && + nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0)) + if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) { + if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) && + (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw)) return 0; return 1; } - if (rta->rta_mp == NULL) + if (!attrs[RTA_MULTIPATH]) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + nhp = nla_data(attrs[RTA_MULTIPATH]); + nhlen = nla_len(attrs[RTA_MULTIPATH]); for_nexthops(fi) { int attrlen = nhlen - sizeof(struct rtnexthop); @@ -254,7 +255,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) return 1; if (attrlen) { - gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + struct nlattr *gw_attr; + + gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); + gw = gw_attr ? nla_get_le16(gw_attr) : 0; if (gw && gw != nh->nh_gw) return 1; @@ -488,7 +492,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!net_eq(net, &init_net)) return 0; - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED) return dn_cache_dump(skb, cb); @@ -517,7 +521,8 @@ out: return skb->len; } -static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; struct dn_fib_node *new_f, *f, **fp, **del_fp; @@ -536,15 +541,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct return -ENOBUFS; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); } - if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL) + if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL) return err; if (dz->dz_nent > (dz->dz_divisor << 2) && @@ -654,7 +658,8 @@ out: } -static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash*)tb->data; struct dn_fib_node **fp, **del_fp, *f; @@ -671,9 +676,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct return -ESRCH; dz_key_0(key); - if (rta->rta_dst) { - __le16 dst; - memcpy(&dst, rta->rta_dst, 2); + if (attrs[RTA_DST]) { + __le16 dst = nla_get_le16(attrs[RTA_DST]); if (dst & ~DZ_MASK(dz)) return -EINVAL; key = dz_key(dst, dz); @@ -703,7 +707,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) && (!r->rtm_protocol || fi->fib_protocol == r->rtm_protocol) && - dn_fib_nh_match(r, n, rta, fi) == 0) + dn_fib_nh_match(r, n, attrs, fi) == 0) del_fp = fp; } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index dfe4201..2a7efe3 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -19,7 +19,7 @@ #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/spinlock.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter_decnet.h> #include <net/sock.h> @@ -39,21 +39,21 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp) unsigned char *ptr; struct nf_dn_rtmsg *rtm; - size = NLMSG_SPACE(rt_skb->len); - size += NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); - skb = alloc_skb(size, GFP_ATOMIC); + size = NLMSG_ALIGN(rt_skb->len) + + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); + skb = nlmsg_new(size, GFP_ATOMIC); if (!skb) { *errp = -ENOMEM; return NULL; } old_tail = skb->tail; - nlh = nlmsg_put(skb, 0, 0, 0, size - sizeof(*nlh), 0); + nlh = nlmsg_put(skb, 0, 0, 0, size, 0); if (!nlh) { kfree_skb(skb); *errp = -ENOMEM; return NULL; } - rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh); + rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh); rtm->nfdn_ifindex = rt_skb->dev->ifindex; ptr = NFDN_RTMSG(rtm); skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 2bc62ea..0eb5d5e 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1,6 +1,7 @@ /* * net/dsa/dsa.c - Hardware switch handling * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -14,6 +15,9 @@ #include <linux/slab.h> #include <linux/module.h> #include <net/dsa.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_platform.h> #include "dsa_priv.h" char dsa_driver_version[] = "0.1"; @@ -287,34 +291,239 @@ static struct net_device *dev_to_net_device(struct device *dev) return NULL; } +#ifdef CONFIG_OF +static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, + struct dsa_chip_data *cd, + int chip_index, + struct device_node *link) +{ + int ret; + const __be32 *reg; + int link_port_addr; + int link_sw_addr; + struct device_node *parent_sw; + int len; + + parent_sw = of_get_parent(link); + if (!parent_sw) + return -EINVAL; + + reg = of_get_property(parent_sw, "reg", &len); + if (!reg || (len != sizeof(*reg) * 2)) + return -EINVAL; + + link_sw_addr = be32_to_cpup(reg + 1); + + if (link_sw_addr >= pd->nr_chips) + return -EINVAL; + + /* First time routing table allocation */ + if (!cd->rtable) { + cd->rtable = kmalloc(pd->nr_chips * sizeof(s8), GFP_KERNEL); + if (!cd->rtable) + return -ENOMEM; + + /* default to no valid uplink/downlink */ + memset(cd->rtable, -1, pd->nr_chips * sizeof(s8)); + } + + reg = of_get_property(link, "reg", NULL); + if (!reg) { + ret = -EINVAL; + goto out; + } + + link_port_addr = be32_to_cpup(reg); + + cd->rtable[link_sw_addr] = link_port_addr; + + return 0; +out: + kfree(cd->rtable); + return ret; +} + +static void dsa_of_free_platform_data(struct dsa_platform_data *pd) +{ + int i; + int port_index; + + for (i = 0; i < pd->nr_chips; i++) { + port_index = 0; + while (port_index < DSA_MAX_PORTS) { + if (pd->chip[i].port_names[port_index]) + kfree(pd->chip[i].port_names[port_index]); + port_index++; + } + kfree(pd->chip[i].rtable); + } + kfree(pd->chip); +} + +static int dsa_of_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + struct device_node *child, *mdio, *ethernet, *port, *link; + struct mii_bus *mdio_bus; + struct platform_device *ethernet_dev; + struct dsa_platform_data *pd; + struct dsa_chip_data *cd; + const char *port_name; + int chip_index, port_index; + const unsigned int *sw_addr, *port_reg; + int ret; + + mdio = of_parse_phandle(np, "dsa,mii-bus", 0); + if (!mdio) + return -EINVAL; + + mdio_bus = of_mdio_find_bus(mdio); + if (!mdio_bus) + return -EINVAL; + + ethernet = of_parse_phandle(np, "dsa,ethernet", 0); + if (!ethernet) + return -EINVAL; + + ethernet_dev = of_find_device_by_node(ethernet); + if (!ethernet_dev) + return -ENODEV; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + pdev->dev.platform_data = pd; + pd->netdev = ðernet_dev->dev; + pd->nr_chips = of_get_child_count(np); + if (pd->nr_chips > DSA_MAX_SWITCHES) + pd->nr_chips = DSA_MAX_SWITCHES; + + pd->chip = kzalloc(pd->nr_chips * sizeof(struct dsa_chip_data), + GFP_KERNEL); + if (!pd->chip) { + ret = -ENOMEM; + goto out_free; + } + + chip_index = 0; + for_each_available_child_of_node(np, child) { + cd = &pd->chip[chip_index]; + + cd->mii_bus = &mdio_bus->dev; + + sw_addr = of_get_property(child, "reg", NULL); + if (!sw_addr) + continue; + + cd->sw_addr = be32_to_cpup(sw_addr); + if (cd->sw_addr > PHY_MAX_ADDR) + continue; + + for_each_available_child_of_node(child, port) { + port_reg = of_get_property(port, "reg", NULL); + if (!port_reg) + continue; + + port_index = be32_to_cpup(port_reg); + + port_name = of_get_property(port, "label", NULL); + if (!port_name) + continue; + + cd->port_names[port_index] = kstrdup(port_name, + GFP_KERNEL); + if (!cd->port_names[port_index]) { + ret = -ENOMEM; + goto out_free_chip; + } + + link = of_parse_phandle(port, "link", 0); + + if (!strcmp(port_name, "dsa") && link && + pd->nr_chips > 1) { + ret = dsa_of_setup_routing_table(pd, cd, + chip_index, link); + if (ret) + goto out_free_chip; + } + + if (port_index == DSA_MAX_PORTS) + break; + } + } + + return 0; + +out_free_chip: + dsa_of_free_platform_data(pd); +out_free: + kfree(pd); + pdev->dev.platform_data = NULL; + return ret; +} + +static void dsa_of_remove(struct platform_device *pdev) +{ + struct dsa_platform_data *pd = pdev->dev.platform_data; + + if (!pdev->dev.of_node) + return; + + dsa_of_free_platform_data(pd); + kfree(pd); +} +#else +static inline int dsa_of_probe(struct platform_device *pdev) +{ + return 0; +} + +static inline void dsa_of_remove(struct platform_device *pdev) +{ +} +#endif + static int dsa_probe(struct platform_device *pdev) { static int dsa_version_printed; struct dsa_platform_data *pd = pdev->dev.platform_data; struct net_device *dev; struct dsa_switch_tree *dst; - int i; + int i, ret; if (!dsa_version_printed++) printk(KERN_NOTICE "Distributed Switch Architecture " "driver version %s\n", dsa_driver_version); + if (pdev->dev.of_node) { + ret = dsa_of_probe(pdev); + if (ret) + return ret; + + pd = pdev->dev.platform_data; + } + if (pd == NULL || pd->netdev == NULL) return -EINVAL; dev = dev_to_net_device(pd->netdev); - if (dev == NULL) - return -EINVAL; + if (dev == NULL) { + ret = -EINVAL; + goto out; + } if (dev->dsa_ptr != NULL) { dev_put(dev); - return -EEXIST; + ret = -EEXIST; + goto out; } dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (dst == NULL) { dev_put(dev); - return -ENOMEM; + ret = -ENOMEM; + goto out; } platform_set_drvdata(pdev, dst); @@ -366,6 +575,11 @@ static int dsa_probe(struct platform_device *pdev) } return 0; + +out: + dsa_of_remove(pdev); + + return ret; } static int dsa_remove(struct platform_device *pdev) @@ -385,6 +599,8 @@ static int dsa_remove(struct platform_device *pdev) dsa_switch_destroy(ds); } + dsa_of_remove(pdev); + return 0; } @@ -392,6 +608,12 @@ static void dsa_shutdown(struct platform_device *pdev) { } +static const struct of_device_id dsa_of_match_table[] = { + { .compatible = "marvell,dsa", }, + {} +}; +MODULE_DEVICE_TABLE(of, dsa_of_match_table); + static struct platform_driver dsa_driver = { .probe = dsa_probe, .remove = dsa_remove, @@ -399,6 +621,7 @@ static struct platform_driver dsa_driver = { .driver = { .name = "dsa", .owner = THIS_MODULE, + .of_match_table = dsa_of_match_table, }, }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a36c85ea..5359560 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) if (netdev_uses_trailer_tags(dev)) return htons(ETH_P_TRAILER); - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) return eth->h_proto; /* diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 43b95ca..e1b4580 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -104,6 +104,7 @@ static const u8 lowpan_llprefix[] = {0xfe, 0x80}; struct lowpan_dev_info { struct net_device *real_dev; /* real WPAN device ptr */ struct mutex dev_list_mtx; /* mutex for list ops */ + unsigned short fragment_tag; }; struct lowpan_dev_record { @@ -120,7 +121,6 @@ struct lowpan_fragment { struct list_head list; /* fragments list */ }; -static unsigned short fragment_tag; static LIST_HEAD(lowpan_fragments); static DEFINE_SPINLOCK(flist_lock); @@ -284,6 +284,9 @@ lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb) /* checksum is always inline */ memcpy(*hc06_ptr, &uh->check, 2); *hc06_ptr += 2; + + /* skip the UDP header */ + skb_pull(skb, sizeof(struct udphdr)); } static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) @@ -309,9 +312,8 @@ static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val) } static int -lowpan_uncompress_udp_header(struct sk_buff *skb) +lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh) { - struct udphdr *uh = udp_hdr(skb); u8 tmp; if (!uh) @@ -358,6 +360,14 @@ lowpan_uncompress_udp_header(struct sk_buff *skb) /* copy checksum */ memcpy(&uh->check, &skb->data[0], 2); skb_pull(skb, 2); + + /* + * UDP lenght needs to be infered from the lower layers + * here, we obtain the hint from the remaining size of the + * frame + */ + uh->len = htons(skb->len + sizeof(struct udphdr)); + pr_debug("uncompressed UDP length: src = %d", uh->len); } else { pr_debug("ERROR: unsupported NH format\n"); goto err; @@ -572,17 +582,31 @@ static int lowpan_header_create(struct sk_buff *skb, * this isn't implemented in mainline yet, so currently we assign 0xff */ { + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); + /* prepare wpan address data */ sa.addr_type = IEEE802154_ADDR_LONG; - sa.pan_id = 0xff; - - da.addr_type = IEEE802154_ADDR_LONG; - da.pan_id = 0xff; + sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - memcpy(&(da.hwaddr), daddr, 8); memcpy(&(sa.hwaddr), saddr, 8); + /* intra-PAN communications */ + da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + /* + * if the destination address is the broadcast address, use the + * corresponding short address + */ + if (lowpan_is_addr_broadcast(daddr)) { + da.addr_type = IEEE802154_ADDR_SHORT; + da.short_addr = IEEE802154_ADDR_BROADCAST; + } else { + da.addr_type = IEEE802154_ADDR_LONG; + memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); + + /* request acknowledgment */ + mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; + } return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, type, (void *)&da, (void *)&sa, skb->len); @@ -650,7 +674,7 @@ static void lowpan_fragment_timer_expired(unsigned long entry_addr) } static struct lowpan_fragment * -lowpan_alloc_new_frame(struct sk_buff *skb, u8 len, u16 tag) +lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag) { struct lowpan_fragment *frame; @@ -720,7 +744,7 @@ lowpan_process_data(struct sk_buff *skb) { struct lowpan_fragment *frame; /* slen stores the rightmost 8 bits of the 11 bits length */ - u8 slen, offset; + u8 slen, offset = 0; u16 len, tag; bool found = false; @@ -731,6 +755,18 @@ lowpan_process_data(struct sk_buff *skb) /* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */ len = ((iphc0 & 7) << 8) | slen; + if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) { + pr_debug("%s received a FRAG1 packet (tag: %d, " + "size of the entire IP packet: %d)", + __func__, tag, len); + } else { /* FRAGN */ + if (lowpan_fetch_skb_u8(skb, &offset)) + goto unlock_and_drop; + pr_debug("%s received a FRAGN packet (tag: %d, " + "size of the entire IP packet: %d, " + "offset: %d)", __func__, tag, len, offset * 8); + } + /* * check if frame assembling with the same tag is * already in progress @@ -745,17 +781,13 @@ lowpan_process_data(struct sk_buff *skb) /* alloc new frame structure */ if (!found) { + pr_debug("%s first fragment received for tag %d, " + "begin packet reassembly", __func__, tag); frame = lowpan_alloc_new_frame(skb, len, tag); if (!frame) goto unlock_and_drop; } - if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) - goto unlock_and_drop; - - if (lowpan_fetch_skb_u8(skb, &offset)) /* fetch offset */ - goto unlock_and_drop; - /* if payload fits buffer, copy it */ if (likely((offset * 8 + skb->len) <= frame->length)) skb_copy_to_linear_data_offset(frame->skb, offset * 8, @@ -773,6 +805,9 @@ lowpan_process_data(struct sk_buff *skb) list_del(&frame->list); spin_unlock_bh(&flist_lock); + pr_debug("%s successfully reassembled fragment " + "(tag %d)", __func__, tag); + dev_kfree_skb(skb); skb = frame->skb; kfree(frame); @@ -918,10 +953,35 @@ lowpan_process_data(struct sk_buff *skb) } /* UDP data uncompression */ - if (iphc0 & LOWPAN_IPHC_NH_C) - if (lowpan_uncompress_udp_header(skb)) + if (iphc0 & LOWPAN_IPHC_NH_C) { + struct udphdr uh; + struct sk_buff *new; + if (lowpan_uncompress_udp_header(skb, &uh)) goto drop; + /* + * replace the compressed UDP head by the uncompressed UDP + * header + */ + new = skb_copy_expand(skb, sizeof(struct udphdr), + skb_tailroom(skb), GFP_ATOMIC); + kfree_skb(skb); + + if (!new) + return -ENOMEM; + + skb = new; + + skb_push(skb, sizeof(struct udphdr)); + skb_reset_transport_header(skb); + skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); + + lowpan_raw_dump_table(__func__, "raw UDP header dump", + (u8 *)&uh, sizeof(uh)); + + hdr.nexthdr = UIP_PROTO_UDP; + } + /* Not fragmented package */ hdr.payload_len = htons(skb->len); @@ -969,13 +1029,13 @@ static int lowpan_get_mac_header_length(struct sk_buff *skb) static int lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, - int mlen, int plen, int offset) + int mlen, int plen, int offset, int type) { struct sk_buff *frag; int hlen, ret; - /* if payload length is zero, therefore it's a first fragment */ - hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE); + hlen = (type == LOWPAN_DISPATCH_FRAG1) ? + LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE; lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); @@ -1003,14 +1063,14 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, } static int -lowpan_skb_fragmentation(struct sk_buff *skb) +lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev) { int err, header_length, payload_length, tag, offset = 0; u8 head[5]; header_length = lowpan_get_mac_header_length(skb); payload_length = skb->len - header_length; - tag = fragment_tag++; + tag = lowpan_dev_info(dev)->fragment_tag++; /* first fragment header */ head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7); @@ -1018,7 +1078,16 @@ lowpan_skb_fragmentation(struct sk_buff *skb) head[2] = tag >> 8; head[3] = tag & 0xff; - err = lowpan_fragment_xmit(skb, head, header_length, 0, 0); + err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE, + 0, LOWPAN_DISPATCH_FRAG1); + + if (err) { + pr_debug("%s unable to send FRAG1 packet (tag: %d)", + __func__, tag); + goto exit; + } + + offset = LOWPAN_FRAG_SIZE; /* next fragment header */ head[0] &= ~LOWPAN_DISPATCH_FRAG1; @@ -1033,10 +1102,17 @@ lowpan_skb_fragmentation(struct sk_buff *skb) len = payload_length - offset; err = lowpan_fragment_xmit(skb, head, header_length, - len, offset); + len, offset, LOWPAN_DISPATCH_FRAGN); + if (err) { + pr_debug("%s unable to send a subsequent FRAGN packet " + "(tag: %d, offset: %d", __func__, tag, offset); + goto exit; + } + offset += len; } +exit: return err; } @@ -1059,7 +1135,7 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) } pr_debug("frame is too big, fragmentation is needed\n"); - err = lowpan_skb_fragmentation(skb); + err = lowpan_skb_fragmentation(skb, dev); error: dev_kfree_skb(skb); out: @@ -1087,6 +1163,12 @@ static u16 lowpan_get_short_addr(const struct net_device *dev) return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev); } +static u8 lowpan_get_dsn(const struct net_device *dev) +{ + struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; + return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} + static struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; @@ -1100,6 +1182,7 @@ static struct ieee802154_mlme_ops lowpan_mlme = { .get_pan_id = lowpan_get_pan_id, .get_phy = lowpan_get_phy, .get_short_addr = lowpan_get_short_addr, + .get_dsn = lowpan_get_dsn, }; static void lowpan_setup(struct net_device *dev) @@ -1203,6 +1286,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, return -ENODEV; lowpan_dev_info(dev)->real_dev = real_dev; + lowpan_dev_info(dev)->fragment_tag = 0; mutex_init(&lowpan_dev_info(dev)->dev_list_mtx); entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h index bba5f83..4b8f917 100644 --- a/net/ieee802154/6lowpan.h +++ b/net/ieee802154/6lowpan.h @@ -92,9 +92,10 @@ */ #define lowpan_is_iid_16_bit_compressable(a) \ ((((a)->s6_addr16[4]) == 0) && \ - (((a)->s6_addr16[5]) == 0) && \ - (((a)->s6_addr16[6]) == 0) && \ - ((((a)->s6_addr[14]) & 0x80) == 0)) + (((a)->s6_addr[10]) == 0) && \ + (((a)->s6_addr[11]) == 0xff) && \ + (((a)->s6_addr[12]) == 0xfe) && \ + (((a)->s6_addr[13]) == 0)) /* multicast address */ #define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index e0da175..581a595 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -291,6 +291,9 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, size_t copied = 0; int err = -EOPNOTSUPP; struct sk_buff *skb; + struct sockaddr_ieee802154 *saddr; + + saddr = (struct sockaddr_ieee802154 *)msg->msg_name; skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -309,6 +312,13 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, sock_recv_ts_and_drops(msg, sk, skb); + if (saddr) { + saddr->family = AF_IEEE802154; + saddr->addr = mac_cb(skb)->sa; + } + if (addr_len) + *addr_len = sizeof(*saddr); + if (flags & MSG_TRUNC) copied = skb->len; done: diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 97351e1..7e49bbc 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -64,8 +64,8 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req) int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) { - /* XXX: nlh is right at the start of msg */ - void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); + struct nlmsghdr *nlh = nlmsg_hdr(msg); + void *hdr = genlmsg_data(nlmsg_data(nlh)); if (genlmsg_end(msg, hdr) < 0) goto out; @@ -97,8 +97,8 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) { - /* XXX: nlh is right at the start of msg */ - void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); + struct nlmsghdr *nlh = nlmsg_hdr(msg); + void *hdr = genlmsg_data(nlmsg_data(nlh)); if (genlmsg_end(msg, hdr) < 0) goto out; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7944df7..8603ca8 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,6 +166,7 @@ config IP_PNP_RARP config NET_IPIP tristate "IP: tunneling" select INET_TUNNEL + select NET_IP_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX This is helper module to demultiplex GRE packets on GRE version field criteria. Required by ip_gre and pptp modules. +config NET_IP_TUNNEL + tristate + default n + config NET_IPGRE tristate "IP: GRE tunnels over IP" depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX + select NET_IP_TUNNEL help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the @@ -313,6 +319,7 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" select INET_TUNNEL + select NET_IP_TUNNEL depends on INET_XFRM_MODE_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15ca63e..089cb9f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,6 +13,7 @@ obj-y := route.o inetpeer.o protocol.o \ fib_frontend.o fib_semantics.o fib_trie.o \ inet_fragment.o ping.o +obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c929d9c..93824c5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -111,7 +111,6 @@ #include <net/sock.h> #include <net/raw.h> #include <net/icmp.h> -#include <net/ipip.h> #include <net/inet_common.h> #include <net/xfrm.h> #include <net/net_namespace.h> @@ -1283,9 +1282,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int ihl; int id; unsigned int offset = 0; - - if (!(features & NETIF_F_V4_CSUM)) - features &= ~NETIF_F_SG; + bool tunnel; if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_TCPV4 | @@ -1293,6 +1290,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0))) goto out; @@ -1307,6 +1305,8 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, ihl))) goto out; + tunnel = !!skb->encapsulation; + __skb_pull(skb, ihl); skb_reset_transport_header(skb); iph = ip_hdr(skb); @@ -1326,7 +1326,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = ip_hdr(skb); - if (proto == IPPROTO_UDP) { + if (!tunnel && proto == IPPROTO_UDP) { iph->id = htons(id); iph->frag_off = htons(offset >> 3); if (skb->next != NULL) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index fea4929..247ec19 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp_ptr += dev->addr_len; memcpy(arp_ptr, &src_ip, 4); arp_ptr += 4; - if (target_hw != NULL) - memcpy(arp_ptr, target_hw, dev->addr_len); - else - memset(arp_ptr, 0, dev->addr_len); - arp_ptr += dev->addr_len; + + switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + if (target_hw != NULL) + memcpy(arp_ptr, target_hw, dev->addr_len); + else + memset(arp_ptr, 0, dev->addr_len); + arp_ptr += dev->addr_len; + } memcpy(arp_ptr, &dest_ip, 4); return skb; @@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb) arp_ptr += dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - arp_ptr += dev->addr_len; + switch (dev_type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) + case ARPHRD_IEEE1394: + break; +#endif + default: + arp_ptr += dev->addr_len; + } memcpy(&tip, arp_ptr, 4); /* * Check for bad requests for 127.x.x.x and requests for multicast diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f678507..5d985e3 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -536,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, return NULL; } -static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -775,7 +775,7 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa) return NULL; } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct in_ifaddr *ifa; @@ -1499,6 +1499,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; head = &net->dev_index_head[h]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ + net->dev_base_seq; hlist_for_each_entry_rcu(dev, head, index_hlist) { if (idx < s_idx) goto cont; @@ -1519,6 +1521,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_unlock(); goto done; } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } cont: idx++; @@ -1730,8 +1733,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -1791,6 +1793,77 @@ errout: return err; } +static int inet_netconf_dump_devconf(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int h, s_h; + int idx, s_idx; + struct net_device *dev; + struct in_device *in_dev; + struct hlist_head *head; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ + net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + goto cont; + + if (inet_netconf_fill_devconf(skb, dev->ifindex, + &in_dev->cnf, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + -1) <= 0) { + rcu_read_unlock(); + goto done; + } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + rcu_read_unlock(); + } + if (h == NETDEV_HASHENTRIES) { + if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } + if (h == NETDEV_HASHENTRIES + 1) { + if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } +done: + cb->args[0] = h; + cb->args[1] = idx; + + return skb->len; +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -2195,6 +2268,6 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - NULL, NULL); + inet_netconf_dump_devconf, NULL); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb4bb12..c7629a2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -604,7 +604,7 @@ errout: return err; } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -626,7 +626,7 @@ errout: return err; } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct fib_config cfg; @@ -957,8 +957,8 @@ static void nl_fib_input(struct sk_buff *skb) net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); - if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || - nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) + if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || + nlmsg_len(nlh) < sizeof(*frn)) return; skb = skb_clone(skb, GFP_KERNEL); @@ -966,7 +966,7 @@ static void nl_fib_input(struct sk_buff *skb) return; nlh = nlmsg_hdr(skb); - frn = (struct fib_result_nl *) NLMSG_DATA(nlh); + frn = (struct fib_result_nl *) nlmsg_data(nlh); tb = fib_get_table(net, frn->tb_id_in); nl_fib_lookup(frn, tb); diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 7a4c710..d2d5a99 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -27,11 +27,6 @@ static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; static DEFINE_SPINLOCK(gre_proto_lock); -struct gre_base_hdr { - __be16 flags; - __be16 protocol; -}; -#define GRE_HEADER_SECTION 4 int gre_add_protocol(const struct gre_protocol *proto, u8 version) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 786d97a..6acb541 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) { - int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); + int err = req->rsk_ops->rtx_syn_ack(parent, req); if (!err) req->num_retrans++; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7afa2c3..8620408 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, #define EXPIRES_IN_MS(tmo) DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { r->idiag_timer = 1; r->idiag_retrans = icsk->icsk_retransmits; r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index f4fd23d..1206ca6 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -23,6 +23,28 @@ #include <net/sock.h> #include <net/inet_frag.h> +#include <net/inet_ecn.h> + +/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements + * Value : 0xff if frame should be dropped. + * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field + */ +const u8 ip_frag_ecn_table[16] = { + /* at least one fragment had CE, and others ECT_0 or ECT_1 */ + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, + + /* invalid combinations : drop frame */ + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, + [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, +}; +EXPORT_SYMBOL(ip_frag_ecn_table); static void inet_frag_secret_rebuild(unsigned long dummy) { @@ -102,7 +124,6 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) { write_lock(&f->lock); hlist_del(&fq->list); - fq->net->nqueues--; write_unlock(&f->lock); inet_frag_lru_del(fq); } @@ -182,6 +203,9 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) q = list_first_entry(&nf->lru_list, struct inet_frag_queue, lru_list); atomic_inc(&q->refcnt); + /* Remove q from list to avoid several CPUs grabbing it */ + list_del_init(&q->lru_list); + spin_unlock(&nf->lru_lock); spin_lock(&q->lock); @@ -235,7 +259,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, atomic_inc(&qp->refcnt); hlist_add_head(&qp->list, &f->hash[hash]); - nf->nqueues++; write_unlock(&f->lock); inet_frag_lru_add(nf, qp); return qp; diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cc280a3..1975f52 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/if_vlan.h> #include <linux/inet_lro.h> +#include <net/checksum.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); @@ -114,11 +115,9 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) *(p+2) = lro_desc->tcp_rcv_tsecr; } + csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len)); iph->tot_len = htons(lro_desc->ip_tot_len); - iph->check = 0; - iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); - tcph->check = 0; tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a6445b8..9385206 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -79,40 +79,11 @@ struct ipq { struct inet_peer *peer; }; -/* RFC 3168 support : - * We want to check ECN values of all fragments, do detect invalid combinations. - * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. - */ -#define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */ -#define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */ -#define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */ -#define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */ - static inline u8 ip4_frag_ecn(u8 tos) { return 1 << (tos & INET_ECN_MASK); } -/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements - * Value : 0xff if frame should be dropped. - * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field - */ -static const u8 ip4_frag_ecn_table[16] = { - /* at least one fragment had CE, and others ECT_0 or ECT_1 */ - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, - - /* invalid combinations : drop frame */ - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, - [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, -}; - static struct inet_frags ip4_frags; int ip_frag_nqueues(struct net *net) @@ -551,7 +522,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, ipq_kill(qp); - ecn = ip4_frag_ecn_table[qp->ecn]; + ecn = ip_frag_ecn_table[qp->ecn]; if (unlikely(ecn == 0xff)) { err = -EINVAL; goto out_fail; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 91d66db..e5dfd28 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -37,7 +37,7 @@ #include <net/ip.h> #include <net/icmp.h> #include <net/protocol.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/arp.h> #include <net/checksum.h> #include <net/dsfield.h> @@ -108,15 +108,6 @@ fatal route to network, even if it were you who configured fatal static route: you are innocent. :-) - - - 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain - practically identical code. It would be good to glue them - together, but it is not very evident, how to make them modular. - sit is integral part of IPv6, ipip and gre are naturally modular. - We could extract common parts (hash table, ioctl etc) - to a separate module (ip_tunnel.c). - Alexey Kuznetsov. */ @@ -126,400 +117,135 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); -static void ipgre_tunnel_setup(struct net_device *dev); -static int ipgre_tunnel_bind_dev(struct net_device *dev); - -/* Fallback tunnel: no source, no destination, no key, no options */ - -#define HASH_SIZE 16 static int ipgre_net_id __read_mostly; -struct ipgre_net { - struct ip_tunnel __rcu *tunnels[4][HASH_SIZE]; - - struct net_device *fb_tunnel_dev; -}; - -/* Tunnel hash table */ - -/* - 4 hash tables: - - 3: (remote,local) - 2: (remote,*) - 1: (*,local) - 0: (*,*) +static int gre_tap_net_id __read_mostly; - We require exact key match i.e. if a key is present in packet - it will match only tunnel with the same key; if it is not present, - it will match only keyless tunnel. - - All keysless packets, if not matched configured keyless tunnels - will match fallback tunnel. - */ +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); -#define tunnels_r_l tunnels[3] -#define tunnels_r tunnels[2] -#define tunnels_l tunnels[1] -#define tunnels_wc tunnels[0] + if (!csum) + break; + /* Fall through. */ -static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; } - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; + return csum; } -/* Does key in tunnel parameters match packet */ -static bool ipgre_key_match(const struct ip_tunnel_parm *p, - __be16 flags, __be32 key) +static int ip_gre_calc_hlen(__be16 o_flags) { - if (p->i_flags & GRE_KEY) { - if (flags & GRE_KEY) - return key == p->i_key; - else - return false; /* key expected, none present */ - } else - return !(flags & GRE_KEY); -} + int addend = 4; -/* Given src, dst and key, find appropriate for input tunnel. */ + if (o_flags&TUNNEL_CSUM) + addend += 4; + if (o_flags&TUNNEL_KEY) + addend += 4; + if (o_flags&TUNNEL_SEQ) + addend += 4; + return addend; +} -static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, - __be32 remote, __be32 local, - __be16 flags, __be32 key, - __be16 gre_proto) +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err, int *hdr_len) { - struct net *net = dev_net(dev); - int link = dev->ifindex; - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(key); - struct ip_tunnel *t, *cand = NULL; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int dev_type = (gre_proto == htons(ETH_P_TEB)) ? - ARPHRD_ETHER : ARPHRD_IPGRE; - int score, cand_score = 4; - - for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { - if (local != t->parms.iph.saddr || - remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } - - for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { - if (remote != t->parms.iph.daddr || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + struct iphdr *iph = ip_hdr(skb); + struct gre_base_hdr *greh; + __be32 *options; - for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { - if ((local != t->parms.iph.saddr && - (local != t->parms.iph.daddr || - !ipv4_is_multicast(local))) || - !(t->dev->flags & IFF_UP)) - continue; - - if (!ipgre_key_match(&t->parms, flags, key)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; - for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { - if (t->parms.i_key != key || - !(t->dev->flags & IFF_UP)) - continue; - - if (t->dev->type != ARPHRD_IPGRE && - t->dev->type != dev_type) - continue; - - score = 0; - if (t->parms.link != link) - score |= 1; - if (t->dev->type != dev_type) - score |= 2; - if (score == 0) - return t; - - if (score < cand_score) { - cand = t; - cand_score = score; - } - } + greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2)); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; - if (cand != NULL) - return cand; + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + *hdr_len = ip_gre_calc_hlen(tpi->flags); - dev = ign->fb_tunnel_dev; - if (dev->flags & IFF_UP) - return netdev_priv(dev); + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; - return NULL; -} + tpi->proto = greh->protocol; -static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - unsigned int h = HASH(key); - int prio = 0; - - if (local) - prio |= 1; - if (remote && !ipv4_is_multicast(remote)) { - prio |= 2; - h ^= HASH(remote); + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; } - return &ign->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign, - struct ip_tunnel *t) -{ - return __ipgre_bucket(ign, &t->parms); -} - -static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t); + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); -} + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; -static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp; - struct ip_tunnel *iter; - - for (tp = ipgre_bucket(ign, t); - (iter = rtnl_dereference(*tp)) != NULL; - tp = &iter->next) { - if (t == iter) { - rcu_assign_pointer(*tp, t->next); - break; + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + *hdr_len += 4; + if (!pskb_may_pull(skb, *hdr_len)) + return -EINVAL; } } -} - -static struct ip_tunnel *ipgre_tunnel_find(struct net *net, - struct ip_tunnel_parm *parms, - int type) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - __be32 key = parms->i_key; - int link = parms->link; - struct ip_tunnel *t; - struct ip_tunnel __rcu **tp; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - for (tp = __ipgre_bucket(ign, parms); - (t = rtnl_dereference(*tp)) != NULL; - tp = &t->next) - if (local == t->parms.iph.saddr && - remote == t->parms.iph.daddr && - key == t->parms.i_key && - link == t->parms.link && - type == t->dev->type) - break; - - return t; -} - -static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) -{ - struct ip_tunnel *t, *nt; - struct net_device *dev; - char name[IFNAMSIZ]; - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); - if (t || !create) - return t; - - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else - strcpy(name, "gre%d"); - - dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); - if (!dev) - return NULL; - - dev_net_set(dev, net); - - nt = netdev_priv(dev); - nt->parms = *parms; - dev->rtnl_link_ops = &ipgre_link_ops; - dev->mtu = ipgre_tunnel_bind_dev(dev); - - if (register_netdevice(dev) < 0) - goto failed_free; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); - return nt; - -failed_free: - free_netdev(dev); - return NULL; -} - -static void ipgre_tunnel_uninit(struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - ipgre_tunnel_unlink(ign, netdev_priv(dev)); - dev_put(dev); + return 0; } - static void ipgre_err(struct sk_buff *skb, u32 info) { -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. + /* All the routers (except for Linux) return only + 8 bytes of packet payload. It means, that precise relaying of + ICMP in the real Internet is absolutely infeasible. - Moreover, Cisco "wise men" put GRE key to the third word - in GRE header. It makes impossible maintaining even soft state for keyed - GRE tunnels with enabled checksum. Tell them "thank you". - - Well, I wonder, rfc1812 was written by Cisco employee, - what the hell these idiots break standards established - by themselves??? - */ + Moreover, Cisco "wise men" put GRE key to the third word + in GRE header. It makes impossible maintaining even soft + state for keyed GRE tunnels with enabled checksum. Tell + them "thank you". + Well, I wonder, rfc1812 was written by Cisco employee, + what the hell these idiots break standards established + by themselves??? + */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph = (const struct iphdr *)skb->data; - __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2)); - int grehlen = (iph->ihl<<2) + 4; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - __be16 flags; - __be32 key = 0; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { + if (!csum_err) /* ignore csum errors. */ return; - if (flags&GRE_KEY) { - grehlen += 4; - if (flags&GRE_CSUM) - grehlen += 4; - } } - /* If only 8 bytes returned, keyed message will be dropped here */ - if (skb_headlen(skb) < grehlen) - return; - - if (flags & GRE_KEY) - key = *(((__be32 *)p) + (grehlen / 4) - 1); - switch (type) { default: case ICMP_PARAMETERPROB: @@ -548,8 +274,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, - flags, key, p[1]); + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); + + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->daddr, iph->saddr, tpi.key); if (t == NULL) return; @@ -578,158 +309,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; } -static inline u8 -ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) -{ - u8 inner = 0; - if (skb->protocol == htons(ETH_P_IP)) - inner = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - return INET_ECN_encapsulate(tos, inner); -} - static int ipgre_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn; const struct iphdr *iph; - u8 *h; - __be16 flags; - __sum16 csum = 0; - __be32 key = 0; - u32 seqno = 0; struct ip_tunnel *tunnel; - int offset = 4; - __be16 gre_proto; - int err; + struct tnl_ptk_info tpi; + int hdr_len; + bool csum_err = false; - if (!pskb_may_pull(skb, 16)) + if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) goto drop; - iph = ip_hdr(skb); - h = skb->data; - flags = *(__be16 *)h; - - if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { - /* - Version must be 0. - - We do not support routing headers. - */ - if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop; - - if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } - offset += 4; - } - if (flags&GRE_KEY) { - key = *(__be32 *)(h + offset); - offset += 4; - } - if (flags&GRE_SEQ) { - seqno = ntohl(*(__be32 *)(h + offset)); - offset += 4; - } - } + if (tpi.proto == htons(ETH_P_TEB)) + itn = net_generic(net, gre_tap_net_id); + else + itn = net_generic(net, ipgre_net_id); - gre_proto = *(__be16 *)(h + 2); + iph = ip_hdr(skb); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, + iph->saddr, iph->daddr, tpi.key); - tunnel = ipgre_tunnel_lookup(skb->dev, - iph->saddr, iph->daddr, flags, key, - gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; - - secpath_reset(skb); - - skb->protocol = gre_proto; - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { - skb->protocol = htons(ETH_P_IP); - if ((*(h + offset) & 0xF0) != 0x40) - offset += 4; - } - - skb->mac_header = skb->network_header; - __pskb_pull(skb, offset); - skb_postpull_rcsum(skb, skb_transport_header(skb), offset); - skb->pkt_type = PACKET_HOST; -#ifdef CONFIG_NET_IPGRE_BROADCAST - if (ipv4_is_multicast(iph->daddr)) { - /* Looped back packet, drop it! */ - if (rt_is_output_route(skb_rtable(skb))) - goto drop; - tunnel->dev->stats.multicast++; - skb->pkt_type = PACKET_BROADCAST; - } -#endif - - if (((flags&GRE_CSUM) && csum) || - (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->dev->stats.rx_crc_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - if (tunnel->parms.i_flags&GRE_SEQ) { - if (!(flags&GRE_SEQ) || - (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->dev->stats.rx_fifo_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - tunnel->i_seqno = seqno + 1; - } - - /* Warning: All skb pointers will be invalidated! */ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - err = IP_ECN_decapsulate(iph, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &iph->saddr, iph->tos); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - gro_cells_receive(&tunnel->gro_cells, skb); + ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - drop: kfree_skb(skb); return 0; @@ -746,7 +352,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; return skb; } else if (skb->ip_summed == CHECKSUM_PARTIAL && - tunnel->parms.o_flags&GRE_CSUM) { + tunnel->parms.o_flags&TUNNEL_CSUM) { err = skb_checksum_help(skb); if (unlikely(err)) goto error; @@ -760,494 +366,157 @@ error: return ERR_PTR(err); } -static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *gre_build_header(struct sk_buff *skb, + const struct tnl_ptk_info *tpi, + int hdr_len) { - struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); - struct ip_tunnel *tunnel = netdev_priv(dev); - const struct iphdr *old_iph; - const struct iphdr *tiph; - struct flowi4 fl4; - u8 tos; - __be16 df; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ - int gre_hlen; - __be32 dst; - int mtu; - u8 ttl; - int err; - int pkt_len; - - skb = handle_offloads(tunnel, skb); - if (IS_ERR(skb)) { - dev->stats.tx_dropped++; - return NETDEV_TX_OK; - } + struct gre_base_hdr *greh; - if (!skb->encapsulation) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } + skb_push(skb, hdr_len); - old_iph = ip_hdr(skb); + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; - if (dev->type == ARPHRD_ETHER) - IPCB(skb)->flags = 0; + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - if (dev->header_ops && dev->type == ARPHRD_IPGRE) { - gre_hlen = 0; - tiph = (const struct iphdr *)skb->data; - } else { - gre_hlen = tunnel->hlen; - tiph = &tunnel->parms.iph; - } - - if ((dst = tiph->daddr) == 0) { - /* NBMA tunnel */ - - if (skb_dst(skb) == NULL) { - dev->stats.tx_fifo_errors++; - goto tx_error; + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; } - - if (skb->protocol == htons(ETH_P_IP)) { - rt = skb_rtable(skb); - dst = rt_nexthop(rt, old_iph->daddr); + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - const struct in6_addr *addr6; - struct neighbour *neigh; - bool do_tx_error_icmp; - int addr_type; - - neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); - if (neigh == NULL) - goto tx_error; - - addr6 = (const struct in6_addr *)&neigh->primary_key; - addr_type = ipv6_addr_type(addr6); - - if (addr_type == IPV6_ADDR_ANY) { - addr6 = &ipv6_hdr(skb)->daddr; - addr_type = ipv6_addr_type(addr6); - } - - if ((addr_type & IPV6_ADDR_COMPATv4) == 0) - do_tx_error_icmp = true; - else { - do_tx_error_icmp = false; - dst = addr6->s6_addr32[3]; - } - neigh_release(neigh); - if (do_tx_error_icmp) - goto tx_error_icmp; + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *(__sum16 *)ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); } -#endif - else - goto tx_error; } - ttl = tiph->ttl; - tos = tiph->tos; - if (tos & 0x1) { - tos &= ~0x1; - if (skb->protocol == htons(ETH_P_IP)) - tos = old_iph->tos; - else if (skb->protocol == htons(ETH_P_IPV6)) - tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); - } + return skb; +} - rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, - tunnel->parms.o_key, RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error; - } - tdev = rt->dst.dev; +static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params, + __be16 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct tnl_ptk_info tpi; - if (tdev == dev) { - ip_rt_put(rt); - dev->stats.collisions++; - goto tx_error; + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; } - df = tiph->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (old_iph->frag_off&htons(IP_DF)); + tpi.flags = tunnel->parms.o_flags; + tpi.proto = proto; + tpi.key = tunnel->parms.o_key; + if (tunnel->parms.o_flags & TUNNEL_SEQ) + tunnel->o_seqno++; + tpi.seq = htonl(tunnel->o_seqno); - if (!skb_is_gso(skb) && - (old_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } + /* Push GRE header. */ + skb = gre_build_header(skb, &tpi, tunnel->hlen); + if (unlikely(!skb)) { + dev->stats.tx_dropped++; + return; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - if (!skb_is_gso(skb) && - mtu >= IPV6_MIN_MTU && - mtu < skb->len - tunnel->hlen + gre_hlen) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } - } -#endif + ip_tunnel_xmit(skb, dev, tnl_params); +} - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; +static netdev_tx_t ipgre_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tnl_params; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; - - if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (max_headroom > dev->needed_headroom) - dev->needed_headroom = max_headroom; - if (!new_skb) { - ip_rt_put(rt); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - dev_kfree_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - /* Warning : tiph value might point to freed memory */ - } + if (dev->header_ops) { + /* Need space for new headers */ + if (skb_cow_head(skb, dev->needed_headroom - + (tunnel->hlen + sizeof(struct iphdr)))); + goto free_skb; - skb_push(skb, gre_hlen); - skb_reset_network_header(skb); - skb_set_transport_header(skb, sizeof(*iph)); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ + tnl_params = (const struct iphdr *)skb->data; - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = IPPROTO_GRE; - iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - - tunnel_ip_select_ident(skb, old_iph, &rt->dst); - - if (ttl == 0) { - if (skb->protocol == htons(ETH_P_IP)) - iph->ttl = old_iph->ttl; -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) - iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; -#endif - else - iph->ttl = ip4_dst_hoplimit(&rt->dst); - } - - ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; - ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : skb->protocol; - - if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); + /* Pull skb since ip_tunnel_xmit() needs skb->data pointing + * to gre header. + */ + skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + } else { + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (tunnel->parms.o_flags&GRE_SEQ) { - ++tunnel->o_seqno; - *ptr = htonl(tunnel->o_seqno); - ptr--; - } - if (tunnel->parms.o_flags&GRE_KEY) { - *ptr = tunnel->parms.o_key; - ptr--; - } - /* Skip GRE checksum if skb is getting offloaded. */ - if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && - (tunnel->parms.o_flags&GRE_CSUM)) { - int offset = skb_transport_offset(skb); - - if (skb_has_shared_frag(skb)) { - err = __skb_linearize(skb); - if (err) - goto tx_error; - } - - *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, - skb->len - offset, - 0)); - } + tnl_params = &tunnel->parms.iph; } - nf_reset(skb); + __gre_xmit(skb, dev, tnl_params, skb->protocol); - pkt_len = skb->len - skb_transport_offset(skb); - err = ip_local_out(skb); - if (likely(net_xmit_eval(err) == 0)) { - u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - u64_stats_update_end(&tstats->syncp); - } else { - dev->stats.tx_errors++; - dev->stats.tx_aborted_errors++; - } return NETDEV_TX_OK; -#if IS_ENABLED(CONFIG_IPV6) -tx_error_icmp: - dst_link_failure(skb); -#endif -tx_error: - dev->stats.tx_errors++; +free_skb: dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; return NETDEV_TX_OK; } -static int ipgre_tunnel_bind_dev(struct net_device *dev) +static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, + struct net_device *dev) { - struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; - const struct iphdr *iph; - int hlen = LL_MAX_HEADER; - int mtu = ETH_DATA_LEN; - int addend = sizeof(struct iphdr) + 4; - - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; - - /* Guess output device to choose reasonable mtu and needed_headroom */ - - if (iph->daddr) { - struct flowi4 fl4; - struct rtable *rt; - - rt = ip_route_output_gre(dev_net(dev), &fl4, - iph->daddr, iph->saddr, - tunnel->parms.o_key, - RT_TOS(iph->tos), - tunnel->parms.link); - if (!IS_ERR(rt)) { - tdev = rt->dst.dev; - ip_rt_put(rt); - } - - if (dev->type != ARPHRD_ETHER) - dev->flags |= IFF_POINTOPOINT; - } + struct ip_tunnel *tunnel = netdev_priv(dev); - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + skb = handle_offloads(tunnel, skb); + if (IS_ERR(skb)) + goto out; - if (tdev) { - hlen = tdev->hard_header_len + tdev->needed_headroom; - mtu = tdev->mtu; - } - dev->iflink = tunnel->parms.link; - - /* Precalculate GRE options length */ - if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { - if (tunnel->parms.o_flags&GRE_CSUM) - addend += 4; - if (tunnel->parms.o_flags&GRE_KEY) - addend += 4; - if (tunnel->parms.o_flags&GRE_SEQ) - addend += 4; - } - dev->needed_headroom = addend + hlen; - mtu -= dev->hard_header_len + addend; + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; - if (mtu < 68) - mtu = 68; + __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); - tunnel->hlen = addend; - /* TCP offload with GRE SEQ is not supported. */ - if (!(tunnel->parms.o_flags & GRE_SEQ)) { - dev->features |= NETIF_F_GSO_SOFTWARE; - dev->hw_features |= NETIF_F_GSO_SOFTWARE; - } + return NETDEV_TX_OK; - return mtu; +free_skb: + dev_kfree_skb(skb); +out: + dev->stats.tx_dropped++; + return NETDEV_TX_OK; } -static int -ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - - switch (cmd) { - case SIOCGETTUNNEL: - t = NULL; - if (dev == ign->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipgre_tunnel_locate(net, &p, 0); - } - if (t == NULL) - t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; - break; - - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - if (!(p.i_flags&GRE_KEY)) - p.i_key = 0; - if (!(p.o_flags&GRE_KEY)) - p.o_key = 0; - - t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - unsigned int nflags = 0; - - t = netdev_priv(dev); - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { - err = -EINVAL; - break; - } - ipgre_tunnel_unlink(ign, t); - synchronize_net(); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - t->parms.o_key = p.o_key; - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - } - - if (t) { - err = 0; - if (cmd == SIOCCHGTUNNEL) { - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - if (t->parms.link != p.link) { - t->parms.link = p.link; - dev->mtu = ipgre_tunnel_bind_dev(dev); - netdev_state_change(dev); - } - } - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; - - case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == ign->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) - goto done; - err = -EPERM; - if (t == netdev_priv(ign->fb_tunnel_dev)) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; - default: - err = -EINVAL; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { + return -EINVAL; } + p.i_flags = gre_flags_to_tnl_flags(p.i_flags); + p.o_flags = gre_flags_to_tnl_flags(p.o_flags); -done: - return err; -} + err = ip_tunnel_ioctl(dev, &p, cmd); + if (err) + return err; -static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) - return -EINVAL; - dev->mtu = new_mtu; + p.i_flags = tnl_flags_to_gre_flags(p.i_flags); + p.o_flags = tnl_flags_to_gre_flags(p.o_flags); + + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; return 0; } @@ -1277,25 +546,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) ... ftp fec0:6666:6666::193.233.7.65 ... - */ - static int ipgre_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { struct ip_tunnel *t = netdev_priv(dev); - struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); - __be16 *p = (__be16 *)(iph+1); + struct iphdr *iph; + struct gre_base_hdr *greh; - memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); - p[0] = t->parms.o_flags; - p[1] = htons(type); + iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); + greh = (struct gre_base_hdr *)(iph+1); + greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); + greh->protocol = htons(type); - /* - * Set the source hardware address. - */ + memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); + /* Set the source hardware address. */ if (saddr) memcpy(&iph->saddr, saddr, 4); if (daddr) @@ -1303,7 +570,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (iph->daddr) return t->hlen; - return -t->hlen; + return -(t->hlen + sizeof(*iph)); } static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) @@ -1357,31 +624,21 @@ static int ipgre_close(struct net_device *dev) } return 0; } - #endif static const struct net_device_ops ipgre_netdev_ops = { .ndo_init = ipgre_tunnel_init, - .ndo_uninit = ipgre_tunnel_uninit, + .ndo_uninit = ip_tunnel_uninit, #ifdef CONFIG_NET_IPGRE_BROADCAST .ndo_open = ipgre_open, .ndo_stop = ipgre_close, #endif - .ndo_start_xmit = ipgre_tunnel_xmit, + .ndo_start_xmit = ipgre_xmit, .ndo_do_ioctl = ipgre_tunnel_ioctl, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; -static void ipgre_dev_free(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - - gro_cells_destroy(&tunnel->gro_cells); - free_percpu(dev->tstats); - free_netdev(dev); -} - #define GRE_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ @@ -1390,35 +647,48 @@ static void ipgre_dev_free(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipgre_netdev_ops; - dev->destructor = ipgre_dev_free; + ip_tunnel_setup(dev, ipgre_net_id); +} - dev->type = ARPHRD_IPGRE; - dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; +static void __gre_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel; + + tunnel = netdev_priv(dev); + tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); + tunnel->parms.iph.protocol = IPPROTO_GRE; + + dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; - dev->flags = IFF_NOARP; - dev->iflink = 0; - dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; - dev->features |= GRE_FEATURES; + dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES; dev->hw_features |= GRE_FEATURES; + + if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { + /* TCP offload with GRE SEQ is not supported. */ + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + /* Can use a lockless transmit, unless we generate + * output sequences + */ + dev->features |= NETIF_F_LLTX; + } } static int ipgre_tunnel_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - struct iphdr *iph; - int err; + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; + __gre_tunnel_init(dev); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + memcpy(dev->dev_addr, &iph->saddr, 4); + memcpy(dev->broadcast, &iph->daddr, 4); - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); - memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + dev->type = ARPHRD_IPGRE; + dev->flags = IFF_NOARP; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->addr_len = 4; if (iph->daddr) { #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -1432,106 +702,30 @@ static int ipgre_tunnel_init(struct net_device *dev) } else dev->header_ops = &ipgre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - err = gro_cells_init(&tunnel->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); - return err; - } - - return 0; -} - -static void ipgre_fb_tunnel_init(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *iph = &tunnel->parms.iph; - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - iph->version = 4; - iph->protocol = IPPROTO_GRE; - iph->ihl = 5; - tunnel->hlen = sizeof(struct iphdr) + 4; - - dev_hold(dev); + return ip_tunnel_init(dev); } - static const struct gre_protocol ipgre_protocol = { .handler = ipgre_rcv, .err_handler = ipgre_err, }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) -{ - int prio; - - for (prio = 0; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - - t = rtnl_dereference(ign->tunnels[prio][h]); - - while (t != NULL) { - unregister_netdevice_queue(t->dev, head); - t = rtnl_dereference(t->next); - } - } - } -} - static int __net_init ipgre_init_net(struct net *net) { - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int err; - - ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", - ipgre_tunnel_setup); - if (!ign->fb_tunnel_dev) { - err = -ENOMEM; - goto err_alloc_dev; - } - dev_net_set(ign->fb_tunnel_dev, net); - - ipgre_fb_tunnel_init(ign->fb_tunnel_dev); - ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; - - if ((err = register_netdev(ign->fb_tunnel_dev))) - goto err_reg_dev; - - rcu_assign_pointer(ign->tunnels_wc[0], - netdev_priv(ign->fb_tunnel_dev)); - return 0; - -err_reg_dev: - ipgre_dev_free(ign->fb_tunnel_dev); -err_alloc_dev: - return err; + return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } static void __net_exit ipgre_exit_net(struct net *net) { - struct ipgre_net *ign; - LIST_HEAD(list); - - ign = net_generic(net, ipgre_net_id); - rtnl_lock(); - ipgre_destroy_tunnels(ign, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); + ip_tunnel_delete_net(itn); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, .exit = ipgre_exit_net, .id = &ipgre_net_id, - .size = sizeof(struct ipgre_net), + .size = sizeof(struct ip_tunnel_net), }; static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1576,8 +770,8 @@ out: return ipgre_tunnel_validate(tb, data); } -static void ipgre_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) +static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], + struct ip_tunnel_parm *parms) { memset(parms, 0, sizeof(*parms)); @@ -1590,10 +784,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_GRE_LINK]); if (data[IFLA_GRE_IFLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS])); if (data[IFLA_GRE_OFLAGS]) - parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS])); if (data[IFLA_GRE_IKEY]) parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1617,148 +811,46 @@ static void ipgre_netlink_parms(struct nlattr *data[], parms->iph.frag_off = htons(IP_DF); } -static int ipgre_tap_init(struct net_device *dev) +static int gre_tap_init(struct net_device *dev) { - struct ip_tunnel *tunnel; - - tunnel = netdev_priv(dev); - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + __gre_tunnel_init(dev); - ipgre_tunnel_bind_dev(dev); - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; + return ip_tunnel_init(dev); } -static const struct net_device_ops ipgre_tap_netdev_ops = { - .ndo_init = ipgre_tap_init, - .ndo_uninit = ipgre_tunnel_uninit, - .ndo_start_xmit = ipgre_tunnel_xmit, +static const struct net_device_ops gre_tap_netdev_ops = { + .ndo_init = gre_tap_init, + .ndo_uninit = ip_tunnel_uninit, + .ndo_start_xmit = gre_tap_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, - .ndo_change_mtu = ipgre_tunnel_change_mtu, - .ndo_get_stats64 = ipgre_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipgre_tap_setup(struct net_device *dev) { - ether_setup(dev); - - dev->netdev_ops = &ipgre_tap_netdev_ops; - dev->destructor = ipgre_dev_free; - - dev->iflink = 0; - dev->features |= NETIF_F_NETNS_LOCAL; - - dev->features |= GRE_FEATURES; - dev->hw_features |= GRE_FEATURES; + dev->netdev_ops = &gre_tap_netdev_ops; + ip_tunnel_setup(dev, gre_tap_net_id); } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], - struct nlattr *data[]) +static int ipgre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); - int mtu; - int err; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &nt->parms); - - if (ipgre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; - - if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) - eth_hw_addr_random(dev); - - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - - /* Can use a lockless transmit, unless we generate output sequences */ - if (!(nt->parms.o_flags & GRE_SEQ)) - dev->features |= NETIF_F_LLTX; - - err = register_netdevice(dev); - if (err) - goto out; - - dev_hold(dev); - ipgre_tunnel_link(ign, nt); + struct ip_tunnel_parm p; -out: - return err; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_newlink(dev, tb, &p); } static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t, *nt; - struct net *net = dev_net(dev); - struct ipgre_net *ign = net_generic(net, ipgre_net_id); struct ip_tunnel_parm p; - int mtu; - - if (dev == ign->fb_tunnel_dev) - return -EINVAL; - - nt = netdev_priv(dev); - ipgre_netlink_parms(data, &p); - - t = ipgre_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else { - t = nt; - - if (dev->type != ARPHRD_ETHER) { - unsigned int nflags = 0; - - if (ipv4_is_multicast(p.iph.daddr)) - nflags = IFF_BROADCAST; - else if (p.iph.daddr) - nflags = IFF_POINTOPOINT; - - if ((dev->flags ^ nflags) & - (IFF_POINTOPOINT | IFF_BROADCAST)) - return -EINVAL; - } - ipgre_tunnel_unlink(ign, t); - t->parms.iph.saddr = p.iph.saddr; - t->parms.iph.daddr = p.iph.daddr; - t->parms.i_key = p.i_key; - if (dev->type != ARPHRD_ETHER) { - memcpy(dev->dev_addr, &p.iph.saddr, 4); - memcpy(dev->broadcast, &p.iph.daddr, 4); - } - ipgre_tunnel_link(ign, t); - netdev_state_change(dev); - } - - t->parms.o_key = p.o_key; - t->parms.iph.ttl = p.iph.ttl; - t->parms.iph.tos = p.iph.tos; - t->parms.iph.frag_off = p.iph.frag_off; - - if (t->parms.link != p.link) { - t->parms.link = p.link; - mtu = ipgre_tunnel_bind_dev(dev); - if (!tb[IFLA_MTU]) - dev->mtu = mtu; - netdev_state_change(dev); - } - - return 0; + ipgre_netlink_parms(data, tb, &p); + return ip_tunnel_changelink(dev, tb, &p); } static size_t ipgre_get_size(const struct net_device *dev) @@ -1793,8 +885,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || - nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || - nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || @@ -1832,6 +924,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .validate = ipgre_tunnel_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; @@ -1845,13 +938,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .validate = ipgre_tap_validate, .newlink = ipgre_newlink, .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, }; -/* - * And now the modules code and kernel interface. - */ +static int __net_init ipgre_tap_init_net(struct net *net) +{ + return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL); +} + +static void __net_exit ipgre_tap_exit_net(struct net *net) +{ + struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); + ip_tunnel_delete_net(itn); +} + +static struct pernet_operations ipgre_tap_net_ops = { + .init = ipgre_tap_init_net, + .exit = ipgre_tap_exit_net, + .id = &gre_tap_net_id, + .size = sizeof(struct ip_tunnel_net), +}; static int __init ipgre_init(void) { @@ -1863,6 +971,10 @@ static int __init ipgre_init(void) if (err < 0) return err; + err = register_pernet_device(&ipgre_tap_net_ops); + if (err < 0) + goto pnet_tap_faied; + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); @@ -1877,16 +989,17 @@ static int __init ipgre_init(void) if (err < 0) goto tap_ops_failed; -out: - return err; + return 0; tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: + unregister_pernet_device(&ipgre_tap_net_ops); +pnet_tap_faied: unregister_pernet_device(&ipgre_net_ops); - goto out; + return err; } static void __exit ipgre_fini(void) @@ -1895,6 +1008,7 @@ static void __exit ipgre_fini(void) rtnl_link_unregister(&ipgre_link_ops); if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) pr_info("%s: can't remove protocol\n", __func__); + unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } @@ -1904,3 +1018,4 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); MODULE_ALIAS_NETDEV("gre0"); +MODULE_ALIAS_NETDEV("gretap0"); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c new file mode 100644 index 0000000..e4147ec --- /dev/null +++ b/net/ipv4/ip_tunnel.c @@ -0,0 +1,1035 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/rculist.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/ip_tunnels.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#endif + +static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, + __be32 key, __be32 remote) +{ + return hash_32((__force u32)key ^ (__force u32)remote, + IP_TNL_HASH_BITS); +} + +/* Often modified stats are per cpu, other are shared (netdev->stats) */ +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; + tot->rx_errors = dev->stats.rx_errors; + + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + tot->collisions = dev->stats.collisions; + + return tot; +} +EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); + +static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, + __be16 flags, __be32 key) +{ + if (p->i_flags & TUNNEL_KEY) { + if (flags & TUNNEL_KEY) + return key == p->i_key; + else + /* key expected, none present */ + return false; + } else + return !(flags & TUNNEL_KEY); +} + +/* Fallback tunnel: no source, no destination, no key, no options + + Tunnel hash table: + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + Given src, dst and key, find appropriate for input tunnel. +*/ +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, + int link, __be16 flags, + __be32 remote, __be32 local, + __be32 key) +{ + unsigned int hash; + struct ip_tunnel *t, *cand = NULL; + struct hlist_head *head; + + hash = ip_tunnel_hash(itn, key, remote); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local != t->parms.iph.saddr || + remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else + cand = t; + } + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (remote != t->parms.iph.daddr || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + hash = ip_tunnel_hash(itn, key, 0); + head = &itn->tunnels[hash]; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if ((local != t->parms.iph.saddr && + (local != t->parms.iph.daddr || + !ipv4_is_multicast(local))) || + !(t->dev->flags & IFF_UP)) + continue; + + if (!ip_tunnel_key_match(&t->parms, flags, key)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + + if (flags & TUNNEL_NO_KEY) + goto skip_key_lookup; + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->parms.link == link) + return t; + else if (!cand) + cand = t; + } + +skip_key_lookup: + if (cand) + return cand; + + if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) + return netdev_priv(itn->fb_tunnel_dev); + + + return NULL; +} +EXPORT_SYMBOL_GPL(ip_tunnel_lookup); + +static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + unsigned int h; + __be32 remote; + + if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) + remote = parms->iph.daddr; + else + remote = 0; + + h = ip_tunnel_hash(itn, parms->i_key, remote); + return &itn->tunnels[h]; +} + +static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) +{ + struct hlist_head *head = ip_bucket(itn, &t->parms); + + hlist_add_head_rcu(&t->hash_node, head); +} + +static void ip_tunnel_del(struct ip_tunnel *t) +{ + hlist_del_init_rcu(&t->hash_node); +} + +static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms, + int type) +{ + __be32 remote = parms->iph.daddr; + __be32 local = parms->iph.saddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip_tunnel *t = NULL; + struct hlist_head *head = ip_bucket(itn, parms); + + hlist_for_each_entry_rcu(t, head, hash_node) { + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + } + return t; +} + +static struct net_device *__ip_tunnel_create(struct net *net, + const struct rtnl_link_ops *ops, + struct ip_tunnel_parm *parms) +{ + int err; + struct ip_tunnel *tunnel; + struct net_device *dev; + char name[IFNAMSIZ]; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else { + if (strlen(ops->kind) > (IFNAMSIZ - 3)) { + err = -E2BIG; + goto failed; + } + strlcpy(name, ops->kind, IFNAMSIZ); + strncat(name, "%d", 2); + } + + ASSERT_RTNL(); + dev = alloc_netdev(ops->priv_size, name, ops->setup); + if (!dev) { + err = -ENOMEM; + goto failed; + } + dev_net_set(dev, net); + + dev->rtnl_link_ops = ops; + + tunnel = netdev_priv(dev); + tunnel->parms = *parms; + + err = register_netdevice(dev); + if (err) + goto failed_free; + + return dev; + +failed_free: + free_netdev(dev); +failed: + return ERR_PTR(err); +} + +static inline struct rtable *ip_route_output_tunnel(struct net *net, + struct flowi4 *fl4, + int proto, + __be32 daddr, __be32 saddr, + __be32 key, __u8 tos, int oif) +{ + memset(fl4, 0, sizeof(*fl4)); + fl4->flowi4_oif = oif; + fl4->daddr = daddr; + fl4->saddr = saddr; + fl4->flowi4_tos = tos; + fl4->flowi4_proto = proto; + fl4->fl4_gre_key = key; + return ip_route_output_key(net, fl4); +} + +static int ip_tunnel_bind_dev(struct net_device *dev) +{ + struct net_device *tdev = NULL; + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *iph; + int hlen = LL_MAX_HEADER; + int mtu = ETH_DATA_LEN; + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + iph = &tunnel->parms.iph; + + /* Guess output device to choose reasonable mtu and needed_headroom */ + if (iph->daddr) { + struct flowi4 fl4; + struct rtable *rt; + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + iph->daddr, iph->saddr, + tunnel->parms.o_key, + RT_TOS(iph->tos), + tunnel->parms.link); + if (!IS_ERR(rt)) { + tdev = rt->dst.dev; + ip_rt_put(rt); + } + if (dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + } + + if (!tdev && tunnel->parms.link) + tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len + tdev->needed_headroom; + mtu = tdev->mtu; + } + dev->iflink = tunnel->parms.link; + + dev->needed_headroom = t_hlen + hlen; + mtu -= (dev->hard_header_len + t_hlen); + + if (mtu < 68) + mtu = 68; + + return mtu; +} + +static struct ip_tunnel *ip_tunnel_create(struct net *net, + struct ip_tunnel_net *itn, + struct ip_tunnel_parm *parms) +{ + struct ip_tunnel *nt, *fbt; + struct net_device *dev; + + BUG_ON(!itn->fb_tunnel_dev); + fbt = netdev_priv(itn->fb_tunnel_dev); + dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); + if (IS_ERR(dev)) + return NULL; + + dev->mtu = ip_tunnel_bind_dev(dev); + + nt = netdev_priv(dev); + ip_tunnel_add(itn, nt); + return nt; +} + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, + const struct tnl_ptk_info *tpi, bool log_ecn_error) +{ + struct pcpu_tstats *tstats; + const struct iphdr *iph = ip_hdr(skb); + int err; + + secpath_reset(skb); + + skb->protocol = tpi->proto; + + skb->mac_header = skb->network_header; + __pskb_pull(skb, tunnel->hlen); + skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); +#ifdef CONFIG_NET_IPGRE_BROADCAST + if (ipv4_is_multicast(iph->daddr)) { + /* Looped back packet, drop it! */ + if (rt_is_output_route(skb_rtable(skb))) + goto drop; + tunnel->dev->stats.multicast++; + skb->pkt_type = PACKET_BROADCAST; + } +#endif + + if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) || + ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + if (tunnel->parms.i_flags&TUNNEL_SEQ) { + if (!(tpi->flags&TUNNEL_SEQ) || + (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = ntohl(tpi->seq) + 1; + } + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + iph = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + skb->pkt_type = PACKET_HOST; + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + gro_cells_receive(&tunnel->gro_cells, skb); + return 0; + +drop: + kfree_skb(skb); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_rcv); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const struct iphdr *tnl_params) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *inner_iph; + struct iphdr *iph; + struct flowi4 fl4; + u8 tos, ttl; + __be16 df; + struct rtable *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + unsigned int max_headroom; /* The extra header space needed */ + __be32 dst; + int mtu; + + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + dst = tnl_params->daddr; + if (dst == 0) { + /* NBMA tunnel */ + + if (skb_dst(skb) == NULL) { + dev->stats.tx_fifo_errors++; + goto tx_error; + } + + if (skb->protocol == htons(ETH_P_IP)) { + rt = skb_rtable(skb); + dst = rt_nexthop(rt, inner_iph->daddr); + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + const struct in6_addr *addr6; + struct neighbour *neigh; + bool do_tx_error_icmp; + int addr_type; + + neigh = dst_neigh_lookup(skb_dst(skb), + &ipv6_hdr(skb)->daddr); + if (neigh == NULL) + goto tx_error; + + addr6 = (const struct in6_addr *)&neigh->primary_key; + addr_type = ipv6_addr_type(addr6); + + if (addr_type == IPV6_ADDR_ANY) { + addr6 = &ipv6_hdr(skb)->daddr; + addr_type = ipv6_addr_type(addr6); + } + + if ((addr_type & IPV6_ADDR_COMPATv4) == 0) + do_tx_error_icmp = true; + else { + do_tx_error_icmp = false; + dst = addr6->s6_addr32[3]; + } + neigh_release(neigh); + if (do_tx_error_icmp) + goto tx_error_icmp; + } +#endif + else + goto tx_error; + } + + tos = tnl_params->tos; + if (tos & 0x1) { + tos &= ~0x1; + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + + rt = ip_route_output_tunnel(dev_net(dev), &fl4, + tunnel->parms.iph.protocol, + dst, tnl_params->saddr, + tunnel->parms.o_key, + RT_TOS(tos), + tunnel->parms.link); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + tdev = rt->dst.dev; + + if (tdev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + + df = tnl_params->frag_off; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr); + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + df |= (inner_iph->frag_off&htons(IP_DF)); + + if (!skb_is_gso(skb) && + (inner_iph->frag_off&htons(IP_DF)) && + mtu < ntohs(inner_iph->tot_len)) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + ip_rt_put(rt); + goto tx_error; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < skb->len) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ip_rt_put(rt); + goto tx_error; + } + } +#endif + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + ttl = tnl_params->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; +#endif + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) + + rt->dst.header_len; + if (max_headroom > dev->needed_headroom) { + dev->needed_headroom = max_headroom; + if (skb_cow_head(skb, dev->needed_headroom)) { + dev->stats.tx_dropped++; + dev_kfree_skb(skb); + return; + } + } + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. */ + skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = tnl_params->protocol; + iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + iph->daddr = fl4.daddr; + iph->saddr = fl4.saddr; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + + iptunnel_xmit(skb, dev); + return; + +#if IS_ENABLED(CONFIG_IPV6) +tx_error_icmp: + dst_link_failure(skb); +#endif +tx_error: + dev->stats.tx_errors++; + dev_kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_tunnel_xmit); + +static void ip_tunnel_update(struct ip_tunnel_net *itn, + struct ip_tunnel *t, + struct net_device *dev, + struct ip_tunnel_parm *p, + bool set_mtu) +{ + ip_tunnel_del(t); + t->parms.iph.saddr = p->iph.saddr; + t->parms.iph.daddr = p->iph.daddr; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->iph.saddr, 4); + memcpy(dev->broadcast, &p->iph.daddr, 4); + } + ip_tunnel_add(itn, t); + + t->parms.iph.ttl = p->iph.ttl; + t->parms.iph.tos = p->iph.tos; + t->parms.iph.frag_off = p->iph.frag_off; + + if (t->parms.link != p->link) { + int mtu; + + t->parms.link = p->link; + mtu = ip_tunnel_bind_dev(dev); + if (set_mtu) + dev->mtu = mtu; + } + netdev_state_change(dev); +} + +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ + int err = 0; + struct ip_tunnel *t; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + BUG_ON(!itn->fb_tunnel_dev); + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == itn->fb_tunnel_dev) + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + t = netdev_priv(dev); + memcpy(p, &t->parms, sizeof(*p)); + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + if (p->iph.ttl) + p->iph.frag_off |= htons(IP_DF); + if (!(p->i_flags&TUNNEL_KEY)) + p->i_key = 0; + if (!(p->o_flags&TUNNEL_KEY)) + p->o_key = 0; + + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + + if (!t && (cmd == SIOCADDTUNNEL)) + t = ip_tunnel_create(net, itn, p); + + if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { + err = -EINVAL; + break; + } + + t = netdev_priv(dev); + } + } + + if (t) { + err = 0; + ip_tunnel_update(itn, t, dev, p, true); + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + goto done; + + if (dev == itn->fb_tunnel_dev) { + err = -ENOENT; + t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(itn->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen = tunnel->hlen + sizeof(struct iphdr); + + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); + +static void ip_tunnel_dev_free(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + gro_cells_destroy(&tunnel->gro_cells); + free_percpu(dev->tstats); + free_netdev(dev); +} + +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (itn->fb_tunnel_dev != dev) { + ip_tunnel_del(netdev_priv(dev)); + unregister_netdevice_queue(dev, head); + } +} +EXPORT_SYMBOL_GPL(ip_tunnel_dellink); + +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, + struct rtnl_link_ops *ops, char *devname) +{ + struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); + struct ip_tunnel_parm parms; + + itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); + if (!itn->tunnels) + return -ENOMEM; + + if (!ops) { + itn->fb_tunnel_dev = NULL; + return 0; + } + memset(&parms, 0, sizeof(parms)); + if (devname) + strlcpy(parms.name, devname, IFNAMSIZ); + + rtnl_lock(); + itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); + rtnl_unlock(); + if (IS_ERR(itn->fb_tunnel_dev)) { + kfree(itn->tunnels); + return PTR_ERR(itn->fb_tunnel_dev); + } + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init_net); + +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +{ + int h; + + for (h = 0; h < IP_TNL_HASH_SIZE; h++) { + struct ip_tunnel *t; + struct hlist_node *n; + struct hlist_head *thead = &itn->tunnels[h]; + + hlist_for_each_entry_safe(t, n, thead, hash_node) + unregister_netdevice_queue(t->dev, head); + } + if (itn->fb_tunnel_dev) + unregister_netdevice_queue(itn->fb_tunnel_dev, head); +} + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) +{ + LIST_HEAD(list); + + rtnl_lock(); + ip_tunnel_destroy(itn, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); + kfree(itn->tunnels); +} +EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); + +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *nt; + struct net *net = dev_net(dev); + struct ip_tunnel_net *itn; + int mtu; + int err; + + nt = netdev_priv(dev); + itn = net_generic(net, nt->ip_tnl_net_id); + + if (ip_tunnel_find(itn, p, dev->type)) + return -EEXIST; + + nt->parms = *p; + err = register_netdevice(dev); + if (err) + goto out; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + mtu = ip_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + + ip_tunnel_add(itn, nt); + +out: + return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_newlink); + +int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm *p) +{ + struct ip_tunnel *t, *nt; + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + + if (dev == itn->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + + t = ip_tunnel_find(itn, p, dev->type); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + + if (dev->type != ARPHRD_ETHER) { + unsigned int nflags = 0; + + if (ipv4_is_multicast(p->iph.daddr)) + nflags = IFF_BROADCAST; + else if (p->iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + } + } + + ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_changelink); + +int ip_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct iphdr *iph = &tunnel->parms.iph; + int err; + + dev->destructor = ip_tunnel_dev_free; + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&tunnel->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + iph->version = 4; + iph->ihl = 5; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init); + +void ip_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); + /* fb_tunnel_dev will be unregisted in net-exit call. */ + if (itn->fb_tunnel_dev != dev) + ip_tunnel_del(netdev_priv(dev)); +} +EXPORT_SYMBOL_GPL(ip_tunnel_uninit); + +/* Do least required initialization, rest of init is done in tunnel_init call */ +void ip_tunnel_setup(struct net_device *dev, int net_id) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + tunnel->ip_tnl_net_id = net_id; +} +EXPORT_SYMBOL_GPL(ip_tunnel_setup); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c3a4233..9d2bdb2 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -38,7 +38,7 @@ #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> @@ -82,44 +82,6 @@ static int vti_tunnel_bind_dev(struct net_device *dev); } while (0) -static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_errors = dev->stats.rx_errors; - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - static struct ip_tunnel *vti_tunnel_lookup(struct net *net, __be32 remote, __be32 local) { @@ -597,7 +559,7 @@ static const struct net_device_ops vti_netdev_ops = { .ndo_start_xmit = vti_tunnel_xmit, .ndo_do_ioctl = vti_tunnel_ioctl, .ndo_change_mtu = vti_tunnel_change_mtu, - .ndo_get_stats64 = vti_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void vti_dev_free(struct net_device *dev) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index bf6c5cf..efa1138 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -206,7 +206,7 @@ static int __init ic_open_devs(void) struct ic_device *d, **last; struct net_device *dev; unsigned short oflags; - unsigned long start; + unsigned long start, next_msg; last = &ic_first_dev; rtnl_lock(); @@ -263,12 +263,23 @@ static int __init ic_open_devs(void) /* wait for a carrier on at least one device */ start = jiffies; + next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { + int wait, elapsed; + for_each_netdev(&init_net, dev) if (ic_is_init_dev(dev) && netif_carrier_ok(dev)) goto have_carrier; msleep(1); + + if time_before(jiffies, next_msg) + continue; + + elapsed = jiffies_to_msecs(jiffies - start); + wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000; + pr_info("Waiting up to %d more seconds for network.\n", wait); + next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12); } have_carrier: rtnl_unlock(); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 8f024d4..77bfcce 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -111,227 +111,21 @@ #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> -#define HASH_SIZE 16 -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) - static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static int ipip_net_id __read_mostly; -struct ipip_net { - struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_wc[1]; - struct ip_tunnel __rcu **tunnels[4]; - - struct net_device *fb_tunnel_dev; -}; static int ipip_tunnel_init(struct net_device *dev); -static void ipip_tunnel_setup(struct net_device *dev); -static void ipip_dev_free(struct net_device *dev); static struct rtnl_link_ops ipip_link_ops __read_mostly; -static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - tot->collisions = dev->stats.collisions; - - return tot; -} - -static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, - __be32 remote, __be32 local) -{ - unsigned int h0 = HASH(remote); - unsigned int h1 = HASH(local); - struct ip_tunnel *t; - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1]) - if (local == t->parms.iph.saddr && - remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) - return t; - - for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0]) - if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) - return t; - - for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1]) - if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) - return t; - - t = rcu_dereference(ipn->tunnels_wc[0]); - if (t && (t->dev->flags&IFF_UP)) - return t; - return NULL; -} - -static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn, - struct ip_tunnel_parm *parms) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - unsigned int h = 0; - int prio = 0; - - if (remote) { - prio |= 2; - h ^= HASH(remote); - } - if (local) { - prio |= 1; - h ^= HASH(local); - } - return &ipn->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn, - struct ip_tunnel *t) -{ - return __ipip_bucket(ipn, &t->parms); -} - -static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp; - struct ip_tunnel *iter; - - for (tp = ipip_bucket(ipn, t); - (iter = rtnl_dereference(*tp)) != NULL; - tp = &iter->next) { - if (t == iter) { - rcu_assign_pointer(*tp, t->next); - break; - } - } -} - -static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) -{ - struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); - rcu_assign_pointer(*tp, t); -} - -static int ipip_tunnel_create(struct net_device *dev) -{ - struct ip_tunnel *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - int err; - - err = ipip_tunnel_init(dev); - if (err < 0) - goto out; - - err = register_netdevice(dev); - if (err < 0) - goto out; - - strcpy(t->parms.name, dev->name); - dev->rtnl_link_ops = &ipip_link_ops; - - dev_hold(dev); - ipip_tunnel_link(ipn, t); - return 0; - -out: - return err; -} - -static struct ip_tunnel *ipip_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) -{ - __be32 remote = parms->iph.daddr; - __be32 local = parms->iph.saddr; - struct ip_tunnel *t, *nt; - struct ip_tunnel __rcu **tp; - struct net_device *dev; - char name[IFNAMSIZ]; - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - for (tp = __ipip_bucket(ipn, parms); - (t = rtnl_dereference(*tp)) != NULL; - tp = &t->next) { - if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) - return t; - } - if (!create) - return NULL; - - if (parms->name[0]) - strlcpy(name, parms->name, IFNAMSIZ); - else - strcpy(name, "tunl%d"); - - dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); - if (dev == NULL) - return NULL; - - dev_net_set(dev, net); - - nt = netdev_priv(dev); - nt->parms = *parms; - - if (ipip_tunnel_create(dev) < 0) - goto failed_free; - - return nt; - -failed_free: - ipip_dev_free(dev); - return NULL; -} - -/* called with RTNL */ -static void ipip_tunnel_uninit(struct net_device *dev) -{ - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - if (dev == ipn->fb_tunnel_dev) - RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); - else - ipip_tunnel_unlink(ipn, netdev_priv(dev)); - dev_put(dev); -} - static int ipip_err(struct sk_buff *skb, u32 info) { @@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; int err; - - switch (type) { - default: - case ICMP_PARAMETERPROB: - return 0; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe they are just ether pollution. --ANK - */ - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - case ICMP_REDIRECT: - break; - } + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; err = -ENOENT; - t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); if (t == NULL) goto out; @@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info) else t->err_count = 1; t->err_time = jiffies; -out: +out: return err; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. */ + .proto = htons(ETH_P_IP), +}; + static int ipip_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - int err; - - tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); - if (tunnel != NULL) { - struct pcpu_tstats *tstats; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->saddr, iph->daddr, 0); + if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - - secpath_reset(skb); - - skb->mac_header = skb->network_header; - skb_reset_network_header(skb); - skb->protocol = htons(ETH_P_IP); - skb->pkt_type = PACKET_HOST; - - __skb_tunnel_rx(skb, tunnel->dev); - - err = IP_ECN_decapsulate(iph, skb); - if (unlikely(err)) { - if (log_ecn_error) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &iph->saddr, iph->tos); - if (err > 1) { - ++tunnel->dev->stats.rx_frame_errors; - ++tunnel->dev->stats.rx_errors; - goto drop; - } - } - - tstats = this_cpu_ptr(tunnel->dev->tstats); - u64_stats_update_begin(&tstats->syncp); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - u64_stats_update_end(&tstats->syncp); - - netif_rx(skb); - return 0; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); } return -1; @@ -463,329 +209,64 @@ drop: * This function assumes it is being called from dev_queue_xmit() * and that skb is filled properly by that function. */ - static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tiph = &tunnel->parms.iph; - u8 tos = tunnel->parms.iph.tos; - __be16 df = tiph->frag_off; - struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ - const struct iphdr *old_iph; - struct iphdr *iph; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ - __be32 dst = tiph->daddr; - struct flowi4 fl4; - int mtu; - - if (skb->protocol != htons(ETH_P_IP)) - goto tx_error; - if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_help(skb)) + if (unlikely(skb->protocol != htons(ETH_P_IP))) goto tx_error; - old_iph = ip_hdr(skb); - - if (tos & 1) - tos = old_iph->tos; - - if (!dst) { - /* NBMA tunnel */ - if ((rt = skb_rtable(skb)) == NULL) { - dev->stats.tx_fifo_errors++; - goto tx_error; - } - dst = rt_nexthop(rt, old_iph->daddr); + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; } - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, - dst, tiph->saddr, - 0, 0, - IPPROTO_IPIP, RT_TOS(tos), - tunnel->parms.link); - if (IS_ERR(rt)) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; - } - tdev = rt->dst.dev; - - if (tdev == dev) { - ip_rt_put(rt); - dev->stats.collisions++; - goto tx_error; - } - - df |= old_iph->frag_off & htons(IP_DF); - - if (df) { - mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); - - if (mtu < 68) { - dev->stats.collisions++; - ip_rt_put(rt); - goto tx_error; - } - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if ((old_iph->frag_off & htons(IP_DF)) && - mtu < ntohs(old_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } - } - - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } - - /* - * Okay, now see if we can stuff it in the buffer as-is. - */ - max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); - - if (skb_headroom(skb) < max_headroom || skb_shared(skb) || - (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { - struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); - if (!new_skb) { - ip_rt_put(rt); - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - if (skb->sk) - skb_set_owner_w(new_skb, skb->sk); - dev_kfree_skb(skb); - skb = new_skb; - old_iph = ip_hdr(skb); - } - - skb->transport_header = skb->network_header; - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | - IPSKB_REROUTED); - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ - - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr)>>2; - iph->frag_off = df; - iph->protocol = IPPROTO_IPIP; - iph->tos = INET_ECN_encapsulate(tos, old_iph->tos); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - - if ((iph->ttl = tiph->ttl) == 0) - iph->ttl = old_iph->ttl; - - iptunnel_xmit(skb, dev); + ip_tunnel_xmit(skb, dev, tiph); return NETDEV_TX_OK; -tx_error_icmp: - dst_link_failure(skb); tx_error: dev->stats.tx_errors++; dev_kfree_skb(skb); return NETDEV_TX_OK; } -static void ipip_tunnel_bind_dev(struct net_device *dev) -{ - struct net_device *tdev = NULL; - struct ip_tunnel *tunnel; - const struct iphdr *iph; - - tunnel = netdev_priv(dev); - iph = &tunnel->parms.iph; - - if (iph->daddr) { - struct rtable *rt; - struct flowi4 fl4; - - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, - iph->daddr, iph->saddr, - 0, 0, - IPPROTO_IPIP, - RT_TOS(iph->tos), - tunnel->parms.link); - if (!IS_ERR(rt)) { - tdev = rt->dst.dev; - ip_rt_put(rt); - } - dev->flags |= IFF_POINTOPOINT; - } - - if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); - - if (tdev) { - dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); - dev->mtu = tdev->mtu - sizeof(struct iphdr); - } - dev->iflink = tunnel->parms.link; -} - -static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) -{ - struct net *net = dev_net(t->dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - ipip_tunnel_unlink(ipn, t); - synchronize_net(); - t->parms.iph.saddr = p->iph.saddr; - t->parms.iph.daddr = p->iph.daddr; - memcpy(t->dev->dev_addr, &p->iph.saddr, 4); - memcpy(t->dev->broadcast, &p->iph.daddr, 4); - ipip_tunnel_link(ipn, t); - t->parms.iph.ttl = p->iph.ttl; - t->parms.iph.tos = p->iph.tos; - t->parms.iph.frag_off = p->iph.frag_off; - if (t->parms.link != p->link) { - t->parms.link = p->link; - ipip_tunnel_bind_dev(t->dev); - } - netdev_state_change(t->dev); -} - static int -ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip_tunnel_parm p; - struct ip_tunnel *t; - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - switch (cmd) { - case SIOCGETTUNNEL: - t = NULL; - if (dev == ipn->fb_tunnel_dev) { - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { - err = -EFAULT; - break; - } - t = ipip_tunnel_locate(net, &p, 0); - } - if (t == NULL) - t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof(p)); - if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) - err = -EFAULT; - break; - - case SIOCADDTUNNEL: - case SIOCCHGTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - - err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - goto done; - if (p.iph.ttl) - p.iph.frag_off |= htons(IP_DF); - - t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - - if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { - if (t->dev != dev) { - err = -EEXIST; - break; - } - } else { - if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || - (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { - err = -EINVAL; - break; - } - t = netdev_priv(dev); - } - - ipip_tunnel_update(t, &p); - } - - if (t) { - err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) - err = -EFAULT; - } else - err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); - break; - - case SIOCDELTUNNEL: - err = -EPERM; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - goto done; - - if (dev == ipn->fb_tunnel_dev) { - err = -EFAULT; - if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) - goto done; - err = -ENOENT; - if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) - goto done; - err = -EPERM; - if (t->dev == ipn->fb_tunnel_dev) - goto done; - dev = t->dev; - } - unregister_netdevice(dev); - err = 0; - break; - default: - err = -EINVAL; - } - -done: - return err; -} + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + return -EFAULT; -static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ - if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + if (p.i_key || p.o_key || p.i_flags || p.o_flags) return -EINVAL; - dev->mtu = new_mtu; + if (p.iph.ttl) + p.iph.frag_off |= htons(IP_DF); + + err = ip_tunnel_ioctl(dev, &p, cmd); + if (err) + return err; + + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + return -EFAULT; + return 0; } static const struct net_device_ops ipip_netdev_ops = { - .ndo_uninit = ipip_tunnel_uninit, + .ndo_init = ipip_tunnel_init, + .ndo_uninit = ip_tunnel_uninit, .ndo_start_xmit = ipip_tunnel_xmit, .ndo_do_ioctl = ipip_tunnel_ioctl, - .ndo_change_mtu = ipip_tunnel_change_mtu, - .ndo_get_stats64 = ipip_get_stats64, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; -static void ipip_dev_free(struct net_device *dev) -{ - free_percpu(dev->tstats); - free_netdev(dev); -} - #define IPIP_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_HIGHDMA | \ @@ -794,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev) static void ipip_tunnel_setup(struct net_device *dev) { dev->netdev_ops = &ipip_netdev_ops; - dev->destructor = ipip_dev_free; dev->type = ARPHRD_TUNNEL; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); - dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr); dev->flags = IFF_NOARP; dev->iflink = 0; dev->addr_len = 4; @@ -808,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev) dev->features |= IPIP_FEATURES; dev->hw_features |= IPIP_FEATURES; + ip_tunnel_setup(dev, ipip_net_id); } static int ipip_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - tunnel->dev = dev; - memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); - ipip_tunnel_bind_dev(dev); - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - return 0; -} - -static int __net_init ipip_fb_tunnel_init(struct net_device *dev) -{ - struct ip_tunnel *tunnel = netdev_priv(dev); - struct iphdr *iph = &tunnel->parms.iph; - struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); - - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - - iph->version = 4; - iph->protocol = IPPROTO_IPIP; - iph->ihl = 5; - - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - - dev_hold(dev); - rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); - return 0; + tunnel->hlen = 0; + tunnel->parms.iph.protocol = IPPROTO_IPIP; + return ip_tunnel_init(dev); } static void ipip_netlink_parms(struct nlattr *data[], @@ -887,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[], static int ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct net *net = dev_net(dev); - struct ip_tunnel *nt; - - nt = netdev_priv(dev); - ipip_netlink_parms(data, &nt->parms); - - if (ipip_tunnel_locate(net, &nt->parms, 0)) - return -EEXIST; + struct ip_tunnel_parm p; - return ipip_tunnel_create(dev); + ipip_netlink_parms(data, &p); + return ip_tunnel_newlink(dev, tb, &p); } static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t; struct ip_tunnel_parm p; - struct net *net = dev_net(dev); - struct ipip_net *ipn = net_generic(net, ipip_net_id); - - if (dev == ipn->fb_tunnel_dev) - return -EINVAL; ipip_netlink_parms(data, &p); @@ -916,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) return -EINVAL; - t = ipip_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else - t = netdev_priv(dev); - - ipip_tunnel_update(t, &p); - return 0; + return ip_tunnel_changelink(dev, tb, &p); } static size_t ipip_get_size(const struct net_device *dev) @@ -982,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { .setup = ipip_tunnel_setup, .newlink = ipip_newlink, .changelink = ipip_changelink, + .dellink = ip_tunnel_dellink, .get_size = ipip_get_size, .fill_info = ipip_fill_info, }; @@ -992,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = { .priority = 1, }; -static const char banner[] __initconst = - KERN_INFO "IPv4 over IPv4 tunneling driver\n"; - -static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) -{ - int prio; - - for (prio = 1; prio < 4; prio++) { - int h; - for (h = 0; h < HASH_SIZE; h++) { - struct ip_tunnel *t; - - t = rtnl_dereference(ipn->tunnels[prio][h]); - while (t != NULL) { - unregister_netdevice_queue(t->dev, head); - t = rtnl_dereference(t->next); - } - } - } -} - static int __net_init ipip_init_net(struct net *net) { - struct ipip_net *ipn = net_generic(net, ipip_net_id); - struct ip_tunnel *t; - int err; - - ipn->tunnels[0] = ipn->tunnels_wc; - ipn->tunnels[1] = ipn->tunnels_l; - ipn->tunnels[2] = ipn->tunnels_r; - ipn->tunnels[3] = ipn->tunnels_r_l; - - ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), - "tunl0", - ipip_tunnel_setup); - if (!ipn->fb_tunnel_dev) { - err = -ENOMEM; - goto err_alloc_dev; - } - dev_net_set(ipn->fb_tunnel_dev, net); - - err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev); - if (err) - goto err_reg_dev; - - if ((err = register_netdev(ipn->fb_tunnel_dev))) - goto err_reg_dev; - - t = netdev_priv(ipn->fb_tunnel_dev); - - strcpy(t->parms.name, ipn->fb_tunnel_dev->name); - return 0; - -err_reg_dev: - ipip_dev_free(ipn->fb_tunnel_dev); -err_alloc_dev: - /* nothing */ - return err; + return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } static void __net_exit ipip_exit_net(struct net *net) { - struct ipip_net *ipn = net_generic(net, ipip_net_id); - LIST_HEAD(list); - - rtnl_lock(); - ipip_destroy_tunnels(ipn, &list); - unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + ip_tunnel_delete_net(itn); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, .exit = ipip_exit_net, .id = &ipip_net_id, - .size = sizeof(struct ipip_net), + .size = sizeof(struct ip_tunnel_net), }; static int __init ipip_init(void) { int err; - printk(banner); + pr_info("ipip: IPv4 over IPv4 tunneling driver\n"); err = register_pernet_device(&ipip_net_ops); if (err < 0) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f95b3a..9d9610a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -61,7 +61,7 @@ #include <linux/netfilter_ipv4.h> #include <linux/compat.h> #include <linux/export.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/checksum.h> #include <net/netlink.h> #include <net/fib_rules.h> @@ -626,9 +626,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - e = NLMSG_DATA(nlh); + e = nlmsg_data(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); @@ -910,14 +910,14 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); - if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - e = NLMSG_DATA(nlh); + e = nlmsg_data(nlh); e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 79ca5e7..eadab1e 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net) net->ipv4.arptable_filter = arpt_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv4.arptable_filter)) - return PTR_ERR(net->ipv4.arptable_filter); - return 0; + return PTR_RET(net->ipv4.arptable_filter); } static void __net_exit arptable_filter_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 7d168dc..e7f8cad 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -37,7 +37,7 @@ #include <linux/skbuff.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netdevice.h> #include <linux/mm.h> #include <linux/moduleparam.h> @@ -172,7 +172,7 @@ static void ipt_ulog_packet(unsigned int hooknum, else copy_len = loginfo->copy_range; - size = NLMSG_SPACE(sizeof(*pm) + copy_len); + size = nlmsg_total_size(sizeof(*pm) + copy_len); ub = &ulog_buffers[groupnum]; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 32030a2..b6f2ea1 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -224,6 +224,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), + SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), + SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e28514..550781a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2311,7 +2311,7 @@ nla_put_failure: return -EMSGSIZE; } -static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct rtmsg *rtm; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index ef54377..7f4a5cb 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); @@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 960fd29..fa2f63f 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,7 +28,7 @@ static int zero; static int one = 1; -static int two = 2; +static int four = 4; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -592,13 +592,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_frto_response", - .data = &sysctl_tcp_frto_response, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), @@ -733,13 +726,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec, }, { - .procname = "tcp_cookie_size", - .data = &sysctl_tcp_cookie_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_thin_linear_timeouts", .data = &sysctl_tcp_thin_linear_timeouts, .maxlen = sizeof(int), @@ -760,7 +746,7 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, - .extra2 = &two, + .extra2 = &four, }, { .procname = "udp_mem", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e220207..a96f7b5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - /* TCP Cookie Transactions */ - if (sysctl_tcp_cookie_size > 0) { - /* Default, cookies without s_data_payload. */ - tp->cookie_values = - kzalloc(sizeof(*tp->cookie_values), - sk->sk_allocation); - if (tp->cookie_values != NULL) - kref_init(&tp->cookie_values->kref); - } /* Presumed zeroed, in order of appearance: * cookie_in_always, cookie_out_never, * s_data_constant, s_data_in, s_data_out @@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = NULL; - - if (sizeof(ctd) > optlen) - return -EINVAL; - if (copy_from_user(&ctd, optval, sizeof(ctd))) - return -EFAULT; - - if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || - ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) - return -EINVAL; - - if (ctd.tcpct_cookie_desired == 0) { - /* default to global value */ - } else if ((0x1 & ctd.tcpct_cookie_desired) || - ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || - ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { - return -EINVAL; - } - - if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { - /* Supercedes all other values */ - lock_sock(sk); - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } - tp->rx_opt.cookie_in_always = 0; /* false */ - tp->rx_opt.cookie_out_never = 1; /* true */ - release_sock(sk); - return err; - } - - /* Allocate ancillary memory before locking. - */ - if (ctd.tcpct_used > 0 || - (tp->cookie_values == NULL && - (sysctl_tcp_cookie_size > 0 || - ctd.tcpct_cookie_desired > 0 || - ctd.tcpct_s_data_desired > 0))) { - cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, - GFP_KERNEL); - if (cvp == NULL) - return -ENOMEM; - - kref_init(&cvp->kref); - } - lock_sock(sk); - tp->rx_opt.cookie_in_always = - (TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); - tp->rx_opt.cookie_out_never = 0; /* false */ - - if (tp->cookie_values != NULL) { - if (cvp != NULL) { - /* Changed values are recorded by a changed - * pointer, ensuring the cookie will differ, - * without separately hashing each value later. - */ - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - } else { - cvp = tp->cookie_values; - } - } - - if (cvp != NULL) { - cvp->cookie_desired = ctd.tcpct_cookie_desired; - - if (ctd.tcpct_used > 0) { - memcpy(cvp->s_data_payload, ctd.tcpct_value, - ctd.tcpct_used); - cvp->s_data_desired = ctd.tcpct_used; - cvp->s_data_constant = 1; /* true */ - } else { - /* No constant payload data. */ - cvp->s_data_desired = ctd.tcpct_s_data_desired; - cvp->s_data_constant = 0; /* false */ - } - - tp->cookie_values = cvp; - } - release_sock(sk); - return err; - } default: /* fallthru */ break; @@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; - case TCP_COOKIE_TRANSACTIONS: { - struct tcp_cookie_transactions ctd; - struct tcp_cookie_values *cvp = tp->cookie_values; - - if (get_user(len, optlen)) - return -EFAULT; - if (len < sizeof(ctd)) - return -EINVAL; - - memset(&ctd, 0, sizeof(ctd)); - ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? - TCP_COOKIE_IN_ALWAYS : 0) - | (tp->rx_opt.cookie_out_never ? - TCP_COOKIE_OUT_NEVER : 0); - - if (cvp != NULL) { - ctd.tcpct_flags |= (cvp->s_data_in ? - TCP_S_DATA_IN : 0) - | (cvp->s_data_out ? - TCP_S_DATA_OUT : 0); - - ctd.tcpct_cookie_desired = cvp->cookie_desired; - ctd.tcpct_s_data_desired = cvp->s_data_desired; - - memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], - cvp->cookie_pair_size); - ctd.tcpct_used = cvp->cookie_pair_size; - } - - if (put_user(sizeof(ctd), optlen)) - return -EFAULT; - if (copy_to_user(optval, &ctd, sizeof(ctd))) - return -EFAULT; - return 0; - } case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; @@ -3044,6 +2914,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_TCPV6 | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | 0) || !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; @@ -3408,134 +3279,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key); #endif -/* Each Responder maintains up to two secret values concurrently for - * efficient secret rollover. Each secret value has 4 states: - * - * Generating. (tcp_secret_generating != tcp_secret_primary) - * Generates new Responder-Cookies, but not yet used for primary - * verification. This is a short-term state, typically lasting only - * one round trip time (RTT). - * - * Primary. (tcp_secret_generating == tcp_secret_primary) - * Used both for generation and primary verification. - * - * Retiring. (tcp_secret_retiring != tcp_secret_secondary) - * Used for verification, until the first failure that can be - * verified by the newer Generating secret. At that time, this - * cookie's state is changed to Secondary, and the Generating - * cookie's state is changed to Primary. This is a short-term state, - * typically lasting only one round trip time (RTT). - * - * Secondary. (tcp_secret_retiring == tcp_secret_secondary) - * Used for secondary verification, after primary verification - * failures. This state lasts no more than twice the Maximum Segment - * Lifetime (2MSL). Then, the secret is discarded. - */ -struct tcp_cookie_secret { - /* The secret is divided into two parts. The digest part is the - * equivalent of previously hashing a secret and saving the state, - * and serves as an initialization vector (IV). The message part - * serves as the trailing secret. - */ - u32 secrets[COOKIE_WORKSPACE_WORDS]; - unsigned long expires; -}; - -#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) -#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) -#define TCP_SECRET_LIFE (HZ * 600) - -static struct tcp_cookie_secret tcp_secret_one; -static struct tcp_cookie_secret tcp_secret_two; - -/* Essentially a circular list, without dynamic allocation. */ -static struct tcp_cookie_secret *tcp_secret_generating; -static struct tcp_cookie_secret *tcp_secret_primary; -static struct tcp_cookie_secret *tcp_secret_retiring; -static struct tcp_cookie_secret *tcp_secret_secondary; - -static DEFINE_SPINLOCK(tcp_secret_locker); - -/* Select a pseudo-random word in the cookie workspace. - */ -static inline u32 tcp_cookie_work(const u32 *ws, const int n) -{ - return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; -} - -/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. - * Called in softirq context. - * Returns: 0 for success. - */ -int tcp_cookie_generator(u32 *bakery) -{ - unsigned long jiffy = jiffies; - - if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { - spin_lock_bh(&tcp_secret_locker); - if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { - /* refreshed by another */ - memcpy(bakery, - &tcp_secret_generating->secrets[0], - COOKIE_WORKSPACE_WORDS); - } else { - /* still needs refreshing */ - get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); - - /* The first time, paranoia assumes that the - * randomization function isn't as strong. But, - * this secret initialization is delayed until - * the last possible moment (packet arrival). - * Although that time is observable, it is - * unpredictably variable. Mash in the most - * volatile clock bits available, and expire the - * secret extra quickly. - */ - if (unlikely(tcp_secret_primary->expires == - tcp_secret_secondary->expires)) { - struct timespec tv; - - getnstimeofday(&tv); - bakery[COOKIE_DIGEST_WORDS+0] ^= - (u32)tv.tv_nsec; - - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_1MSL - + (0x0f & tcp_cookie_work(bakery, 0)); - } else { - tcp_secret_secondary->expires = jiffy - + TCP_SECRET_LIFE - + (0xff & tcp_cookie_work(bakery, 1)); - tcp_secret_primary->expires = jiffy - + TCP_SECRET_2MSL - + (0x1f & tcp_cookie_work(bakery, 2)); - } - memcpy(&tcp_secret_secondary->secrets[0], - bakery, COOKIE_WORKSPACE_WORDS); - - rcu_assign_pointer(tcp_secret_generating, - tcp_secret_secondary); - rcu_assign_pointer(tcp_secret_retiring, - tcp_secret_primary); - /* - * Neither call_rcu() nor synchronize_rcu() needed. - * Retiring data is not freed. It is replaced after - * further (locked) pointer updates, and a quiet time - * (minimum 1MSL, maximum LIFE - 2MSL). - */ - } - spin_unlock_bh(&tcp_secret_locker); - } else { - rcu_read_lock_bh(); - memcpy(bakery, - &rcu_dereference(tcp_secret_generating)->secrets[0], - COOKIE_WORKSPACE_WORDS); - rcu_read_unlock_bh(); - } - return 0; -} -EXPORT_SYMBOL(tcp_cookie_generator); - void tcp_done(struct sock *sk) { struct request_sock *req = tcp_sk(sk)->fastopen_rsk; @@ -3590,7 +3333,6 @@ void __init tcp_init(void) unsigned long limit; int max_rshare, max_wshare, cnt; unsigned int i; - unsigned long jiffy = jiffies; BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3666,13 +3408,5 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); - memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); - memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); - tcp_secret_one.expires = jiffy; /* past due */ - tcp_secret_two.expires = jiffy; /* past due */ - tcp_secret_generating = &tcp_secret_one; - tcp_secret_primary = &tcp_secret_one; - tcp_secret_retiring = &tcp_secret_two; - tcp_secret_secondary = &tcp_secret_two; tcp_tasklet_init(); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3bd55ba..6d9ca35 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -93,12 +93,11 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; -int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 2; +int sysctl_tcp_early_retrans __read_mostly = 3; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -108,17 +107,15 @@ int sysctl_tcp_early_retrans __read_mostly = 2; #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ -#define FLAG_ONLY_ORIG_SACKED 0x200 /* SACKs only non-rexmit sent before RTO */ +#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ -#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) #define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) -#define FLAG_ANY_PROGRESS (FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -1159,10 +1156,8 @@ static u8 tcp_sacktag_one(struct sock *sk, tcp_highest_sack_seq(tp))) state->reord = min(fack_count, state->reord); - - /* SACK enhanced F-RTO (RFC4138; Appendix B) */ - if (!after(end_seq, tp->frto_highmark)) - state->flag |= FLAG_ONLY_ORIG_SACKED; + if (!after(end_seq, tp->high_seq)) + state->flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_LOST) { @@ -1555,7 +1550,6 @@ static int tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { - const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); const unsigned char *ptr = (skb_transport_header(ack_skb) + TCP_SKB_CB(ack_skb)->sacked); @@ -1728,12 +1722,6 @@ walk: start_seq, end_seq, dup_sack); advance_sp: - /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct - * due to in-order walk - */ - if (after(end_seq, tp->frto_highmark)) - state.flag &= ~FLAG_ONLY_ORIG_SACKED; - i++; } @@ -1750,8 +1738,7 @@ advance_sp: tcp_verify_left_out(tp); if ((state.reord < tp->fackets_out) && - ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && - (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) + ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state.reord, 0); out: @@ -1825,197 +1812,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static int tcp_is_sackfrto(const struct tcp_sock *tp) -{ - return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); -} - -/* F-RTO can only be used if TCP has never retransmitted anything other than - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) - */ -bool tcp_use_frto(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct sk_buff *skb; - - if (!sysctl_tcp_frto) - return false; - - /* MTU probe and F-RTO won't really play nicely along currently */ - if (icsk->icsk_mtup.probe_size) - return false; - - if (tcp_is_sackfrto(tp)) - return true; - - /* Avoid expensive walking of rexmit queue if possible */ - if (tp->retrans_out > 1) - return false; - - skb = tcp_write_queue_head(sk); - if (tcp_skb_is_last(sk, skb)) - return true; - skb = tcp_write_queue_next(sk, skb); /* Skips head */ - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - return false; - /* Short-circuit when first non-SACKed skb has been checked */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) - break; - } - return true; -} - -/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO - * recovery a bit and use heuristics in tcp_process_frto() to detect if - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to - * keep retrans_out counting accurate (with SACK F-RTO, other than head - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS - * bits are handled if the Loss state is really to be entered (in - * tcp_enter_frto_loss). - * - * Do like tcp_enter_loss() would; when RTO expires the second time it - * does: - * "Reduce ssthresh if it has not yet been made inside this window." - */ -void tcp_enter_frto(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || - tp->snd_una == tp->high_seq || - ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && - !icsk->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(sk); - /* Our state is too optimistic in ssthresh() call because cwnd - * is not reduced until tcp_enter_frto_loss() when previous F-RTO - * recovery has not yet completed. Pattern would be this: RTO, - * Cumulative ACK, RTO (2xRTO for the same segment does not end - * up here twice). - * RFC4138 should be more specific on what to do, even though - * RTO is quite unlikely to occur after the first Cumulative ACK - * due to back-off and complexity of triggering events ... - */ - if (tp->frto_counter) { - u32 stored_cwnd; - stored_cwnd = tp->snd_cwnd; - tp->snd_cwnd = 2; - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - tp->snd_cwnd = stored_cwnd; - } else { - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); - } - /* ... in theory, cong.control module could do "any tricks" in - * ssthresh(), which means that ca_state, lost bits and lost_out - * counter would have to be faked before the call occurs. We - * consider that too expensive, unlikely and hacky, so modules - * using these in ssthresh() must deal these incompatibility - * issues if they receives CA_EVENT_FRTO and frto_counter != 0 - */ - tcp_ca_event(sk, CA_EVENT_FRTO); - } - - tp->undo_marker = tp->snd_una; - tp->undo_retrans = 0; - - skb = tcp_write_queue_head(sk); - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_verify_left_out(tp); - - /* Too bad if TCP was application limited */ - tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - - /* Earlier loss recovery underway (see RFC4138; Appendix B). - * The last condition is necessary at least in tp->frto_counter case. - */ - if (tcp_is_sackfrto(tp) && (tp->frto_counter || - ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && - after(tp->high_seq, tp->snd_una)) { - tp->frto_highmark = tp->high_seq; - } else { - tp->frto_highmark = tp->snd_nxt; - } - tcp_set_ca_state(sk, TCP_CA_Disorder); - tp->high_seq = tp->snd_nxt; - tp->frto_counter = 1; -} - -/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO, - * which indicates that we should follow the traditional RTO recovery, - * i.e. mark everything lost and do go-back-N retransmission. - */ -static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - tp->lost_out = 0; - tp->retrans_out = 0; - if (tcp_is_reno(tp)) - tcp_reset_reno_sack(tp); - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - /* - * Count the retransmission made on RTO correctly (only when - * waiting for the first ACK and did not get it)... - */ - if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { - /* For some reason this R-bit might get cleared? */ - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out += tcp_skb_pcount(skb); - /* ...enter this if branch just for the first segment */ - flag |= FLAG_DATA_ACKED; - } else { - if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - } - - /* Marking forward transmissions that were made after RTO lost - * can cause unnecessary retransmissions in some scenarios, - * SACK blocks will mitigate that in some but not in all cases. - * We used to not mark them but it was causing break-ups with - * receivers that do only in-order receival. - * - * TODO: we could detect presence of such receiver and select - * different behavior per flow. - */ - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; - } - } - tcp_verify_left_out(tp); - - tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; - tp->snd_cwnd_cnt = 0; - tp->snd_cwnd_stamp = tcp_time_stamp; - tp->frto_counter = 0; - - tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); - tcp_set_ca_state(sk, TCP_CA_Loss); - tp->high_seq = tp->snd_nxt; - TCP_ECN_queue_cwr(tp); - - tcp_clear_all_retrans_hints(tp); -} - static void tcp_clear_retrans_partial(struct tcp_sock *tp) { tp->retrans_out = 0; @@ -2042,10 +1838,13 @@ void tcp_enter_loss(struct sock *sk, int how) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; + bool new_recovery = false; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || + !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + new_recovery = true; tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); @@ -2087,8 +1886,14 @@ void tcp_enter_loss(struct sock *sk, int how) tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - /* Abort F-RTO algorithm if one is in progress */ - tp->frto_counter = 0; + + /* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing + */ + tp->frto = sysctl_tcp_frto && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; } /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2147,15 +1952,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples * available, or RTO is scheduled to fire first. */ - if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) + if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || + (flag & FLAG_ECE) || !tp->srtt) return false; delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) return false; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); - tp->early_retrans_delayed = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, + TCP_RTO_MAX); return true; } @@ -2271,10 +2077,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; - /* Do not perform any recovery during F-RTO algorithm */ - if (tp->frto_counter) - return false; - /* Trick#1: The loss is proven. */ if (tp->lost_out) return true; @@ -2318,7 +2120,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) * interval if appropriate. */ if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && - (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && + (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) && !tcp_may_send_now(sk)) return !tcp_pause_early_retransmit(sk, flag); @@ -2635,12 +2437,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked) return failed; } -/* Undo during loss recovery after partial ACK. */ -static bool tcp_try_undo_loss(struct sock *sk) +/* Undo during loss recovery after partial ACK or using F-RTO. */ +static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_may_undo(tp)) { + if (frto_undo || tcp_may_undo(tp)) { struct sk_buff *skb; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -2654,9 +2456,12 @@ static bool tcp_try_undo_loss(struct sock *sk) tp->lost_out = 0; tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); + if (frto_undo) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; - if (tcp_is_sack(tp)) + if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; } @@ -2678,6 +2483,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) struct tcp_sock *tp = tcp_sk(sk); tp->high_seq = tp->snd_nxt; + tp->tlp_high_seq = 0; tp->snd_cwnd_cnt = 0; tp->prior_cwnd = tp->snd_cwnd; tp->prr_delivered = 0; @@ -2755,7 +2561,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) tcp_verify_left_out(tp); - if (!tp->frto_counter && !tcp_any_retrans_done(sk)) + if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; if (flag & FLAG_ECE) @@ -2872,6 +2678,58 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) tcp_set_ca_state(sk, TCP_CA_Recovery); } +/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are + * recovered or spurious. Otherwise retransmits more on partial ACKs. + */ +static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + bool recovered = !before(tp->snd_una, tp->high_seq); + + if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */ + if (flag & FLAG_ORIG_SACK_ACKED) { + /* Step 3.b. A timeout is spurious if not all data are + * lost, i.e., never-retransmitted data are (s)acked. + */ + tcp_try_undo_loss(sk, true); + return; + } + if (after(tp->snd_nxt, tp->high_seq) && + (flag & FLAG_DATA_SACKED || is_dupack)) { + tp->frto = 0; /* Loss was real: 2nd part of step 3.a */ + } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { + tp->high_seq = tp->snd_nxt; + __tcp_push_pending_frames(sk, tcp_current_mss(sk), + TCP_NAGLE_OFF); + if (after(tp->snd_nxt, tp->high_seq)) + return; /* Step 2.b */ + tp->frto = 0; + } + } + + if (recovered) { + /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ + icsk->icsk_retransmits = 0; + tcp_try_undo_recovery(sk); + return; + } + if (flag & FLAG_DATA_ACKED) + icsk->icsk_retransmits = 0; + if (tcp_is_reno(tp)) { + /* A Reno DUPACK means new data in F-RTO step 2.b above are + * delivered. Lower inflight to clock out (re)tranmissions. + */ + if (after(tp->snd_nxt, tp->high_seq) && is_dupack) + tcp_add_reno_sack(sk); + else if (flag & FLAG_SND_UNA_ADVANCED) + tcp_reset_reno_sack(tp); + } + if (tcp_try_undo_loss(sk, false)) + return; + tcp_xmit_retransmit_queue(sk); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2918,12 +2776,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (icsk->icsk_ca_state) { - case TCP_CA_Loss: - icsk->icsk_retransmits = 0; - if (tcp_try_undo_recovery(sk)) - return; - break; - case TCP_CA_CWR: /* CWR is to be held something *above* high_seq * is ACKed for CWR bit to reach receiver. */ @@ -2954,18 +2806,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked; break; case TCP_CA_Loss: - if (flag & FLAG_DATA_ACKED) - icsk->icsk_retransmits = 0; - if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED) - tcp_reset_reno_sack(tp); - if (!tcp_try_undo_loss(sk)) { - tcp_moderate_cwnd(tp); - tcp_xmit_retransmit_queue(sk); - return; - } + tcp_process_loss(sk, flag, is_dupack); if (icsk->icsk_ca_state != TCP_CA_Open) return; - /* Loss is undone; fall through to processing in Open state. */ + /* Fall through to processing in Open state. */ default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -3078,6 +2922,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ void tcp_rearm_rto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* If the retrans timer is currently being used by Fast Open @@ -3091,12 +2936,13 @@ void tcp_rearm_rto(struct sock *sk) } else { u32 rto = inet_csk(sk)->icsk_rto; /* Offset the time elapsed after installing regular RTO */ - if (tp->early_retrans_delayed) { + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { struct sk_buff *skb = tcp_write_queue_head(sk); const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); /* delta may not be positive if the socket is locked - * when the delayed ER timer fires and is rescheduled. + * when the retrans timer fires and is rescheduled. */ if (delta > 0) rto = delta; @@ -3104,7 +2950,6 @@ void tcp_rearm_rto(struct sock *sk) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); } - tp->early_retrans_delayed = 0; } /* This function is called when the delayed ER timer fires. TCP enters @@ -3192,8 +3037,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_RETRANS_DATA_ACKED; ca_seq_rtt = -1; seq_rtt = -1; - if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) - flag |= FLAG_NONHEAD_RETRANS_ACKED; } else { ca_seq_rtt = now - scb->when; last_ackt = skb->tstamp; @@ -3202,6 +3045,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } if (!(sacked & TCPCB_SACKED_ACKED)) reord = min(pkts_acked, reord); + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_SACKED_ACKED) @@ -3402,150 +3247,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 return flag; } -/* A very conservative spurious RTO response algorithm: reduce cwnd and - * continue in congestion avoidance. - */ -static void tcp_conservative_spur_to_response(struct tcp_sock *tp) -{ - tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); - tp->snd_cwnd_cnt = 0; - TCP_ECN_queue_cwr(tp); - tcp_moderate_cwnd(tp); -} - -/* A conservative spurious RTO response algorithm: reduce cwnd using - * PRR and continue in congestion avoidance. - */ -static void tcp_cwr_spur_to_response(struct sock *sk) -{ - tcp_enter_cwr(sk, 0); -} - -static void tcp_undo_spur_to_response(struct sock *sk, int flag) -{ - if (flag & FLAG_ECE) - tcp_cwr_spur_to_response(sk); - else - tcp_undo_cwr(sk, true); -} - -/* F-RTO spurious RTO detection algorithm (RFC4138) - * - * F-RTO affects during two new ACKs following RTO (well, almost, see inline - * comments). State (ACK number) is kept in frto_counter. When ACK advances - * window (but not to or beyond highest sequence sent before RTO): - * On First ACK, send two new segments out. - * On Second ACK, RTO was likely spurious. Do spurious response (response - * algorithm is not part of the F-RTO detection algorithm - * given in RFC4138 but can be selected separately). - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. F-RTO allows overriding - * of Nagle, this is done using frto_counter states 2 and 3, when a new data - * segment of any size sent during F-RTO, state 2 is upgraded to 3. - * - * Rationale: if the RTO was spurious, new ACKs should arrive from the - * original window even after we transmit two new data segments. - * - * SACK version: - * on first step, wait until first cumulative ACK arrives, then move to - * the second step. In second step, the next ACK decides. - * - * F-RTO is implemented (mainly) in four functions: - * - tcp_use_frto() is used to determine if TCP is can use F-RTO - * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is - * called when tcp_use_frto() showed green light - * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm - * - tcp_enter_frto_loss() is called if there is not enough evidence - * to prove that the RTO is indeed spurious. It transfers the control - * from F-RTO to the conventional RTO recovery - */ -static bool tcp_process_frto(struct sock *sk, int flag) -{ - struct tcp_sock *tp = tcp_sk(sk); - - tcp_verify_left_out(tp); - - /* Duplicate the behavior from Loss state (fastretrans_alert) */ - if (flag & FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; - - if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || - ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) - tp->undo_marker = 0; - - if (!before(tp->snd_una, tp->frto_highmark)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); - return true; - } - - if (!tcp_is_sackfrto(tp)) { - /* RFC4138 shortcoming in step 2; should also have case c): - * ACK isn't duplicate nor advances window, e.g., opposite dir - * data, winupdate - */ - if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) - return true; - - if (!(flag & FLAG_DATA_ACKED)) { - tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), - flag); - return true; - } - } else { - if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { - if (!tcp_packets_in_flight(tp)) { - tcp_enter_frto_loss(sk, 2, flag); - return true; - } - - /* Prevent sending of new data. */ - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)); - return true; - } - - if ((tp->frto_counter >= 2) && - (!(flag & FLAG_FORWARD_PROGRESS) || - ((flag & FLAG_DATA_SACKED) && - !(flag & FLAG_ONLY_ORIG_SACKED)))) { - /* RFC4138 shortcoming (see comment above) */ - if (!(flag & FLAG_FORWARD_PROGRESS) && - (flag & FLAG_NOT_DUP)) - return true; - - tcp_enter_frto_loss(sk, 3, flag); - return true; - } - } - - if (tp->frto_counter == 1) { - /* tcp_may_send_now needs to see updated state */ - tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; - tp->frto_counter = 2; - - if (!tcp_may_send_now(sk)) - tcp_enter_frto_loss(sk, 2, flag); - - return true; - } else { - switch (sysctl_tcp_frto_response) { - case 2: - tcp_undo_spur_to_response(sk, flag); - break; - case 1: - tcp_conservative_spur_to_response(tp); - break; - default: - tcp_cwr_spur_to_response(sk); - break; - } - tp->frto_counter = 0; - tp->undo_marker = 0; - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); - } - return false; -} - /* RFC 5961 7 [ACK Throttling] */ static void tcp_send_challenge_ack(struct sock *sk) { @@ -3564,6 +3265,38 @@ static void tcp_send_challenge_ack(struct sock *sk) } } +/* This routine deals with acks during a TLP episode. + * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. + */ +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + bool is_tlp_dupack = (ack == tp->tlp_high_seq) && + !(flag & (FLAG_SND_UNA_ADVANCED | + FLAG_NOT_DUP | FLAG_DATA_SACKED)); + + /* Mark the end of TLP episode on receiving TLP dupack or when + * ack is after tlp_high_seq. + */ + if (is_tlp_dupack) { + tp->tlp_high_seq = 0; + return; + } + + if (after(ack, tp->tlp_high_seq)) { + tp->tlp_high_seq = 0; + /* Don't reduce cwnd if DSACK arrives for TLP retrans. */ + if (!(flag & FLAG_DSACKING_ACK)) { + tcp_init_cwnd_reduction(sk, true); + tcp_set_ca_state(sk, TCP_CA_CWR); + tcp_end_cwnd_reduction(sk); + tcp_set_ca_state(sk, TCP_CA_Open); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBERECOVERY); + } + } +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3578,7 +3311,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) int prior_packets; int prior_sacked = tp->sacked_out; int pkts_acked = 0; - bool frto_cwnd = false; /* If the ack is older than previous acks * then we can probably ignore it. @@ -3598,7 +3330,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (tp->early_retrans_delayed) + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); if (after(ack, prior_snd_una)) @@ -3651,30 +3384,29 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) pkts_acked = prior_packets - tp->packets_out; - if (tp->frto_counter) - frto_cwnd = tcp_process_frto(sk, flag); - /* Guarantee sacktag reordering detection against wrap-arounds */ - if (before(tp->frto_highmark, tp->snd_una)) - tp->frto_highmark = 0; - if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && - tcp_may_raise_cwnd(sk, flag)) + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, is_dupack, flag); } else { - if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) + if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); } + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) dst_confirm(dst); } + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) + tcp_schedule_loss_probe(sk); return 1; no_queue: @@ -3688,6 +3420,9 @@ no_queue: */ if (tcp_send_head(sk)) tcp_ack_probe(sk); + + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); return 1; invalid_ack: @@ -3712,8 +3447,8 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, - const u8 **hvpp, int estab, +void tcp_parse_options(const struct sk_buff *skb, + struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc) { const unsigned char *ptr; @@ -3797,31 +3532,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o */ break; #endif - case TCPOPT_COOKIE: - /* This option is variable length. - */ - switch (opsize) { - case TCPOLEN_COOKIE_BASE: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_PAIR: - /* not yet implemented */ - break; - case TCPOLEN_COOKIE_MIN+0: - case TCPOLEN_COOKIE_MIN+2: - case TCPOLEN_COOKIE_MIN+4: - case TCPOLEN_COOKIE_MIN+6: - case TCPOLEN_COOKIE_MAX: - /* 16-bit multiple */ - opt_rx->cookie_plus = opsize; - *hvpp = ptr; - break; - default: - /* ignore option */ - break; - } - break; - case TCPOPT_EXP: /* Fast Open option shares code 254 using a * 16 bits magic number. It's valid only in @@ -3867,8 +3577,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr * If it is wrong it falls back on tcp_parse_options(). */ static bool tcp_fast_parse_options(const struct sk_buff *skb, - const struct tcphdr *th, - struct tcp_sock *tp, const u8 **hvpp) + const struct tcphdr *th, struct tcp_sock *tp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -3882,7 +3591,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, return true; } - tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); + tcp_parse_options(skb, &tp->rx_opt, 1, NULL); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5263,12 +4972,10 @@ out: static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, int syn_inerr) { - const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ - if (tcp_fast_parse_options(skb, th, tp, &hash_location) && - tp->rx_opt.saw_tstamp && + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5622,12 +5329,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; - const u8 *hash_location; /* Get original SYNACK MSS value if user MSS sets mss_clamp */ tcp_clear_options(&opt); opt.user_mss = opt.mss_clamp = 0; - tcp_parse_options(synack, &opt, &hash_location, 0, NULL); + tcp_parse_options(synack, &opt, 0, NULL); mss = opt.mss_clamp; } @@ -5658,14 +5364,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len) { - const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; struct tcp_fastopen_cookie foc = { .len = -1 }; int saved_clamp = tp->rx_opt.mss_clamp; - tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); + tcp_parse_options(skb, &tp->rx_opt, 0, &foc); if (tp->rx_opt.saw_tstamp) tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5762,30 +5466,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * is initialized. */ tp->copied_seq = tp->rcv_nxt; - if (cvp != NULL && - cvp->cookie_pair_size > 0 && - tp->rx_opt.cookie_plus > 0) { - int cookie_size = tp->rx_opt.cookie_plus - - TCPOLEN_COOKIE_BASE; - int cookie_pair_size = cookie_size - + cvp->cookie_desired; - - /* A cookie extension option was sent and returned. - * Note that each incoming SYNACK replaces the - * Responder cookie. The initial exchange is most - * fragile, as protection against spoofing relies - * entirely upon the sequence and timestamp (above). - * This replacement strategy allows the correct pair to - * pass through, while any others will be filtered via - * Responder verification later. - */ - if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { - memcpy(&cvp->cookie_pair[cvp->cookie_desired], - hash_location, cookie_size); - cvp->cookie_pair_size = cookie_pair_size; - } - } - smp_mb(); tcp_finish_connect(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d09203c..2278669 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping, bool nocache) { @@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req) { - int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); + int res = tcp_v4_send_synack(sk, NULL, req, 0, false); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb, static int tcp_v4_conn_req_fastopen(struct sock *sk, struct sk_buff *skb, struct sk_buff *skb_synack, - struct request_sock *req, - struct request_values *rvp) + struct request_sock *req) { struct tcp_sock *tp = tcp_sk(sk); struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; @@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = TCP_MSS_DEFAULT; tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, - want_cookie ? NULL : &foc); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_release; - - /* Secret recipe starts with IP addresses */ - *mess++ ^= (__force u32)daddr; - *mess++ ^= (__force u32)saddr; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_release; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * of tcp_v4_send_synack()->tcp_select_initial_window(). */ skb_synack = tcp_make_synack(sk, dst, req, - (struct request_values *)&tmp_ext, fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL); if (skb_synack) { @@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (fastopen_cookie_present(&foc) && foc.len != 0) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, - (struct request_values *)&tmp_ext)) + } else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req)) goto drop_and_free; return 0; @@ -1950,6 +1908,50 @@ void tcp_v4_early_demux(struct sk_buff *skb) } } +/* Packet is added to VJ-style prequeue for processing in process + * context, if a reader task is waiting. Apparently, this exciting + * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) + * failed somewhere. Latency? Burstiness? Well, at least now we will + * see, why it failed. 8)8) --ANK + * + */ +bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sysctl_tcp_low_latency || !tp->ucopy.task) + return false; + + if (skb->len <= tcp_hdrlen(skb) && + skb_queue_len(&tp->ucopy.prequeue) == 0) + return false; + + __skb_queue_tail(&tp->ucopy.prequeue, skb); + tp->ucopy.memory += skb->truesize; + if (tp->ucopy.memory > sk->sk_rcvbuf) { + struct sk_buff *skb1; + + BUG_ON(sock_owned_by_user(sk)); + + while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { + sk_backlog_rcv(sk, skb1); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPPREQUEUEDROPPED); + } + + tp->ucopy.memory = 0; + } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { + wake_up_interruptible_sync_poll(sk_sleep(sk), + POLLIN | POLLRDNORM | POLLRDBAND); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * tcp_rto_min(sk)) / 4, + TCP_RTO_MAX); + } + return true; +} +EXPORT_SYMBOL(tcp_prequeue); + /* * From tcp_input.c */ @@ -2197,12 +2199,6 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* TCP Cookie Transactions */ - if (tp->cookie_values != NULL) { - kref_put(&tp->cookie_values->kref, - tcp_cookie_values_release); - tp->cookie_values = NULL; - } BUG_ON(tp->fastopen_rsk != NULL); /* If socket is aborted during connect operation */ @@ -2659,7 +2655,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) __u16 srcp = ntohs(inet->inet_sport); int rx_queue; - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b83a49c..05eaf89 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); bool paws_reject = false; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.rcv_tsecr -= tcptw->tw_ts_offset; @@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp = tcp_sk(newsk); - struct tcp_sock *oldtp = tcp_sk(sk); - struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - - /* TCP Cookie Transactions require space for the cookie pair, - * as it differs for each connection. There is no need to - * copy any s_data_payload stored at the original socket. - * Failure will prevent resuming the connection. - * - * Presumed copied, in order of appearance: - * cookie_in_always, cookie_out_never - */ - if (oldcvp != NULL) { - struct tcp_cookie_values *newcvp = - kzalloc(sizeof(*newtp->cookie_values), - GFP_ATOMIC); - - if (newcvp != NULL) { - kref_init(&newcvp->kref); - newcvp->cookie_desired = - oldcvp->cookie_desired; - newtp->cookie_values = newcvp; - } else { - /* Not Yet Implemented */ - newtp->cookie_values = NULL; - } - } /* Now setup tcp_sock */ newtp->pred_flags = 0; @@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = - newtp->snd_nxt = newtp->snd_up = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); @@ -440,6 +412,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_enable_early_retrans(newtp); + newtp->tlp_high_seq = 0; /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -449,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - newtp->frto_counter = 0; - newtp->frto_highmark = 0; - if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && !try_module_get(newicsk->icsk_ca_ops->owner)) newicsk->icsk_ca_ops = &tcp_init_congestion_ops; @@ -459,8 +429,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); - newtp->write_seq = newtp->pushed_seq = - treq->snt_isn + 1 + tcp_s_data_size(oldtp); + newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; @@ -537,7 +506,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, bool fastopen) { struct tcp_options_received tmp_opt; - const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); @@ -547,7 +515,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = req->ts_recent; @@ -647,7 +615,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, */ if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != - tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) + tcp_rsk(req)->snt_isn + 1)) return sk; /* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5d0b438..af354c98 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,27 +65,22 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; /* By default, RFC2861 behavior. */ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ -EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); - static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); /* Account for new data that has been sent to the network. */ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - /* Don't override Nagle indefinitely with F-RTO */ - if (tp->frto_counter == 2) - tp->frto_counter = 3; - tp->packets_out += tcp_skb_pcount(skb); - if (!prior_packets || tp->early_retrans_delayed) + if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); } @@ -384,7 +379,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) -#define OPTION_COOKIE_EXTENSION (1 << 4) #define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { @@ -398,36 +392,6 @@ struct tcp_out_options { struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; -/* The sysctl int routines are generic, so check consistency here. - */ -static u8 tcp_cookie_size_check(u8 desired) -{ - int cookie_size; - - if (desired > 0) - /* previously specified */ - return desired; - - cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); - if (cookie_size <= 0) - /* no default specified */ - return 0; - - if (cookie_size <= TCP_COOKIE_MIN) - /* value too small, specify minimum */ - return TCP_COOKIE_MIN; - - if (cookie_size >= TCP_COOKIE_MAX) - /* value too large, specify maximum */ - return TCP_COOKIE_MAX; - - if (cookie_size & 1) - /* 8-bit multiple, illegal, fix it */ - cookie_size++; - - return (u8)cookie_size; -} - /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of @@ -446,27 +410,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, { u16 options = opts->options; /* mungable copy */ - /* Having both authentication and cookies for security is redundant, - * and there's certainly not enough room. Instead, the cookie-less - * extension variant is proposed. - * - * Consider the pessimal case with authentication. The options - * could look like: - * COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 - */ if (unlikely(OPTION_MD5 & options)) { - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - *ptr++ = htonl((TCPOPT_COOKIE << 24) | - (TCPOLEN_COOKIE_BASE << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } else { - *ptr++ = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - } - options &= ~OPTION_COOKIE_EXTENSION; + *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; @@ -495,44 +441,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - /* Specification requires after timestamp, so do it now. - * - * Consider the pessimal case without authentication. The options - * could look like: - * MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 - */ - if (unlikely(OPTION_COOKIE_EXTENSION & options)) { - __u8 *cookie_copy = opts->hash_location; - u8 cookie_size = opts->hash_size; - - /* 8-bit multiple handled in tcp_cookie_size_check() above, - * and elsewhere. - */ - if (0x2 & cookie_size) { - __u8 *p = (__u8 *)ptr; - - /* 16-bit multiple */ - *p++ = TCPOPT_COOKIE; - *p++ = TCPOLEN_COOKIE_BASE + cookie_size; - *p++ = *cookie_copy++; - *p++ = *cookie_copy++; - ptr++; - cookie_size -= 2; - } else { - /* 32-bit multiple */ - *ptr++ = htonl(((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_COOKIE << 8) | - TCPOLEN_COOKIE_BASE) + - cookie_size); - } - - if (cookie_size > 0) { - memcpy(ptr, cookie_copy, cookie_size); - ptr += (cookie_size / 4); - } - } - if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | @@ -591,11 +499,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); - struct tcp_cookie_values *cvp = tp->cookie_values; unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? - tcp_cookie_size_check(cvp->cookie_desired) : - 0; struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG @@ -647,52 +551,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, tp->syn_fastopen = 1; } } - /* Note that timestamps are required by the specification. - * - * Odd numbers of bytes are prohibited by the specification, ensuring - * that the cookie is 16-bit aligned, and the resulting cookie pair is - * 32-bit aligned. - */ - if (*md5 == NULL && - (OPTION_TS & opts->options) && - cookie_size > 0) { - int need = TCPOLEN_COOKIE_BASE + cookie_size; - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - - if (need > remaining) { - /* try shrinking cookie to fit */ - cookie_size -= 2; - need -= 4; - } - } - while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { - cookie_size -= 4; - need -= 4; - } - if (TCP_COOKIE_MIN <= cookie_size) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; - opts->hash_size = cookie_size; - - /* Remember for future incarnations. */ - cvp->cookie_desired = cookie_size; - - if (cvp->cookie_desired != cvp->cookie_pair_size) { - /* Currently use random bytes as a nonce, - * assuming these are completely unpredictable - * by hostile users of the same system. - */ - get_random_bytes(&cvp->cookie_pair[0], - cookie_size); - cvp->cookie_pair_size = cookie_size; - } - remaining -= need; - } - } return MAX_TCP_OPTION_SPACE - remaining; } @@ -702,14 +561,10 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; - u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? - xvp->cookie_plus : - 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); @@ -757,28 +612,7 @@ static unsigned int tcp_synack_options(struct sock *sk, remaining -= need; } } - /* Similar rationale to tcp_syn_options() applies here, too. - * If the <SYN> options fit, the same options should fit now! - */ - if (*md5 == NULL && - ireq->tstamp_ok && - cookie_plus > TCPOLEN_COOKIE_BASE) { - int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ - - if (0x2 & need) { - /* 32-bit multiple */ - need += 2; /* NOPs */ - } - if (need <= remaining) { - opts->options |= OPTION_COOKIE_EXTENSION; - opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; - remaining -= need; - } else { - /* There's no error return, so flag it. */ - xvp->cookie_out_never = 1; /* true */ - opts->hash_size = 0; - } - } + return MAX_TCP_OPTION_SPACE - remaining; } @@ -1632,11 +1466,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf if (nonagle & TCP_NAGLE_PUSH) return true; - /* Don't use the nagle rule for urgent data (or for the final FIN). - * Nagle can be ignored during F-RTO too (see RFC4138). - */ - if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || - (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) + /* Don't use the nagle rule for urgent data (or for the final FIN). */ + if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) return true; if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) @@ -1961,6 +1792,9 @@ static int tcp_mtu_probe(struct sock *sk) * snd_up-64k-mss .. snd_up cannot be large. However, taking into * account rare use of URG, this is not a big flaw. * + * Send at most one packet when push_one > 0. Temporarily ignore + * cwnd limit to force at most one packet out when push_one == 2. + * Returns true, if no segments are in flight and we have queued segments, * but cannot send anything now because of SWS or another problem. */ @@ -1996,8 +1830,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, goto repair; /* Skip network transmission */ cwnd_quota = tcp_cwnd_test(tp, skb); - if (!cwnd_quota) - break; + if (!cwnd_quota) { + if (push_one == 2) + /* Force out a loss probe pkt. */ + cwnd_quota = 1; + else + break; + } if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) break; @@ -2051,10 +1890,129 @@ repair: if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; + + /* Send one loss probe per tail loss episode. */ + if (push_one != 2) + tcp_schedule_loss_probe(sk); tcp_cwnd_validate(sk); return false; } - return !tp->packets_out && tcp_send_head(sk); + return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); +} + +bool tcp_schedule_loss_probe(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 timeout, tlp_time_stamp, rto_time_stamp; + u32 rtt = tp->srtt >> 3; + + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) + return false; + /* No consecutive loss probes. */ + if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { + tcp_rearm_rto(sk); + return false; + } + /* Don't do any loss probe on a Fast Open connection before 3WHS + * finishes. + */ + if (sk->sk_state == TCP_SYN_RECV) + return false; + + /* TLP is only scheduled when next timer event is RTO. */ + if (icsk->icsk_pending != ICSK_TIME_RETRANS) + return false; + + /* Schedule a loss probe in 2*RTT for SACK capable connections + * in Open state, that are either limited by cwnd or application. + */ + if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || + !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + return false; + + if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && + tcp_send_head(sk)) + return false; + + /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + * for delayed ack when there's one outstanding packet. + */ + timeout = rtt << 1; + if (tp->packets_out == 1) + timeout = max_t(u32, timeout, + (rtt + (rtt >> 1) + TCP_DELACK_MAX)); + timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + + /* If RTO is shorter, just schedule TLP in its place. */ + tlp_time_stamp = tcp_time_stamp + timeout; + rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; + if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { + s32 delta = rto_time_stamp - tcp_time_stamp; + if (delta > 0) + timeout = delta; + } + + inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, + TCP_RTO_MAX); + return true; +} + +/* When probe timeout (PTO) fires, send a new segment if one exists, else + * retransmit the last segment. + */ +void tcp_send_loss_probe(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + int pcount; + int mss = tcp_current_mss(sk); + int err = -1; + + if (tcp_send_head(sk) != NULL) { + err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); + goto rearm_timer; + } + + /* At most one outstanding TLP retransmission. */ + if (tp->tlp_high_seq) + goto rearm_timer; + + /* Retransmit last segment. */ + skb = tcp_write_queue_tail(sk); + if (WARN_ON(!skb)) + goto rearm_timer; + + pcount = tcp_skb_pcount(skb); + if (WARN_ON(!pcount)) + goto rearm_timer; + + if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { + if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) + goto rearm_timer; + skb = tcp_write_queue_tail(sk); + } + + if (WARN_ON(!skb || !tcp_skb_pcount(skb))) + goto rearm_timer; + + /* Probe with zero data doesn't trigger fast recovery. */ + if (skb->len > 0) + err = __tcp_retransmit_skb(sk, skb); + + /* Record snd_nxt for loss detection. */ + if (likely(!err)) + tp->tlp_high_seq = tp->snd_nxt; + +rearm_timer: + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); + + if (likely(!err)) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSSPROBES); + return; } /* Push out any pending frames which were held back due to @@ -2675,32 +2633,24 @@ int tcp_send_synack(struct sock *sk) * sk: listener socket * dst: dst entry attached to the SYNACK * req: request_sock pointer - * rvp: request_values pointer * * Allocate one skb and build a SYNACK packet. * @dst is consumed : Caller should not use it again. */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp, struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; - struct tcp_extend_values *xvp = tcp_xv(rvp); struct inet_request_sock *ireq = inet_rsk(req); struct tcp_sock *tp = tcp_sk(sk); - const struct tcp_cookie_values *cvp = tp->cookie_values; struct tcphdr *th; struct sk_buff *skb; struct tcp_md5sig_key *md5; int tcp_header_size; int mss; - int s_data_desired = 0; - if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) - s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, - sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2742,9 +2692,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, else #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp, foc) - + sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, + foc) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2762,40 +2711,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, TCPHDR_SYN | TCPHDR_ACK); - if (OPTION_COOKIE_EXTENSION & opts.options) { - if (s_data_desired) { - u8 *buf = skb_put(skb, s_data_desired); - - /* copy data directly from the listening socket. */ - memcpy(buf, cvp->s_data_payload, s_data_desired); - TCP_SKB_CB(skb)->end_seq += s_data_desired; - } - - if (opts.hash_size > 0) { - __u32 workspace[SHA_WORKSPACE_WORDS]; - u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; - u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; - - /* Secret recipe depends on the Timestamp, (future) - * Sequence and Acknowledgment Numbers, Initiator - * Cookie, and others handled by IP variant caller. - */ - *tail-- ^= opts.tsval; - *tail-- ^= tcp_rsk(req)->rcv_isn + 1; - *tail-- ^= TCP_SKB_CB(skb)->seq + 1; - - /* recommended */ - *tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); - *tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ - - sha_transform((__u32 *)&xvp->cookie_bakery[0], - (char *)mess, - &workspace[0]); - opts.hash_location = - (__u8 *)&xvp->cookie_bakery[0]; - } - } - th->seq = htonl(TCP_SKB_CB(skb)->seq); /* XXX data is queued and acked as is. No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b78aac3..4b85e6f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - if (tp->early_retrans_delayed) { - tcp_resume_early_retransmit(sk); - return; - } if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); @@ -360,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk) WARN_ON(tcp_write_queue_empty(sk)); + tp->tlp_high_seq = 0; + if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) && !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) { /* Receiver dastardly shrinks window. Our retransmits @@ -418,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk) NET_INC_STATS_BH(sock_net(sk), mib_idx); } - if (tcp_use_frto(sk)) { - tcp_enter_frto(sk); - } else { - tcp_enter_loss(sk, 0); - } + tcp_enter_loss(sk, 0); if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { /* Retransmission failed because of local congestion, @@ -495,13 +489,20 @@ void tcp_write_timer_handler(struct sock *sk) } event = icsk->icsk_pending; - icsk->icsk_pending = 0; switch (event) { + case ICSK_TIME_EARLY_RETRANS: + tcp_resume_early_retransmit(sk); + break; + case ICSK_TIME_LOSS_PROBE: + tcp_send_loss_probe(sk); + break; case ICSK_TIME_RETRANS: + icsk->icsk_pending = 0; tcp_retransmit_timer(sk); break; case ICSK_TIME_PROBE0: + icsk->icsk_pending = 0; tcp_probe_timer(sk); break; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 1b91bf4..76a1e23 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); break; - case CA_EVENT_FRTO: + case CA_EVENT_LOSS: tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); /* Update RTT_min when next ack arrives */ w->reset_rtt_min = 1; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0a073a2..7117d14 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2279,31 +2279,88 @@ void __init udp_init(void) int udp4_ufo_send_check(struct sk_buff *skb) { - const struct iphdr *iph; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(*uh))) + if (!pskb_may_pull(skb, sizeof(struct udphdr))) return -EINVAL; - iph = ip_hdr(skb); - uh = udp_hdr(skb); + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } return 0; } +static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + int mac_len = skb->mac_len; + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); + int outer_hlen; + netdev_features_t enc_features; + + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) + goto out; + + skb->encapsulation = 0; + __skb_pull(skb, tnl_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + outer_hlen = skb_tnl_header_len(skb); + skb = segs; + do { + struct udphdr *uh; + int udp_offset = outer_hlen - tnl_hlen; + + skb->mac_len = mac_len; + + skb_push(skb, outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb_set_transport_header(skb, udp_offset); + uh = udp_hdr(skb); + uh->len = htons(skb->len - udp_offset); + + /* csum segment if tunnel sets skb with csum. */ + if (unlikely(uh->check)) { + struct iphdr *iph = ip_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + skb->len - udp_offset, + IPPROTO_UDP, 0); + uh->check = csum_fold(skb_checksum(skb, udp_offset, + skb->len - udp_offset, 0)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + } + skb->ip_summed = CHECKSUM_NONE; + } while ((skb = skb->next)); +out: + return segs; +} + struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; - int offset; - __wsum csum; - mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) goto out; @@ -2313,6 +2370,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int type = skb_shinfo(skb)->gso_type; if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; @@ -2323,20 +2381,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; } - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - /* Fragment the skb. IP headers of the fragments are updated in * inet_gso_segment() */ - segs = skb_segment(skb, features); + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } out: return segs; } - diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 505b30a..369a781 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -64,9 +64,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - 64)), GFP_KERNEL); + rep = nlmsg_new(sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + 64, + GFP_KERNEL); if (!rep) goto out; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ed0b9e2..11b13ea 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -156,6 +156,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION config IPV6_SIT tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)" select INET_TUNNEL + select NET_IP_TUNNEL select IPV6_NDISC_NODETYPE default y ---help--- @@ -201,6 +202,7 @@ config IPV6_TUNNEL config IPV6_GRE tristate "IPv6: GRE tunnel" select IPV6_TUNNEL + select NET_IP_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a459c4f..a33b157 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -70,6 +70,7 @@ #include <net/snmp.h> #include <net/af_ieee802154.h> +#include <net/firewire.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/ndisc.h> @@ -544,8 +545,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, - struct nlmsghdr *nlh, - void *arg) + struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; @@ -605,6 +605,77 @@ errout: return err; } +static int inet6_netconf_dump_devconf(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int h, s_h; + int idx, s_idx; + struct net_device *dev; + struct inet6_dev *idev; + struct hlist_head *head; + + s_h = cb->args[0]; + s_idx = idx = cb->args[1]; + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ + net->dev_base_seq; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + idev = __in6_dev_get(dev); + if (!idev) + goto cont; + + if (inet6_netconf_fill_devconf(skb, dev->ifindex, + &idev->cnf, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, + NLM_F_MULTI, + -1) <= 0) { + rcu_read_unlock(); + goto done; + } + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + rcu_read_unlock(); + } + if (h == NETDEV_HASHENTRIES) { + if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } + if (h == NETDEV_HASHENTRIES + 1) { + if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNETCONF, NLM_F_MULTI, + -1) <= 0) + goto done; + else + h++; + } +done: + cb->args[0] = h; + cb->args[1] = idx; + + return skb->len; +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -1668,6 +1739,20 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev) return 0; } +static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev) +{ + union fwnet_hwaddr *ha; + + if (dev->addr_len != FWNET_ALEN) + return -1; + + ha = (union fwnet_hwaddr *)dev->dev_addr; + + memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id)); + eui[0] ^= 2; + return 0; +} + static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) { /* XXX: inherit EUI-64 from other interface -- yoshfuji */ @@ -1732,6 +1817,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_gre(eui, dev); case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); + case ARPHRD_IEEE1394: + return addrconf_ifid_ieee1394(eui, dev); } return -1; } @@ -2600,7 +2687,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_FDDI) && (dev->type != ARPHRD_ARCNET) && (dev->type != ARPHRD_INFINIBAND) && - (dev->type != ARPHRD_IEEE802154)) { + (dev->type != ARPHRD_IEEE802154) && + (dev->type != ARPHRD_IEEE1394)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -3537,7 +3625,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { }; static int -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3603,7 +3691,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, } static int -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; @@ -3834,6 +3922,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, NLM_F_MULTI); if (err <= 0) break; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } break; } @@ -3891,6 +3980,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, s_ip_idx = ip_idx = cb->args[2]; rcu_read_lock(); + cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; head = &net->dev_index_head[h]; @@ -3942,8 +4032,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, - void *arg) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; @@ -4368,6 +4457,8 @@ errout: static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { + struct net *net = dev_net(ifp->idev->dev); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { @@ -4393,6 +4484,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) dst_free(&ifp->rt->dst); break; } + atomic_inc(&net->ipv6.dev_addr_genid); } static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -4961,7 +5053,7 @@ int __init addrconf_init(void) __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr, NULL); __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, - NULL, NULL); + inet6_netconf_dump_devconf, NULL); ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index aad6435..f083a58 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -414,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_LABEL] = { .len = sizeof(u32), }, }; -static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, - void *arg) +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; @@ -436,10 +435,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, if (!tb[IFAL_ADDRESS]) return -EINVAL; - pfx = nla_data(tb[IFAL_ADDRESS]); - if (!pfx) - return -EINVAL; if (!tb[IFAL_LABEL]) return -EINVAL; @@ -533,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, - void *arg) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; @@ -561,10 +556,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, if (!tb[IFAL_ADDRESS]) return -EINVAL; - addr = nla_data(tb[IFAL_ADDRESS]); - if (!addr) - return -EINVAL; rcu_read_lock(); p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6b793bf..ab5c7ad 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,7 +49,6 @@ #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> -#include <net/ipip.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> @@ -323,7 +322,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev = NULL; rcu_read_lock(); - if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another one @@ -471,8 +470,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, sin->sin6_port = inet->inet_sport; } - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = sk->sk_bound_dev_if; + sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, + sk->sk_bound_dev_if); *uaddr_len = sizeof(*sin); return 0; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index f5a5478..4b56cbb 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -124,7 +124,7 @@ ipv4_connected: goto out; } - if (addr_type&IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { if (sk->sk_bound_dev_if && @@ -355,18 +355,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = serr->port; - sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset), struct ipv6hdr, daddr); sin->sin6_addr = ip6h->daddr; if (np->sndflow) sin->sin6_flowinfo = ip6_flowinfo(ip6h); - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = IP6CB(skb)->iif; + sin->sin6_scope_id = + ipv6_iface_scope_id(&sin->sin6_addr, + IP6CB(skb)->iif); } else { ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset), &sin->sin6_addr); + sin->sin6_scope_id = 0; } } @@ -376,18 +377,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; - sin->sin6_scope_id = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); - if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin->sin6_scope_id = IP6CB(skb)->iif; + sin->sin6_scope_id = + ipv6_iface_scope_id(&sin->sin6_addr, + IP6CB(skb)->iif); } else { struct inet_sock *inet = inet_sk(sk); ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin->sin6_addr); + sin->sin6_scope_id = 0; if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } @@ -592,7 +594,9 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, sin6.sin6_addr = ipv6_hdr(skb)->daddr; sin6.sin6_port = ports[1]; sin6.sin6_flowinfo = 0; - sin6.sin6_scope_id = 0; + sin6.sin6_scope_id = + ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr, + opt->iif); put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fff5bdd..71b900c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -434,7 +434,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Source addr check */ - if (addr_type & IPV6_ADDR_LINKLOCAL) + if (__ipv6_addr_needs_scope_id(addr_type)) iif = skb->dev->ifindex; /* diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 9bfab19..e4311cb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,6 +54,10 @@ int inet6_csk_bind_conflict(const struct sock *sk, if (ipv6_rcv_saddr_equal(sk, sk2)) break; } + if (!relax && reuse && sk2->sk_reuse && + sk2->sk_state != TCP_LISTEN && + ipv6_rcv_saddr_equal(sk, sk2)) + break; } } @@ -169,10 +173,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) sin6->sin6_port = inet_sk(sk)->inet_dport; /* We do not store received flowlabel for TCP */ sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (sk->sk_bound_dev_if && - ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = sk->sk_bound_dev_if; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + sk->sk_bound_dev_if); } EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b973ed3..46e8843 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -144,7 +144,9 @@ static void ip6_fl_gc(unsigned long dummy) spin_lock(&ip6_fl_lock); for (i=0; i<=FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -179,7 +181,9 @@ static void __net_exit ip6_fl_purge(struct net *net) spin_lock(&ip6_fl_lock); for (i = 0; i <= FL_HASH_MASK; i++) { - struct ip6_flowlabel *fl, **flp; + struct ip6_flowlabel *fl; + struct ip6_flowlabel __rcu **flp; + flp = &fl_ht[i]; while ((fl = rcu_dereference_protected(*flp, lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -506,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) struct ipv6_pinfo *np = inet6_sk(sk); struct in6_flowlabel_req freq; struct ipv6_fl_socklist *sfl1=NULL; - struct ipv6_fl_socklist *sfl, **sflp; + struct ipv6_fl_socklist *sfl; + struct ipv6_fl_socklist __rcu **sflp; struct ip6_flowlabel *fl, *fl1 = NULL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e4efffe..d3ddd84 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -38,6 +38,7 @@ #include <net/sock.h> #include <net/ip.h> +#include <net/ip_tunnels.h> #include <net/icmp.h> #include <net/protocol.h> #include <net/addrconf.h> @@ -110,46 +111,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr) #define tunnels_l tunnels[1] #define tunnels_wc tunnels[0] -static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->multicast = dev->stats.multicast; - tot->rx_crc_errors = dev->stats.rx_crc_errors; - tot->rx_fifo_errors = dev->stats.rx_fifo_errors; - tot->rx_length_errors = dev->stats.rx_length_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->rx_errors = dev->stats.rx_errors; - - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, @@ -667,7 +628,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct net_device_stats *stats = &tunnel->dev->stats; int err = -1; u8 proto; - int pkt_len; struct sk_buff *new_skb; if (dev->type == ARPHRD_ETHER) @@ -801,23 +761,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, } } - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } - + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); - return 0; tx_err_link_failure: stats->tx_carrier_errors++; @@ -1271,7 +1217,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_start_xmit = ip6gre_tunnel_xmit, .ndo_do_ioctl = ip6gre_tunnel_ioctl, .ndo_change_mtu = ip6gre_tunnel_change_mtu, - .ndo_get_stats64 = ip6gre_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ip6gre_dev_free(struct net_device *dev) @@ -1520,7 +1466,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6gre_tunnel_change_mtu, - .ndo_get_stats64 = ip6gre_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ip6gre_tap_setup(struct net_device *dev) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 8234c1d..71b766e 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -92,14 +92,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u8 *prevhdr; int offset = 0; - if (!(features & NETIF_F_V6_CSUM)) - features &= ~NETIF_F_SG; - if (unlikely(skb_shinfo(skb)->gso_type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_UDP_TUNNEL | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fff83cb..1e55866 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -47,6 +47,7 @@ #include <net/icmp.h> #include <net/ip.h> +#include <net/ip_tunnels.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/addrconf.h> @@ -955,7 +956,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, unsigned int max_headroom = sizeof(struct ipv6hdr); u8 proto; int err = -1; - int pkt_len; if (!fl6->flowi6_mark) dst = ip6_tnl_dst_check(t); @@ -1035,19 +1035,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - nf_reset(skb); - pkt_len = skb->len; - err = ip6_local_out(skb); - - if (net_xmit_eval(err) == 0) { - struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats); - - tstats->tx_bytes += pkt_len; - tstats->tx_packets++; - } else { - stats->tx_errors++; - stats->tx_aborted_errors++; - } + ip6tunnel_xmit(skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 96bfb4e..241fb8a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -842,9 +842,9 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; + ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT; rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); @@ -1100,13 +1100,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, if (ipv6_hdr(skb)->version == 0) { struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); - if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { + if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) { nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh; } else { nlh->nlmsg_type = NLMSG_ERROR; - nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); + nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); - ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; + ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 76ef435..2712ab2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -610,8 +610,6 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, } } #endif - if (!dev->addr_len) - send_sllao = 0; if (send_sllao) optlen += ndisc_opt_addr_space(dev); diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 33608c6..938e0b7 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -18,9 +18,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) { struct ip6t_npt_tginfo *npt = par->targinfo; - __wsum src_sum = 0, dst_sum = 0; struct in6_addr pfx; - unsigned int i; + __wsum src_sum, dst_sum; if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) return -EINVAL; @@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { - src_sum = csum_add(src_sum, - (__force __wsum)npt->src_pfx.in6.s6_addr16[i]); - dst_sum = csum_add(dst_sum, - (__force __wsum)npt->dst_pfx.in6.s6_addr16[i]); - } + src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); + dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); return 0; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 2b6c226..97bcf2b 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) sizeof(sin6.sin6_addr)); nf_ct_put(ct); - - if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6.sin6_scope_id = sk->sk_bound_dev_if; - else - sin6.sin6_scope_id = 0; - + sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, + sk->sk_bound_dev_if); return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 330b5e7..eedff8c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -263,7 +263,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; - if (addr_type & IPV6_ADDR_LINKLOCAL) { + if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another @@ -498,9 +498,8 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_port = 0; sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); } sock_recv_ts_and_drops(msg, sk, skb); @@ -802,7 +801,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && - ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) + __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 196ab93..e6e44ce 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -58,6 +58,7 @@ #include <net/ndisc.h> #include <net/addrconf.h> #include <net/inet_frag.h> +#include <net/inet_ecn.h> struct ip6frag_skb_cb { @@ -67,6 +68,10 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} static struct inet_frags ip6_frags; @@ -119,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) fq->user = arg->user; fq->saddr = *arg->src; fq->daddr = *arg->dst; + fq->ecn = arg->ecn; } EXPORT_SYMBOL(ip6_frag_init); @@ -173,7 +179,8 @@ static void ip6_frag_expire(unsigned long data) } static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, + const struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -183,6 +190,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6 arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock(&ip6_frags.lock); hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); @@ -202,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net_device *dev; int offset, end; struct net *net = dev_net(skb_dst(skb)->dev); + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -219,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, @@ -319,6 +330,7 @@ found: } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + fq->ecn |= ecn; add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. @@ -362,9 +374,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int payload_len; unsigned int nhoff; int sum_truesize; + u8 ecn; inet_frag_kill(&fq->q, &ip6_frags); + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; + /* Make the one we just received the head. */ if (prev) { head = prev->next; @@ -463,6 +480,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; /* Yes, and fold redundant checksum back. 8) */ @@ -526,7 +544,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb) IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS, evicted); - fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); if (fq != NULL) { int ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e5fe004..ad0aa6b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2355,7 +2355,7 @@ beginning: return last_err; } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2370,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a return ip6_route_del(&cfg); } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh) { struct fib6_config cfg; int err; @@ -2562,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, 0, NLM_F_MULTI); } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 02f96dc..3353634 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -49,7 +49,7 @@ #include <net/ip.h> #include <net/udp.h> #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/dsfield.h> @@ -87,41 +87,6 @@ struct sit_net { struct net_device *fb_tunnel_dev; }; -static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *tot) -{ - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - u64 rx_packets, rx_bytes, tx_packets, tx_bytes; - unsigned int start; - - do { - start = u64_stats_fetch_begin_bh(&tstats->syncp); - rx_packets = tstats->rx_packets; - tx_packets = tstats->tx_packets; - rx_bytes = tstats->rx_bytes; - tx_bytes = tstats->tx_bytes; - } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - - tot->rx_packets += rx_packets; - tot->tx_packets += tx_packets; - tot->rx_bytes += rx_bytes; - tot->tx_bytes += tx_bytes; - } - - tot->rx_errors = dev->stats.rx_errors; - tot->rx_frame_errors = dev->stats.rx_frame_errors; - tot->tx_fifo_errors = dev->stats.tx_fifo_errors; - tot->tx_carrier_errors = dev->stats.tx_carrier_errors; - tot->tx_dropped = dev->stats.tx_dropped; - tot->tx_aborted_errors = dev->stats.tx_aborted_errors; - tot->tx_errors = dev->stats.tx_errors; - - return tot; -} - /* * Must be invoked with rcu_read_lock */ @@ -899,6 +864,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if ((iph->ttl = tiph->ttl) == 0) iph->ttl = iph6->hop_limit; + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, skb_dst(skb), NULL); iptunnel_xmit(skb, dev); return NETDEV_TX_OK; @@ -1200,7 +1167,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_start_xmit = ipip6_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, - .ndo_get_stats64= ipip6_get_stats64, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; static void ipip6_dev_free(struct net_device *dev) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8a0848b..d5dda20 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; @@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); + tcp_parse_options(skb, &tcp_opt, 0, NULL); if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok)) goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f6d629f..1033d2b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -461,7 +461,6 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, struct request_sock *req, - struct request_values *rvp, u16 queue_mapping) { struct inet6_request_sock *treq = inet6_rsk(req); @@ -473,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp, NULL); + skb = tcp_make_synack(sk, dst, req, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -488,13 +487,12 @@ done: return err; } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, - struct request_values *rvp) +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) { struct flowi6 fl6; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); + res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); if (!res) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); return res; @@ -947,9 +945,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) */ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { - struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -987,50 +983,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); - - if (tmp_opt.cookie_plus > 0 && - tmp_opt.saw_tstamp && - !tp->rx_opt.cookie_out_never && - (sysctl_tcp_cookie_size > 0 || - (tp->cookie_values != NULL && - tp->cookie_values->cookie_desired > 0))) { - u8 *c; - u32 *d; - u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; - int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - - if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) - goto drop_and_free; - - /* Secret recipe starts with IP addresses */ - d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - *mess++ ^= *d++; - - /* plus variable length Initiator Cookie */ - c = (u8 *)mess; - while (l-- > 0) - *c++ ^= *hash_location++; - - want_cookie = false; /* not our kind of cookie */ - tmp_ext.cookie_out_never = 0; /* false */ - tmp_ext.cookie_plus = tmp_opt.cookie_plus; - } else if (!tp->rx_opt.cookie_in_always) { - /* redundant indications, but ensure initialization. */ - tmp_ext.cookie_out_never = 1; /* true */ - tmp_ext.cookie_plus = 0; - } else { - goto drop_and_free; - } - tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; + tcp_parse_options(skb, &tmp_opt, 0, NULL); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1108,7 +1061,6 @@ have_isn: goto drop_and_release; if (tcp_v6_send_synack(sk, dst, &fl6, req, - (struct request_values *)&tmp_ext, skb_get_queue_mapping(skb)) || want_cookie) goto drop_and_free; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d8e5e85..da6019b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -450,15 +450,16 @@ try_again: sin6->sin6_family = AF_INET6; sin6->sin6_port = udp_hdr(skb)->source; sin6->sin6_flowinfo = 0; - sin6->sin6_scope_id = 0; - if (is_udp4) + if (is_udp4) { ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &sin6->sin6_addr); - else { + sin6->sin6_scope_id = 0; + } else { sin6->sin6_addr = ipv6_hdr(skb)->saddr; - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - sin6->sin6_scope_id = IP6CB(skb)->iif; + sin6->sin6_scope_id = + ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); } } @@ -1118,7 +1119,7 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && - ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) + __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index cf05cf0..3bb3a89 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb) const struct ipv6hdr *ipv6h; struct udphdr *uh; + /* UDP Tunnel offload on ipv6 is not yet supported. */ + if (skb->encapsulation) + return -EINVAL; + if (!pskb_may_pull(skb, sizeof(*uh))) return -EINVAL; @@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + if (unlikely(type & ~(SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a7d11ffe..7dfb9ed 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, return iucv_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8aecf5d..6984c3a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1777,7 +1777,7 @@ int l2tp_session_delete(struct l2tp_session *session) if (session->session_close != NULL) (*session->session_close)(session); if (session->deref) - (*session->ref)(session); + (*session->deref)(session); l2tp_session_dec_refcount(session); return 0; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb30681..1d1ddab 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -254,7 +254,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, goto out_unlock; } - __ieee80211_key_free(key); + __ieee80211_key_free(key, true); ret = 0; out_unlock: @@ -1035,9 +1035,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) sta_info_flush_defer(vlan); sta_info_flush_defer(sdata); rcu_barrier(); - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { sta_info_flush_cleanup(vlan); + ieee80211_free_keys(vlan); + } sta_info_flush_cleanup(sdata); + ieee80211_free_keys(sdata); sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); @@ -1177,6 +1180,18 @@ static int sta_apply_parameters(struct ieee80211_local *local, mask |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED)) set |= BIT(NL80211_STA_FLAG_ASSOCIATED); + } else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* + * TDLS -- everything follows authorized, but + * only becoming authorized is possible, not + * going back + */ + if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { + set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); + mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED); + } } ret = sta_apply_auth_flags(local, sta, mask, set); @@ -1261,7 +1276,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH u32 changed = 0; - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) { + + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) { switch (params->plink_state) { case NL80211_PLINK_ESTAB: if (sta->plink_state != NL80211_PLINK_ESTAB) @@ -1292,15 +1308,18 @@ static int sta_apply_parameters(struct ieee80211_local *local, /* nothing */ break; } - } else { - switch (params->plink_action) { - case PLINK_ACTION_OPEN: - changed |= mesh_plink_open(sta); - break; - case PLINK_ACTION_BLOCK: - changed |= mesh_plink_block(sta); - break; - } + } + + switch (params->plink_action) { + case NL80211_PLINK_ACTION_NO_ACTION: + /* nothing */ + break; + case NL80211_PLINK_ACTION_OPEN: + changed |= mesh_plink_open(sta); + break; + case NL80211_PLINK_ACTION_BLOCK: + changed |= mesh_plink_block(sta); + break; } if (params->local_pm) @@ -1346,8 +1365,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, * defaults -- if userspace wants something else we'll * change it accordingly in sta_apply_parameters() */ - sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); - sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { + sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); + sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); + } err = sta_apply_parameters(local, sta, params); if (err) { @@ -1356,8 +1377,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } /* - * for TDLS, rate control should be initialized only when supported - * rates are known. + * for TDLS, rate control should be initialized only when + * rates are known and station is marked authorized */ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) rate_control_rate_init(sta); @@ -1394,50 +1415,67 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, - u8 *mac, + struct net_device *dev, u8 *mac, struct station_parameters *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; + enum cfg80211_station_type statype; int err; mutex_lock(&local->sta_mtx); sta = sta_info_get_bss(sdata, mac); if (!sta) { - mutex_unlock(&local->sta_mtx); - return -ENOENT; + err = -ENOENT; + goto out_err; } - /* in station mode, some updates are only valid with TDLS */ - if (sdata->vif.type == NL80211_IFTYPE_STATION && - (params->supported_rates || params->ht_capa || params->vht_capa || - params->sta_modify_mask || - (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) && - !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { - mutex_unlock(&local->sta_mtx); - return -EINVAL; + switch (sdata->vif.type) { + case NL80211_IFTYPE_MESH_POINT: + if (sdata->u.mesh.user_mpm) + statype = CFG80211_STA_MESH_PEER_USER; + else + statype = CFG80211_STA_MESH_PEER_KERNEL; + break; + case NL80211_IFTYPE_ADHOC: + statype = CFG80211_STA_IBSS; + break; + case NL80211_IFTYPE_STATION: + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + statype = CFG80211_STA_AP_STA; + break; + } + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + statype = CFG80211_STA_TDLS_PEER_ACTIVE; + else + statype = CFG80211_STA_TDLS_PEER_SETUP; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + statype = CFG80211_STA_AP_CLIENT; + break; + default: + err = -EOPNOTSUPP; + goto out_err; } + err = cfg80211_check_station_change(wiphy, params, statype); + if (err) + goto out_err; + if (params->vlan && params->vlan != sta->sdata->dev) { bool prev_4addr = false; bool new_4addr = false; vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && - vlansdata->vif.type != NL80211_IFTYPE_AP) { - mutex_unlock(&local->sta_mtx); - return -EINVAL; - } - if (params->vlan->ieee80211_ptr->use_4addr) { if (vlansdata->u.vlan.sta) { - mutex_unlock(&local->sta_mtx); - return -EBUSY; + err = -EBUSY; + goto out_err; } rcu_assign_pointer(vlansdata->u.vlan.sta, sta); @@ -1464,12 +1502,12 @@ static int ieee80211_change_station(struct wiphy *wiphy, } err = sta_apply_parameters(local, sta, params); - if (err) { - mutex_unlock(&local->sta_mtx); - return err; - } + if (err) + goto out_err; - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates) + /* When peer becomes authorized, init rate control as well */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + test_sta_flag(sta, WLAN_STA_AUTHORIZED)) rate_control_rate_init(sta); mutex_unlock(&local->sta_mtx); @@ -1479,7 +1517,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, ieee80211_recalc_ps(local, -1); ieee80211_recalc_ps_vif(sdata); } + return 0; +out_err: + mutex_unlock(&local->sta_mtx); + return err; } #ifdef CONFIG_MAC80211_MESH @@ -1687,6 +1729,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, ifmsh->mesh_sp_id = setup->sync_method; ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; + ifmsh->user_mpm = setup->user_mpm; ifmsh->security = IEEE80211_MESH_SEC_NONE; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; @@ -1730,8 +1773,11 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, conf->dot11MeshTTL = nconf->dot11MeshTTL; if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) conf->element_ttl = nconf->element_ttl; - if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) + if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) { + if (ifmsh->user_mpm) + return -EBUSY; conf->auto_open_plinks = nconf->auto_open_plinks; + } if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask)) conf->dot11MeshNbrOffsetMaxNeighbor = nconf->dot11MeshNbrOffsetMaxNeighbor; @@ -2371,7 +2417,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, unsigned int duration, u64 *cookie, - struct sk_buff *txskb) + struct sk_buff *txskb, + enum ieee80211_roc_type type) { struct ieee80211_roc_work *roc, *tmp; bool queued = false; @@ -2390,6 +2437,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, roc->duration = duration; roc->req_duration = duration; roc->frame = txskb; + roc->type = type; roc->mgmt_tx_cookie = (unsigned long)txskb; roc->sdata = sdata; INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); @@ -2420,7 +2468,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, if (!duration) duration = 10; - ret = drv_remain_on_channel(local, sdata, channel, duration); + ret = drv_remain_on_channel(local, sdata, channel, duration, type); if (ret) { kfree(roc); return ret; @@ -2439,10 +2487,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, * * If it hasn't started yet, just increase the duration * and add the new one to the list of dependents. + * If the type of the new ROC has higher priority, modify the + * type of the previous one to match that of the new one. */ if (!tmp->started) { list_add_tail(&roc->list, &tmp->dependents); tmp->duration = max(tmp->duration, roc->duration); + tmp->type = max(tmp->type, roc->type); queued = true; break; } @@ -2454,16 +2505,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, /* * In the offloaded ROC case, if it hasn't begun, add * this new one to the dependent list to be handled - * when the the master one begins. If it has begun, + * when the master one begins. If it has begun, * check that there's still a minimum time left and * if so, start this one, transmitting the frame, but - * add it to the list directly after this one with a + * add it to the list directly after this one with * a reduced time so we'll ask the driver to execute * it right after finishing the previous one, in the * hope that it'll also be executed right afterwards, * effectively extending the old one. * If there's no minimum time left, just add it to the * normal list. + * TODO: the ROC type is ignored here, assuming that it + * is better to immediately use the current ROC. */ if (!tmp->hw_begun) { list_add_tail(&roc->list, &tmp->dependents); @@ -2557,7 +2610,8 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, mutex_lock(&local->mtx); ret = ieee80211_start_roc_work(local, sdata, chan, - duration, cookie, NULL); + duration, cookie, NULL, + IEEE80211_ROC_TYPE_NORMAL); mutex_unlock(&local->mtx); return ret; @@ -2790,7 +2844,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, /* This will handle all kinds of coalescing and immediate TX */ ret = ieee80211_start_roc_work(local, sdata, chan, - wait, cookie, skb); + wait, cookie, skb, + IEEE80211_ROC_TYPE_MGMT_TX); if (ret) kfree_skb(skb); out_unlock: diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index c7591f7..4f841fe 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -325,6 +325,36 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf, } STA_OPS(ht_capa); +static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + char buf[128], *p = buf; + struct sta_info *sta = file->private_data; + struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap; + + p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n", + vhtc->vht_supported ? "" : "not "); + if (vhtc->vht_supported) { + p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.8x\n", vhtc->cap); + + p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n", + le16_to_cpu(vhtc->vht_mcs.rx_mcs_map)); + if (vhtc->vht_mcs.rx_highest) + p += scnprintf(p, sizeof(buf)+buf-p, + "MCS RX highest: %d Mbps\n", + le16_to_cpu(vhtc->vht_mcs.rx_highest)); + p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n", + le16_to_cpu(vhtc->vht_mcs.tx_mcs_map)); + if (vhtc->vht_mcs.tx_highest) + p += scnprintf(p, sizeof(buf)+buf-p, + "MCS TX highest: %d Mbps\n", + le16_to_cpu(vhtc->vht_mcs.tx_highest)); + } + + return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); +} +STA_OPS(vht_capa); + static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -405,6 +435,7 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(dev); DEBUGFS_ADD(last_signal); DEBUGFS_ADD(ht_capa); + DEBUGFS_ADD(vht_capa); DEBUGFS_ADD(last_ack_signal); DEBUGFS_ADD(current_tx_rate); DEBUGFS_ADD(last_rx_rate); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ee56d07..832acea 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -787,15 +787,16 @@ static inline int drv_get_antenna(struct ieee80211_local *local, static inline int drv_remain_on_channel(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - unsigned int duration) + unsigned int duration, + enum ieee80211_roc_type type) { int ret; might_sleep(); - trace_drv_remain_on_channel(local, sdata, chan, duration); + trace_drv_remain_on_channel(local, sdata, chan, duration, type); ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, - chan, duration); + chan, duration, type); trace_drv_return_int(local, ret); return ret; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 0db25d4..af8cee0 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -40,13 +40,6 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, if (!ht_cap->ht_supported) return; - if (sdata->vif.type != NL80211_IFTYPE_STATION) { - /* AP interfaces call this code when adding new stations, - * so just silently ignore non station interfaces. - */ - return; - } - /* NOTE: If you add more over-rides here, update register_hw * ht_capa_mod_msk logic in main.c as well. * And, if this method can ever change ht_cap.ht_supported, fix @@ -97,7 +90,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_ht_cap *ht_cap_ie, struct sta_info *sta) { - struct ieee80211_sta_ht_cap ht_cap; + struct ieee80211_sta_ht_cap ht_cap, own_cap; u8 ampdu_info, tx_mcs_set_cap; int i, max_tx_streams; bool changed; @@ -111,6 +104,18 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, ht_cap.ht_supported = true; + own_cap = sband->ht_cap; + + /* + * If user has specified capability over-rides, take care + * of that if the station we're setting up is the AP that + * we advertised a restricted capability set to. Override + * our own capabilities and then use those below. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + ieee80211_apply_htcap_overrides(sdata, &own_cap); + /* * The bits listed in this expression should be * the same for the peer and us, if the station @@ -118,21 +123,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, * we mask them out. */ ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) & - (sband->ht_cap.cap | - ~(IEEE80211_HT_CAP_LDPC_CODING | - IEEE80211_HT_CAP_SUP_WIDTH_20_40 | - IEEE80211_HT_CAP_GRN_FLD | - IEEE80211_HT_CAP_SGI_20 | - IEEE80211_HT_CAP_SGI_40 | - IEEE80211_HT_CAP_DSSSCCK40)); + (own_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING | + IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_GRN_FLD | + IEEE80211_HT_CAP_SGI_20 | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40)); /* * The STBC bits are asymmetric -- if we don't have * TX then mask out the peer's RX and vice versa. */ - if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) + if (!(own_cap.cap & IEEE80211_HT_CAP_TX_STBC)) ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC; - if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) + if (!(own_cap.cap & IEEE80211_HT_CAP_RX_STBC)) ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC; ampdu_info = ht_cap_ie->ampdu_params_info; @@ -142,7 +146,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; /* own MCS TX capabilities */ - tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; + tx_mcs_set_cap = own_cap.mcs.tx_params; /* Copy peer MCS TX capabilities, the driver might need them. */ ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params; @@ -168,26 +172,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, */ for (i = 0; i < max_tx_streams; i++) ht_cap.mcs.rx_mask[i] = - sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; + own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; i < IEEE80211_HT_MCS_MASK_LEN; i++) ht_cap.mcs.rx_mask[i] = - sband->ht_cap.mcs.rx_mask[i] & + own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; /* handle MCS rate 32 too */ - if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) + if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) ht_cap.mcs.rx_mask[32/8] |= 1; apply: - /* - * If user has specified capability over-rides, take care - * of that here. - */ - ieee80211_apply_htcap_overrides(sdata, &ht_cap); - changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 40b71df..539d4a1 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -985,36 +985,9 @@ static void ieee80211_ibss_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - struct ieee80211_local *local = sdata->local; - - if (local->quiescing) { - ifibss->timer_running = true; - return; - } - - ieee80211_queue_work(&local->hw, &sdata->work); -} - -#ifdef CONFIG_PM -void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - if (del_timer_sync(&ifibss->timer)) - ifibss->timer_running = true; -} - -void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - - if (ifibss->timer_running) { - add_timer(&ifibss->timer); - ifibss->timer_running = false; - } + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } -#endif void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 388580a..f4433f0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -315,6 +315,7 @@ struct ieee80211_roc_work { u32 duration, req_duration; struct sk_buff *frame; u64 cookie, mgmt_tx_cookie; + enum ieee80211_roc_type type; }; /* flags used in struct ieee80211_if_managed.flags */ @@ -400,7 +401,6 @@ struct ieee80211_if_managed { u16 aid; - unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ u8 dtim_period; @@ -479,6 +479,8 @@ struct ieee80211_if_managed { struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */ struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */ + struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */ + struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */ }; struct ieee80211_if_ibss { @@ -490,8 +492,6 @@ struct ieee80211_if_ibss { u32 basic_rates; - bool timer_running; - bool fixed_bssid; bool fixed_channel; bool privacy; @@ -543,8 +543,6 @@ struct ieee80211_if_mesh { struct timer_list mesh_path_timer; struct timer_list mesh_path_root_timer; - unsigned long timers_running; - unsigned long wrkq_flags; u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; @@ -590,6 +588,7 @@ struct ieee80211_if_mesh { IEEE80211_MESH_SEC_AUTHED = 0x1, IEEE80211_MESH_SEC_SECURED = 0x2, } security; + bool user_mpm; /* Extensible Synchronization Framework */ const struct ieee80211_mesh_sync_ops *sync_ops; s64 sync_offset_clockdrift_max; @@ -682,6 +681,8 @@ struct ieee80211_sub_if_data { /* count for keys needing tailroom space allocation */ int crypto_tx_tailroom_needed_cnt; + int crypto_tx_tailroom_pending_dec; + struct delayed_work dec_tailroom_needed_wk; struct net_device *dev; struct ieee80211_local *local; @@ -765,10 +766,6 @@ struct ieee80211_sub_if_data { } debugfs; #endif -#ifdef CONFIG_PM - struct ieee80211_bss_conf suspend_bss_conf; -#endif - /* must be last, dynamically sized area in this! */ struct ieee80211_vif vif; }; @@ -1136,11 +1133,6 @@ struct ieee80211_local { struct ieee80211_sub_if_data __rcu *p2p_sdata; - /* dummy netdev for use w/ NAPI */ - struct net_device napi_dev; - - struct napi_struct napi; - /* virtual monitor interface */ struct ieee80211_sub_if_data __rcu *monitor_sdata; struct cfg80211_chan_def monitor_chandef; @@ -1283,8 +1275,6 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, const struct ieee80211_channel_sw_ie *sw_elem, struct ieee80211_bss *bss, u64 timestamp); -void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); @@ -1302,8 +1292,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, struct cfg80211_ibss_params *params); int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); -void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); @@ -1441,6 +1429,8 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, u8 opmode, enum ieee80211_band band, bool nss_only); +void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_vht_cap *vht_cap); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 3bfe261..7530c60 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -488,8 +488,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) res = drv_start(local); if (res) goto err_del_bss; - if (local->ops->napi_poll) - napi_enable(&local->napi); /* we're brought up, everything changes */ hw_reconf_flags = ~0; ieee80211_led_radio(local, true); @@ -841,14 +839,16 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, rcu_barrier(); sta_info_flush_cleanup(sdata); - skb_queue_purge(&sdata->skb_queue); - /* * Free all remaining keys, there shouldn't be any, - * except maybe group keys in AP more or WDS? + * except maybe in WDS mode? */ ieee80211_free_keys(sdata); + /* fall through */ + case NL80211_IFTYPE_AP: + skb_queue_purge(&sdata->skb_queue); + drv_remove_interface_debugfs(local, sdata); if (going_down) @@ -860,8 +860,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_ps(local, -1); if (local->open_count == 0) { - if (local->ops->napi_poll) - napi_disable(&local->napi); ieee80211_clear_tx_pending(local); ieee80211_stop_device(local); @@ -1550,6 +1548,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk); INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work, ieee80211_dfs_cac_timer_work); + INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, + ieee80211_delayed_tailroom_dec); for (i = 0; i < IEEE80211_NUM_BANDS; i++) { struct ieee80211_supported_band *sband; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ef252eb..99e9f6a 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -397,7 +397,8 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return key; } -static void __ieee80211_key_destroy(struct ieee80211_key *key) +static void __ieee80211_key_destroy(struct ieee80211_key *key, + bool delay_tailroom) { if (!key) return; @@ -416,8 +417,18 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); if (key->local) { + struct ieee80211_sub_if_data *sdata = key->sdata; + ieee80211_debugfs_key_remove(key); - key->sdata->crypto_tx_tailroom_needed_cnt--; + + if (delay_tailroom) { + /* see ieee80211_delayed_tailroom_dec */ + sdata->crypto_tx_tailroom_pending_dec++; + schedule_delayed_work(&sdata->dec_tailroom_needed_wk, + HZ/2); + } else { + sdata->crypto_tx_tailroom_needed_cnt--; + } } kfree(key); @@ -440,32 +451,6 @@ int ieee80211_key_link(struct ieee80211_key *key, key->sdata = sdata; key->sta = sta; - if (sta) { - /* - * some hardware cannot handle TKIP with QoS, so - * we indicate whether QoS could be in use. - */ - if (test_sta_flag(sta, WLAN_STA_WME)) - key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; - } else { - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - struct sta_info *ap; - - /* - * We're getting a sta pointer in, so must be under - * appropriate locking for sta_info_get(). - */ - - /* same here, the AP could be using QoS */ - ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid); - if (ap) { - if (test_sta_flag(ap, WLAN_STA_WME)) - key->conf.flags |= - IEEE80211_KEY_FLAG_WMM_STA; - } - } - } - mutex_lock(&sdata->local->key_mtx); if (sta && pairwise) @@ -478,7 +463,7 @@ int ieee80211_key_link(struct ieee80211_key *key, increment_tailroom_need_count(sdata); __ieee80211_key_replace(sdata, sta, pairwise, old_key, key); - __ieee80211_key_destroy(old_key); + __ieee80211_key_destroy(old_key, true); ieee80211_debugfs_key_add(key); @@ -489,7 +474,7 @@ int ieee80211_key_link(struct ieee80211_key *key, return ret; } -void __ieee80211_key_free(struct ieee80211_key *key) +void __ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) { if (!key) return; @@ -501,14 +486,14 @@ void __ieee80211_key_free(struct ieee80211_key *key) __ieee80211_key_replace(key->sdata, key->sta, key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, key, NULL); - __ieee80211_key_destroy(key); + __ieee80211_key_destroy(key, delay_tailroom); } void ieee80211_key_free(struct ieee80211_local *local, struct ieee80211_key *key) { mutex_lock(&local->key_mtx); - __ieee80211_key_free(key); + __ieee80211_key_free(key, true); mutex_unlock(&local->key_mtx); } @@ -566,36 +551,60 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_iter_keys); -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_key *key; - - ASSERT_RTNL(); - - mutex_lock(&sdata->local->key_mtx); - - list_for_each_entry(key, &sdata->key_list, list) - ieee80211_key_disable_hw_accel(key); - - mutex_unlock(&sdata->local->key_mtx); -} - void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key, *tmp; + cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk); + mutex_lock(&sdata->local->key_mtx); + sdata->crypto_tx_tailroom_needed_cnt -= + sdata->crypto_tx_tailroom_pending_dec; + sdata->crypto_tx_tailroom_pending_dec = 0; + ieee80211_debugfs_key_remove_mgmt_default(sdata); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) - __ieee80211_key_free(key); + __ieee80211_key_free(key, false); ieee80211_debugfs_key_update_default(sdata); + WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || + sdata->crypto_tx_tailroom_pending_dec); + mutex_unlock(&sdata->local->key_mtx); } +void ieee80211_delayed_tailroom_dec(struct work_struct *wk) +{ + struct ieee80211_sub_if_data *sdata; + + sdata = container_of(wk, struct ieee80211_sub_if_data, + dec_tailroom_needed_wk.work); + + /* + * The reason for the delayed tailroom needed decrementing is to + * make roaming faster: during roaming, all keys are first deleted + * and then new keys are installed. The first new key causes the + * crypto_tx_tailroom_needed_cnt to go from 0 to 1, which invokes + * the cost of synchronize_net() (which can be slow). Avoid this + * by deferring the crypto_tx_tailroom_needed_cnt decrementing on + * key removal for a while, so if we roam the value is larger than + * zero and no 0->1 transition happens. + * + * The cost is that if the AP switching was from an AP with keys + * to one without, we still allocate tailroom while it would no + * longer be needed. However, in the typical (fast) roaming case + * within an ESS this usually won't happen. + */ + + mutex_lock(&sdata->local->key_mtx); + sdata->crypto_tx_tailroom_needed_cnt -= + sdata->crypto_tx_tailroom_pending_dec; + sdata->crypto_tx_tailroom_pending_dec = 0; + mutex_unlock(&sdata->local->key_mtx); +} void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid, const u8 *replay_ctr, gfp_t gfp) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 382dc44..2a682d8 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -134,7 +134,7 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, int __must_check ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta); -void __ieee80211_key_free(struct ieee80211_key *key); +void __ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom); void ieee80211_key_free(struct ieee80211_local *local, struct ieee80211_key *key); void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx, @@ -143,9 +143,10 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx); void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) +void ieee80211_delayed_tailroom_dec(struct work_struct *wk); + #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1a8591b..5a53aa5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -399,30 +399,6 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb, } #endif -static int ieee80211_napi_poll(struct napi_struct *napi, int budget) -{ - struct ieee80211_local *local = - container_of(napi, struct ieee80211_local, napi); - - return local->ops->napi_poll(&local->hw, budget); -} - -void ieee80211_napi_schedule(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); - - napi_schedule(&local->napi); -} -EXPORT_SYMBOL(ieee80211_napi_schedule); - -void ieee80211_napi_complete(struct ieee80211_hw *hw) -{ - struct ieee80211_local *local = hw_to_local(hw); - - napi_complete(&local->napi); -} -EXPORT_SYMBOL(ieee80211_napi_complete); - /* There isn't a lot of sense in it, but you can transmit anything you like */ static const struct ieee80211_txrx_stypes ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { @@ -501,6 +477,27 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = { }, }; +static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = { + .vht_cap_info = + cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160 | + IEEE80211_VHT_CAP_RXSTBC_1 | + IEEE80211_VHT_CAP_RXSTBC_2 | + IEEE80211_VHT_CAP_RXSTBC_3 | + IEEE80211_VHT_CAP_RXSTBC_4 | + IEEE80211_VHT_CAP_TXSTBC | + IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK), + .supp_mcs = { + .rx_mcs_map = cpu_to_le16(~0), + .tx_mcs_map = cpu_to_le16(~0), + }, +}; + static const u8 extended_capabilities[] = { 0, 0, 0, 0, 0, 0, 0, WLAN_EXT_CAPA8_OPMODE_NOTIF, @@ -572,7 +569,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->features |= NL80211_FEATURE_SK_TX_STATUS | NL80211_FEATURE_SAE | NL80211_FEATURE_HT_IBSS | - NL80211_FEATURE_VIF_TXPOWER; + NL80211_FEATURE_VIF_TXPOWER | + NL80211_FEATURE_USERSPACE_MPM; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -609,6 +607,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; + wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask; INIT_LIST_HEAD(&local->interfaces); @@ -664,9 +663,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); - /* init dummy netdev for use w/ NAPI */ - init_dummy_netdev(&local->napi_dev); - ieee80211_led_names(local); ieee80211_roc_setup(local); @@ -1021,9 +1017,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_ifa6; #endif - netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, - local->hw.napi_weight); - return 0; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 4749b38..77b5710 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -13,10 +13,6 @@ #include "ieee80211_i.h" #include "mesh.h" -#define TMR_RUNNING_HK 0 -#define TMR_RUNNING_MP 1 -#define TMR_RUNNING_MPR 2 - static int mesh_allocated; static struct kmem_cache *rm_cache; @@ -50,11 +46,6 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data) set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); - if (local->quiescing) { - set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); - return; - } - ieee80211_queue_work(&local->hw, &sdata->work); } @@ -165,7 +156,7 @@ void mesh_sta_cleanup(struct sta_info *sta) * an update. */ changed = mesh_accept_plinks_update(sdata); - if (sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { + if (!sdata->u.mesh.user_mpm) { changed |= mesh_plink_deactivate(sta); del_timer_sync(&sta->plink_timer); } @@ -479,15 +470,8 @@ static void ieee80211_mesh_path_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = sdata->local; - - if (local->quiescing) { - set_bit(TMR_RUNNING_MP, &ifmsh->timers_running); - return; - } - ieee80211_queue_work(&local->hw, &sdata->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } static void ieee80211_mesh_path_root_timer(unsigned long data) @@ -495,16 +479,10 @@ static void ieee80211_mesh_path_root_timer(unsigned long data) struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - struct ieee80211_local *local = sdata->local; set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); - if (local->quiescing) { - set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running); - return; - } - - ieee80211_queue_work(&local->hw, &sdata->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) @@ -622,35 +600,6 @@ static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata) round_jiffies(TU_TO_EXP_TIME(interval))); } -#ifdef CONFIG_PM -void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - - /* use atomic bitops in case all timers fire at the same time */ - - if (del_timer_sync(&ifmsh->housekeeping_timer)) - set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); - if (del_timer_sync(&ifmsh->mesh_path_timer)) - set_bit(TMR_RUNNING_MP, &ifmsh->timers_running); - if (del_timer_sync(&ifmsh->mesh_path_root_timer)) - set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running); -} - -void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - - if (test_and_clear_bit(TMR_RUNNING_HK, &ifmsh->timers_running)) - add_timer(&ifmsh->housekeeping_timer); - if (test_and_clear_bit(TMR_RUNNING_MP, &ifmsh->timers_running)) - add_timer(&ifmsh->mesh_path_timer); - if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running)) - add_timer(&ifmsh->mesh_path_root_timer); - ieee80211_mesh_root_setup(ifmsh); -} -#endif - static int ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) { @@ -871,8 +820,6 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) local->fif_other_bss--; atomic_dec(&local->iff_allmultis); ieee80211_configure_filter(local); - - sdata->u.mesh.timers_running = 0; } static void diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 336c88a..6ffabbe 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -313,8 +313,6 @@ void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); -void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); -void mesh_path_restart(struct ieee80211_sub_if_data *sdata); void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata); bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); @@ -359,22 +357,12 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); -void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata); -void mesh_plink_quiesce(struct sta_info *sta); -void mesh_plink_restart(struct sta_info *sta); void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); void ieee80211s_stop(void); #else static inline void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} -static inline void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) -{} -static inline void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) -{} -static inline void mesh_plink_quiesce(struct sta_info *sta) {} -static inline void mesh_plink_restart(struct sta_info *sta) {} static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) { return false; } static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 07d396d..937e06f 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -420,7 +420,6 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr) return NULL; sta->plink_state = NL80211_PLINK_LISTEN; - init_timer(&sta->plink_timer); sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); @@ -437,8 +436,9 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr, { struct sta_info *sta = NULL; - /* Userspace handles peer allocation when security is enabled */ - if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) + /* Userspace handles station allocation */ + if (sdata->u.mesh.user_mpm || + sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) cfg80211_notify_new_peer_candidate(sdata->dev, addr, elems->ie_start, elems->total_len, @@ -534,10 +534,8 @@ static void mesh_plink_timer(unsigned long data) */ sta = (struct sta_info *) data; - if (sta->sdata->local->quiescing) { - sta->plink_timer_was_running = true; + if (sta->sdata->local->quiescing) return; - } spin_lock_bh(&sta->lock); if (sta->ignore_plink_timer) { @@ -598,29 +596,6 @@ static void mesh_plink_timer(unsigned long data) } } -#ifdef CONFIG_PM -void mesh_plink_quiesce(struct sta_info *sta) -{ - if (!ieee80211_vif_is_mesh(&sta->sdata->vif)) - return; - - /* no kernel mesh sta timers have been initialized */ - if (sta->sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) - return; - - if (del_timer_sync(&sta->plink_timer)) - sta->plink_timer_was_running = true; -} - -void mesh_plink_restart(struct sta_info *sta) -{ - if (sta->plink_timer_was_running) { - add_timer(&sta->plink_timer); - sta->plink_timer_was_running = false; - } -} -#endif - static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout) { sta->plink_timer.expires = jiffies + (HZ * timeout / 1000); @@ -695,6 +670,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, if (len < IEEE80211_MIN_ACTION_SIZE + 3) return; + if (sdata->u.mesh.user_mpm) + /* userspace must register for these */ + return; + if (is_multicast_ether_addr(mgmt->da)) { mpl_dbg(sdata, "Mesh plink: ignore frame from multicast address\n"); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82cc303..1671586 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -87,9 +87,6 @@ MODULE_PARM_DESC(probe_wait_ms, */ #define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 -#define TMR_RUNNING_TIMER 0 -#define TMR_RUNNING_CHANSW 1 - /* * All cfg80211 functions have to be called outside a locked * section so that they can acquire a lock themselves... This @@ -609,6 +606,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap)); memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); + ieee80211_apply_vhtcap_overrides(sdata, &vht_cap); /* determine capability flags */ cap = vht_cap.cap; @@ -1038,14 +1036,8 @@ static void ieee80211_chswitch_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (sdata->local->quiescing) { - set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); - return; - } - - ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work); } void @@ -1802,9 +1794,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.p2p_ctwindow = 0; sdata->vif.bss_conf.p2p_oppps = false; - /* on the next assoc, re-program HT parameters */ + /* on the next assoc, re-program HT/VHT parameters */ memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa)); memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); + memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa)); + memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask)); sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; @@ -1830,8 +1824,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.timer); del_timer_sync(&sdata->u.mgd.chswitch_timer); - sdata->u.mgd.timers_running = 0; - sdata->vif.bss_conf.dtim_period = 0; ifmgd->flags = 0; @@ -3140,15 +3132,8 @@ static void ieee80211_sta_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct ieee80211_local *local = sdata->local; - - if (local->quiescing) { - set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); - return; - } - ieee80211_queue_work(&local->hw, &sdata->work); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, @@ -3500,72 +3485,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) } } -#ifdef CONFIG_PM -void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - - /* - * Stop timers before deleting work items, as timers - * could race and re-add the work-items. They will be - * re-established on connection. - */ - del_timer_sync(&ifmgd->conn_mon_timer); - del_timer_sync(&ifmgd->bcn_mon_timer); - - /* - * we need to use atomic bitops for the running bits - * only because both timers might fire at the same - * time -- the code here is properly synchronised. - */ - - cancel_work_sync(&ifmgd->request_smps_work); - - cancel_work_sync(&ifmgd->monitor_work); - cancel_work_sync(&ifmgd->beacon_connection_loss_work); - cancel_work_sync(&ifmgd->csa_connection_drop_work); - if (del_timer_sync(&ifmgd->timer)) - set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); - - if (del_timer_sync(&ifmgd->chswitch_timer)) - set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); - cancel_work_sync(&ifmgd->chswitch_work); -} - -void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - - mutex_lock(&ifmgd->mtx); - if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); - return; - } - - if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { - sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; - mlme_dbg(sdata, "driver requested disconnect after resume\n"); - ieee80211_sta_connection_lost(sdata, - ifmgd->associated->bssid, - WLAN_REASON_UNSPECIFIED, - true); - mutex_unlock(&ifmgd->mtx); - return; - } - mutex_unlock(&ifmgd->mtx); - - if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running)) - add_timer(&ifmgd->timer); - if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running)) - add_timer(&ifmgd->chswitch_timer); - ieee80211_sta_reset_beacon_monitor(sdata); - - mutex_lock(&sdata->local->mtx); - ieee80211_restart_sta_timer(sdata); - mutex_unlock(&sdata->local->mtx); -} -#endif - /* interface setup */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) { @@ -4073,6 +3992,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; } + if (req->flags & ASSOC_REQ_DISABLE_VHT) + ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; + /* Also disable HT if we don't support it or the AP doesn't use WMM */ sband = local->hw.wiphy->bands[req->bss->channel->band]; if (!sband->ht_cap.ht_supported || @@ -4096,6 +4018,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask, sizeof(ifmgd->ht_capa_mask)); + memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa)); + memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask, + sizeof(ifmgd->vht_capa_mask)); + if (req->ie && req->ie_len) { memcpy(assoc_data->ie, req->ie, req->ie_len); assoc_data->ie_len = req->ie_len; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index cc79b4a..db547fc 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -277,7 +277,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) duration = 10; ret = drv_remain_on_channel(local, roc->sdata, roc->chan, - duration); + duration, roc->type); roc->started = true; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index d0275f3..b471a67 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -6,32 +6,11 @@ #include "driver-ops.h" #include "led.h" -/* return value indicates whether the driver should be further notified */ -static void ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) -{ - switch (sdata->vif.type) { - case NL80211_IFTYPE_STATION: - ieee80211_sta_quiesce(sdata); - break; - case NL80211_IFTYPE_ADHOC: - ieee80211_ibss_quiesce(sdata); - break; - case NL80211_IFTYPE_MESH_POINT: - ieee80211_mesh_quiesce(sdata); - break; - default: - break; - } - - cancel_work_sync(&sdata->work); -} - int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - struct ieee80211_chanctx *ctx; if (!local->open_count) goto suspend; @@ -93,19 +72,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) return err; } else if (err > 0) { WARN_ON(err != 1); - local->wowlan = false; + return err; } else { - list_for_each_entry(sdata, &local->interfaces, list) - if (ieee80211_sdata_running(sdata)) - ieee80211_quiesce(sdata); goto suspend; } } - /* disable keys */ - list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_disable_keys(sdata); - /* tear down aggregation sessions and remove STAs */ mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { @@ -117,100 +89,25 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(drv_sta_state(local, sta->sdata, sta, state, state - 1)); } - - mesh_plink_quiesce(sta); } mutex_unlock(&local->sta_mtx); /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { - static u8 zero_addr[ETH_ALEN] = {}; - u32 changed = 0; - if (!ieee80211_sdata_running(sdata)) continue; - - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_MONITOR: - /* skip these */ - continue; - case NL80211_IFTYPE_STATION: - if (sdata->vif.bss_conf.assoc) - changed = BSS_CHANGED_ASSOC | - BSS_CHANGED_BSSID | - BSS_CHANGED_IDLE; - break; - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - if (sdata->vif.bss_conf.enable_beacon) - changed = BSS_CHANGED_BEACON_ENABLED; - break; - default: - break; - } - - ieee80211_quiesce(sdata); - - sdata->suspend_bss_conf = sdata->vif.bss_conf; - memset(&sdata->vif.bss_conf, 0, sizeof(sdata->vif.bss_conf)); - sdata->vif.bss_conf.idle = true; - if (sdata->suspend_bss_conf.bssid) - sdata->vif.bss_conf.bssid = zero_addr; - - /* disable beaconing or remove association */ - ieee80211_bss_info_change_notify(sdata, changed); - - if (sdata->vif.type == NL80211_IFTYPE_AP && - rcu_access_pointer(sdata->u.ap.beacon)) - drv_stop_ap(local, sdata); - - if (local->use_chanctx) { - struct ieee80211_chanctx_conf *conf; - - mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected( - sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (conf) { - ctx = container_of(conf, - struct ieee80211_chanctx, - conf); - drv_unassign_vif_chanctx(local, sdata, ctx); - } - - mutex_unlock(&local->chanctx_mtx); - } drv_remove_interface(local, sdata); } sdata = rtnl_dereference(local->monitor_sdata); - if (sdata) { - if (local->use_chanctx) { - struct ieee80211_chanctx_conf *conf; - - mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected( - sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (conf) { - ctx = container_of(conf, - struct ieee80211_chanctx, - conf); - drv_unassign_vif_chanctx(local, sdata, ctx); - } - - mutex_unlock(&local->chanctx_mtx); - } - + if (sdata) drv_remove_interface(local, sdata); - } - mutex_lock(&local->chanctx_mtx); - list_for_each_entry(ctx, &local->chanctx_list, list) - drv_remove_chanctx(local, ctx); - mutex_unlock(&local->chanctx_mtx); + /* + * We disconnected on all interfaces before suspend, all channel + * contexts should be released. + */ + WARN_ON(!list_empty(&local->chanctx_list)); /* stop hardware - this must stop RX */ if (local->open_count) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index eea45a2..1c36c9b 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -55,7 +55,6 @@ #include "rate.h" #include "rc80211_minstrel.h" -#define SAMPLE_COLUMNS 10 #define SAMPLE_TBL(_mi, _idx, _col) \ _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col] @@ -70,16 +69,31 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) return i; } +/* find & sort topmost throughput rates */ +static inline void +minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) +{ + int j = MAX_THR_RATES; + + while (j > 0 && mi->r[i].cur_tp > mi->r[tp_list[j - 1]].cur_tp) + j--; + if (j < MAX_THR_RATES - 1) + memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1)); + if (j < MAX_THR_RATES) + tp_list[j] = i; +} + static void minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) { - u32 max_tp = 0, index_max_tp = 0, index_max_tp2 = 0; - u32 max_prob = 0, index_max_prob = 0; + u8 tmp_tp_rate[MAX_THR_RATES]; + u8 tmp_prob_rate = 0; u32 usecs; - u32 p; int i; - mi->stats_update = jiffies; + for (i=0; i < MAX_THR_RATES; i++) + tmp_tp_rate[i] = 0; + for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; @@ -87,27 +101,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) if (!usecs) usecs = 1000000; - /* To avoid rounding issues, probabilities scale from 0 (0%) - * to 18000 (100%) */ - if (mr->attempts) { - p = (mr->success * 18000) / mr->attempts; + if (unlikely(mr->attempts > 0)) { + mr->sample_skipped = 0; + mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts); mr->succ_hist += mr->success; mr->att_hist += mr->attempts; - mr->cur_prob = p; - p = ((p * (100 - mp->ewma_level)) + (mr->probability * - mp->ewma_level)) / 100; - mr->probability = p; - mr->cur_tp = p * (1000000 / usecs); - } + mr->probability = minstrel_ewma(mr->probability, + mr->cur_prob, + EWMA_LEVEL); + } else + mr->sample_skipped++; mr->last_success = mr->success; mr->last_attempts = mr->attempts; mr->success = 0; mr->attempts = 0; + /* Update throughput per rate, reset thr. below 10% success */ + if (mr->probability < MINSTREL_FRAC(10, 100)) + mr->cur_tp = 0; + else + mr->cur_tp = mr->probability * (1000000 / usecs); + /* Sample less often below the 10% chance of success. * Sample less often above the 95% chance of success. */ - if ((mr->probability > 17100) || (mr->probability < 1800)) { + if (mr->probability > MINSTREL_FRAC(95, 100) || + mr->probability < MINSTREL_FRAC(10, 100)) { mr->adjusted_retry_count = mr->retry_count >> 1; if (mr->adjusted_retry_count > 2) mr->adjusted_retry_count = 2; @@ -118,35 +137,30 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) } if (!mr->adjusted_retry_count) mr->adjusted_retry_count = 2; - } - for (i = 0; i < mi->n_rates; i++) { - struct minstrel_rate *mr = &mi->r[i]; - if (max_tp < mr->cur_tp) { - index_max_tp = i; - max_tp = mr->cur_tp; - } - if (max_prob < mr->probability) { - index_max_prob = i; - max_prob = mr->probability; + minstrel_sort_best_tp_rates(mi, i, tmp_tp_rate); + + /* To determine the most robust rate (max_prob_rate) used at + * 3rd mmr stage we distinct between two cases: + * (1) if any success probabilitiy >= 95%, out of those rates + * choose the maximum throughput rate as max_prob_rate + * (2) if all success probabilities < 95%, the rate with + * highest success probability is choosen as max_prob_rate */ + if (mr->probability >= MINSTREL_FRAC(95,100)) { + if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp) + tmp_prob_rate = i; + } else { + if (mr->probability >= mi->r[tmp_prob_rate].probability) + tmp_prob_rate = i; } } - max_tp = 0; - for (i = 0; i < mi->n_rates; i++) { - struct minstrel_rate *mr = &mi->r[i]; - - if (i == index_max_tp) - continue; + /* Assign the new rate set */ + memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate)); + mi->max_prob_rate = tmp_prob_rate; - if (max_tp < mr->cur_tp) { - index_max_tp2 = i; - max_tp = mr->cur_tp; - } - } - mi->max_tp_rate = index_max_tp; - mi->max_tp_rate2 = index_max_tp2; - mi->max_prob_rate = index_max_prob; + /* Reset update timer */ + mi->stats_update = jiffies; } static void @@ -207,10 +221,10 @@ static int minstrel_get_next_sample(struct minstrel_sta_info *mi) { unsigned int sample_ndx; - sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column); - mi->sample_idx++; - if ((int) mi->sample_idx > (mi->n_rates - 2)) { - mi->sample_idx = 0; + sample_ndx = SAMPLE_TBL(mi, mi->sample_row, mi->sample_column); + mi->sample_row++; + if ((int) mi->sample_row >= mi->n_rates) { + mi->sample_row = 0; mi->sample_column++; if (mi->sample_column >= SAMPLE_COLUMNS) mi->sample_column = 0; @@ -228,31 +242,37 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, struct minstrel_priv *mp = priv; struct ieee80211_tx_rate *ar = info->control.rates; unsigned int ndx, sample_ndx = 0; - bool mrr; - bool sample_slower = false; - bool sample = false; + bool mrr_capable; + bool indirect_rate_sampling = false; + bool rate_sampling = false; int i, delta; int mrr_ndx[3]; - int sample_rate; + int sampling_ratio; + /* management/no-ack frames do not use rate control */ if (rate_control_send_low(sta, priv_sta, txrc)) return; - mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot; - - ndx = mi->max_tp_rate; - - if (mrr) - sample_rate = mp->lookaround_rate_mrr; + /* check multi-rate-retry capabilities & adjust lookaround_rate */ + mrr_capable = mp->has_mrr && + !txrc->rts && + !txrc->bss_conf->use_cts_prot; + if (mrr_capable) + sampling_ratio = mp->lookaround_rate_mrr; else - sample_rate = mp->lookaround_rate; + sampling_ratio = mp->lookaround_rate; + + /* init rateindex [ndx] with max throughput rate */ + ndx = mi->max_tp_rate[0]; + /* increase sum packet counter */ mi->packet_count++; - delta = (mi->packet_count * sample_rate / 100) - + + delta = (mi->packet_count * sampling_ratio / 100) - (mi->sample_count + mi->sample_deferred / 2); /* delta > 0: sampling required */ - if ((delta > 0) && (mrr || !mi->prev_sample)) { + if ((delta > 0) && (mrr_capable || !mi->prev_sample)) { struct minstrel_rate *msr; if (mi->packet_count >= 10000) { mi->sample_deferred = 0; @@ -271,21 +291,28 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, mi->sample_count += (delta - mi->n_rates * 2); } + /* get next random rate sample */ sample_ndx = minstrel_get_next_sample(mi); msr = &mi->r[sample_ndx]; - sample = true; - sample_slower = mrr && (msr->perfect_tx_time > - mi->r[ndx].perfect_tx_time); - - if (!sample_slower) { + rate_sampling = true; + + /* Decide if direct ( 1st mrr stage) or indirect (2nd mrr stage) + * rate sampling method should be used. + * Respect such rates that are not sampled for 20 interations. + */ + if (mrr_capable && + msr->perfect_tx_time > mi->r[ndx].perfect_tx_time && + msr->sample_skipped < 20) + indirect_rate_sampling = true; + + if (!indirect_rate_sampling) { if (msr->sample_limit != 0) { ndx = sample_ndx; mi->sample_count++; if (msr->sample_limit > 0) msr->sample_limit--; - } else { - sample = false; - } + } else + rate_sampling = false; } else { /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark * packets that have the sampling rate deferred to the @@ -297,34 +324,39 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, mi->sample_deferred++; } } - mi->prev_sample = sample; + mi->prev_sample = rate_sampling; /* If we're not using MRR and the sampling rate already * has a probability of >95%, we shouldn't be attempting * to use it, as this only wastes precious airtime */ - if (!mrr && sample && (mi->r[ndx].probability > 17100)) - ndx = mi->max_tp_rate; + if (!mrr_capable && rate_sampling && + (mi->r[ndx].probability > MINSTREL_FRAC(95, 100))) + ndx = mi->max_tp_rate[0]; + /* mrr setup for 1st stage */ ar[0].idx = mi->r[ndx].rix; ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info); - if (!mrr) { - if (!sample) + /* non mrr setup for 2nd stage */ + if (!mrr_capable) { + if (!rate_sampling) ar[0].count = mp->max_retry; ar[1].idx = mi->lowest_rix; ar[1].count = mp->max_retry; return; } - /* MRR setup */ - if (sample) { - if (sample_slower) + /* mrr setup for 2nd stage */ + if (rate_sampling) { + if (indirect_rate_sampling) mrr_ndx[0] = sample_ndx; else - mrr_ndx[0] = mi->max_tp_rate; + mrr_ndx[0] = mi->max_tp_rate[0]; } else { - mrr_ndx[0] = mi->max_tp_rate2; + mrr_ndx[0] = mi->max_tp_rate[1]; } + + /* mrr setup for 3rd & 4th stage */ mrr_ndx[1] = mi->max_prob_rate; mrr_ndx[2] = 0; for (i = 1; i < 4; i++) { @@ -351,26 +383,21 @@ static void init_sample_table(struct minstrel_sta_info *mi) { unsigned int i, col, new_idx; - unsigned int n_srates = mi->n_rates - 1; u8 rnd[8]; mi->sample_column = 0; - mi->sample_idx = 0; - memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates); + mi->sample_row = 0; + memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates); for (col = 0; col < SAMPLE_COLUMNS; col++) { - for (i = 0; i < n_srates; i++) { + for (i = 0; i < mi->n_rates; i++) { get_random_bytes(rnd, sizeof(rnd)); - new_idx = (i + rnd[i & 7]) % n_srates; + new_idx = (i + rnd[i & 7]) % mi->n_rates; - while (SAMPLE_TBL(mi, new_idx, col) != 0) - new_idx = (new_idx + 1) % n_srates; + while (SAMPLE_TBL(mi, new_idx, col) != 0xff) + new_idx = (new_idx + 1) % mi->n_rates; - /* Don't sample the slowest rate (i.e. slowest base - * rate). We must presume that the slowest rate works - * fine, or else other management frames will also be - * failing and the link will break */ - SAMPLE_TBL(mi, new_idx, col) = i + 1; + SAMPLE_TBL(mi, new_idx, col) = i; } } } @@ -542,9 +569,6 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->lookaround_rate = 5; mp->lookaround_rate_mrr = 10; - /* moving average weight for EWMA */ - mp->ewma_level = 75; - /* maximum time that the hw is allowed to stay in one MRR segment */ mp->segment_size = 6000; diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 5ecf757..85ebf42 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -9,6 +9,28 @@ #ifndef __RC_MINSTREL_H #define __RC_MINSTREL_H +#define EWMA_LEVEL 75 /* ewma weighting factor [%] */ +#define SAMPLE_COLUMNS 10 /* number of columns in sample table */ + + +/* scaled fraction values */ +#define MINSTREL_SCALE 16 +#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) +#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) + +/* number of highest throughput rates to consider*/ +#define MAX_THR_RATES 4 + +/* + * Perform EWMA (Exponentially Weighted Moving Average) calculation + */ +static inline int +minstrel_ewma(int old, int new, int weight) +{ + return (new * (100 - weight) + old * weight) / 100; +} + + struct minstrel_rate { int bitrate; int rix; @@ -26,6 +48,7 @@ struct minstrel_rate { u32 attempts; u32 last_attempts; u32 last_success; + u8 sample_skipped; /* parts per thousand */ u32 cur_prob; @@ -45,14 +68,13 @@ struct minstrel_sta_info { unsigned int lowest_rix; - unsigned int max_tp_rate; - unsigned int max_tp_rate2; - unsigned int max_prob_rate; + u8 max_tp_rate[MAX_THR_RATES]; + u8 max_prob_rate; unsigned int packet_count; unsigned int sample_count; int sample_deferred; - unsigned int sample_idx; + unsigned int sample_row; unsigned int sample_column; int n_rates; @@ -73,7 +95,6 @@ struct minstrel_priv { unsigned int cw_min; unsigned int cw_max; unsigned int max_retry; - unsigned int ewma_level; unsigned int segment_size; unsigned int update_interval; unsigned int lookaround_rate; diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index d5a5622..d104834 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -73,15 +73,17 @@ minstrel_stats_open(struct inode *inode, struct file *file) for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; - *(p++) = (i == mi->max_tp_rate) ? 'T' : ' '; - *(p++) = (i == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' '; + *(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' '; + *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' '; + *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' '; *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; p += sprintf(p, "%3u%s", mr->bitrate / 2, (mr->bitrate & 1 ? ".5" : " ")); - tp = mr->cur_tp / ((18000 << 10) / 96); - prob = mr->cur_prob / 18; - eprob = mr->probability / 18; + tp = MINSTREL_TRUNC(mr->cur_tp / 10); + prob = MINSTREL_TRUNC(mr->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mr->probability * 1000); p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " "%3u(%3u) %8llu %8llu\n", diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 3af141c..749552b 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -17,8 +17,6 @@ #include "rc80211_minstrel_ht.h" #define AVG_PKT_SIZE 1200 -#define SAMPLE_COLUMNS 10 -#define EWMA_LEVEL 75 /* Number of bits for an average sized packet */ #define MCS_NBITS (AVG_PKT_SIZE << 3) @@ -26,11 +24,11 @@ /* Number of symbols for a packet with (bps) bits per symbol */ #define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) -/* Transmission time for a packet containing (syms) symbols */ +/* Transmission time (nanoseconds) for a packet containing (syms) symbols */ #define MCS_SYMBOL_TIME(sgi, syms) \ (sgi ? \ - ((syms) * 18 + 4) / 5 : /* syms * 3.6 us */ \ - (syms) << 2 /* syms * 4 us */ \ + ((syms) * 18000 + 4000) / 5 : /* syms * 3.6 us */ \ + ((syms) * 1000) << 2 /* syms * 4 us */ \ ) /* Transmit duration for the raw data part of an average sized packet */ @@ -64,9 +62,9 @@ } #define CCK_DURATION(_bitrate, _short, _len) \ - (10 /* SIFS */ + \ + (1000 * (10 /* SIFS */ + \ (_short ? 72 + 24 : 144 + 48 ) + \ - (8 * (_len + 4) * 10) / (_bitrate)) + (8 * (_len + 4) * 10) / (_bitrate))) #define CCK_ACK_DURATION(_bitrate, _short) \ (CCK_DURATION((_bitrate > 10 ? 20 : 10), false, 60) + \ @@ -129,15 +127,6 @@ const struct mcs_group minstrel_mcs_groups[] = { static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES]; /* - * Perform EWMA (Exponentially Weighted Moving Average) calculation - */ -static int -minstrel_ewma(int old, int new, int weight) -{ - return (new * (100 - weight) + old * weight) / 100; -} - -/* * Look up an MCS group index based on mac80211 rate information */ static int @@ -211,7 +200,8 @@ static void minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) { struct minstrel_rate_stats *mr; - unsigned int usecs = 0; + unsigned int nsecs = 0; + unsigned int tp; mr = &mi->groups[group].rates[rate]; @@ -221,10 +211,12 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) } if (group != MINSTREL_CCK_GROUP) - usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); + nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); - usecs += minstrel_mcs_groups[group].duration[rate]; - mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability); + nsecs += minstrel_mcs_groups[group].duration[rate]; + tp = 1000000 * ((mr->probability * 1000) / nsecs); + + mr->cur_tp = MINSTREL_TRUNC(tp); } /* @@ -308,8 +300,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } } - /* try to sample up to half of the available rates during each interval */ - mi->sample_count *= 4; + /* try to sample all available rates during each interval */ + mi->sample_count *= 8; cur_prob = 0; cur_prob_tp = 0; @@ -320,20 +312,13 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mg->supported) continue; - mr = minstrel_get_ratestats(mi, mg->max_prob_rate); - if (cur_prob_tp < mr->cur_tp && - minstrel_mcs_groups[group].streams == 1) { - mi->max_prob_rate = mg->max_prob_rate; - cur_prob = mr->cur_prob; - cur_prob_tp = mr->cur_tp; - } - mr = minstrel_get_ratestats(mi, mg->max_tp_rate); if (cur_tp < mr->cur_tp) { mi->max_tp_rate2 = mi->max_tp_rate; cur_tp2 = cur_tp; mi->max_tp_rate = mg->max_tp_rate; cur_tp = mr->cur_tp; + mi->max_prob_streams = minstrel_mcs_groups[group].streams - 1; } mr = minstrel_get_ratestats(mi, mg->max_tp_rate2); @@ -343,6 +328,23 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) } } + if (mi->max_prob_streams < 1) + mi->max_prob_streams = 1; + + for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { + mg = &mi->groups[group]; + if (!mg->supported) + continue; + mr = minstrel_get_ratestats(mi, mg->max_prob_rate); + if (cur_prob_tp < mr->cur_tp && + minstrel_mcs_groups[group].streams <= mi->max_prob_streams) { + mi->max_prob_rate = mg->max_prob_rate; + cur_prob = mr->cur_prob; + cur_prob_tp = mr->cur_tp; + } + } + + mi->stats_update = jiffies; } @@ -467,7 +469,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) { mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); - mi->sample_tries = 2; + mi->sample_tries = 1; mi->sample_count--; } @@ -536,7 +538,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, mr->retry_updated = true; group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; - tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; + tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000; /* Contention time for first 2 tries */ ctime = (t_slot * cw) >> 1; @@ -616,6 +618,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct minstrel_rate_stats *mr; struct minstrel_mcs_group_data *mg; + unsigned int sample_dur, sample_group; int sample_idx = 0; if (mi->sample_wait > 0) { @@ -626,11 +629,11 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!mi->sample_tries) return -1; - mi->sample_tries--; mg = &mi->groups[mi->sample_group]; sample_idx = sample_table[mg->column][mg->index]; mr = &mg->rates[sample_idx]; - sample_idx += mi->sample_group * MCS_GROUP_RATES; + sample_group = mi->sample_group; + sample_idx += sample_group * MCS_GROUP_RATES; minstrel_next_sample_idx(mi); /* @@ -651,14 +654,18 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) * Make sure that lower rates get sampled only occasionally, * if the link is working perfectly. */ - if (minstrel_get_duration(sample_idx) > - minstrel_get_duration(mi->max_tp_rate)) { + sample_dur = minstrel_get_duration(sample_idx); + if (sample_dur >= minstrel_get_duration(mi->max_tp_rate2) && + (mi->max_prob_streams < + minstrel_mcs_groups[sample_group].streams || + sample_dur >= minstrel_get_duration(mi->max_prob_rate))) { if (mr->sample_skipped < 20) return -1; if (mi->sample_slow++ > 2) return -1; } + mi->sample_tries--; return sample_idx; } diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 302dbd5..9b16e9d 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -16,11 +16,6 @@ #define MINSTREL_MAX_STREAMS 3 #define MINSTREL_STREAM_GROUPS 4 -/* scaled fraction values */ -#define MINSTREL_SCALE 16 -#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) -#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) - #define MCS_GROUP_RATES 8 struct mcs_group { @@ -85,6 +80,7 @@ struct minstrel_ht_sta { /* best probability rate */ unsigned int max_prob_rate; + unsigned int max_prob_streams; /* time of last status update */ unsigned long stats_update; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c6844ad..2528b5a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -648,24 +648,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -#define SEQ_MODULO 0x1000 -#define SEQ_MASK 0xfff - -static inline int seq_less(u16 sq1, u16 sq2) -{ - return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1); -} - -static inline u16 seq_inc(u16 sq) -{ - return (sq + 1) & SEQ_MASK; -} - -static inline u16 seq_sub(u16 sq1, u16 sq2) -{ - return (sq1 - sq2) & SEQ_MASK; -} - static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_rx *tid_agg_rx, int index, @@ -687,7 +669,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata, __skb_queue_tail(frames, skb); no_frame: - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num); } static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, @@ -699,8 +681,9 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata lockdep_assert_held(&tid_agg_rx->reorder_lock); - while (seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) { + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); @@ -727,8 +710,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, lockdep_assert_held(&tid_agg_rx->reorder_lock); /* release the buffer until next missing frame */ - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % - tid_agg_rx->buf_size; + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; if (!tid_agg_rx->reorder_buf[index] && tid_agg_rx->stored_mpdu_num) { /* @@ -756,19 +739,22 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata, * Increment the head seq# also for the skipped slots. */ tid_agg_rx->head_seq_num = - (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK; + (tid_agg_rx->head_seq_num + + skipped) & IEEE80211_SN_MASK; skipped = 0; } } else while (tid_agg_rx->reorder_buf[index]) { ieee80211_release_reorder_frame(sdata, tid_agg_rx, index, frames); - index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % + index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; } if (tid_agg_rx->stored_mpdu_num) { - j = index = seq_sub(tid_agg_rx->head_seq_num, - tid_agg_rx->ssn) % tid_agg_rx->buf_size; + j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, + tid_agg_rx->ssn) % + tid_agg_rx->buf_size; for (; j != (index - 1) % tid_agg_rx->buf_size; j = (j + 1) % tid_agg_rx->buf_size) { @@ -809,7 +795,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata head_seq_num = tid_agg_rx->head_seq_num; /* frame with out of date sequence number */ - if (seq_less(mpdu_seq_num, head_seq_num)) { + if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) { dev_kfree_skb(skb); goto out; } @@ -818,8 +804,9 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata * If frame the sequence number exceeds our buffering window * size release some previous frames to make room for this one. */ - if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) { - head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); + if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) { + head_seq_num = ieee80211_sn_inc( + ieee80211_sn_sub(mpdu_seq_num, buf_size)); /* release stored frames up to new head to stack */ ieee80211_release_reorder_frames(sdata, tid_agg_rx, head_seq_num, frames); @@ -827,7 +814,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata /* Now the new frame is always in the range of the reordering buffer */ - index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; + index = ieee80211_sn_sub(mpdu_seq_num, + tid_agg_rx->ssn) % tid_agg_rx->buf_size; /* check if we already stored this frame */ if (tid_agg_rx->reorder_buf[index]) { @@ -843,7 +831,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata */ if (mpdu_seq_num == tid_agg_rx->head_seq_num && tid_agg_rx->stored_mpdu_num == 0) { - tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); + tid_agg_rx->head_seq_num = + ieee80211_sn_inc(tid_agg_rx->head_seq_num); ret = false; goto out; } @@ -1894,8 +1883,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * 'align' will only take the values 0 or 2 here * since all frames are required to be aligned * to 2-byte boundaries when being passed to - * mac80211. That also explains the __skb_push() - * below. + * mac80211; the code here works just as well if + * that isn't true, but mac80211 assumes it can + * access fields as 2-byte aligned (e.g. for + * compare_ether_addr) */ align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3; if (align) { @@ -2552,7 +2543,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_SP_MESH_PEERING_CONFIRM: if (!ieee80211_vif_is_mesh(&sdata->vif)) goto invalid; - if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) + if (sdata->u.mesh.user_mpm) /* userspace handles this frame */ break; goto queue; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 238a0cc..85458a2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -342,6 +342,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, INIT_WORK(&sta->drv_unblock_wk, sta_unblock); INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work); mutex_init(&sta->ampdu_mlme.mtx); +#ifdef CONFIG_MAC80211_MESH + if (ieee80211_vif_is_mesh(&sdata->vif) && + !sdata->u.mesh.user_mpm) + init_timer(&sta->plink_timer); +#endif memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; @@ -795,13 +800,16 @@ int __must_check __sta_info_destroy(struct sta_info *sta) mutex_lock(&local->key_mtx); for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i])); + __ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i]), + true); have_key = true; } if (sta->ptk) { - __ieee80211_key_free(key_mtx_dereference(local, sta->ptk)); + __ieee80211_key_free(key_mtx_dereference(local, sta->ptk), + true); have_key = true; } + mutex_unlock(&local->key_mtx); if (!have_key) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 4947341..e5868c3 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -281,7 +281,6 @@ struct sta_ampdu_mlme { * @plink_state: peer link state * @plink_timeout: timeout of peer link * @plink_timer: peer link watch timer - * @plink_timer_was_running: used by suspend/resume to restore timers * @t_offset: timing offset relative to this host * @t_offset_setpoint: reference timing offset of this sta to be used when * calculating clockdrift @@ -379,7 +378,6 @@ struct sta_info { __le16 reason; u8 plink_retries; bool ignore_plink_timer; - bool plink_timer_was_running; enum nl80211_plink_state plink_state; u32 plink_timeout; struct timer_list plink_timer; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 3d7cd2a..e7db2b8 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1042,15 +1042,17 @@ TRACE_EVENT(drv_remain_on_channel, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - unsigned int duration), + unsigned int duration, + enum ieee80211_roc_type type), - TP_ARGS(local, sdata, chan, duration), + TP_ARGS(local, sdata, chan, duration, type), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, center_freq) __field(unsigned int, duration) + __field(u32, type) ), TP_fast_assign( @@ -1058,12 +1060,13 @@ TRACE_EVENT(drv_remain_on_channel, VIF_ASSIGN; __entry->center_freq = chan->center_freq; __entry->duration = duration; + __entry->type = type; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms", + LOCAL_PR_FMT VIF_PR_FMT " freq:%dMHz duration:%dms type=%d", LOCAL_PR_ARG, VIF_PR_ARG, - __entry->center_freq, __entry->duration + __entry->center_freq, __entry->duration, __entry->type ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8914d2d..4e8a861 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2085,7 +2085,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype >= 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0f38f43..b7a856e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1357,6 +1357,25 @@ void ieee80211_stop_device(struct ieee80211_local *local) drv_stop(local); } +static void ieee80211_assign_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *ctx; + + if (!local->use_chanctx) + return; + + mutex_lock(&local->chanctx_mtx); + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); + if (conf) { + ctx = container_of(conf, struct ieee80211_chanctx, conf); + drv_assign_vif_chanctx(local, sdata, ctx); + } + mutex_unlock(&local->chanctx_mtx); +} + int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; @@ -1445,36 +1464,14 @@ int ieee80211_reconfig(struct ieee80211_local *local) } list_for_each_entry(sdata, &local->interfaces, list) { - struct ieee80211_chanctx_conf *ctx_conf; - if (!ieee80211_sdata_running(sdata)) continue; - - mutex_lock(&local->chanctx_mtx); - ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (ctx_conf) { - ctx = container_of(ctx_conf, struct ieee80211_chanctx, - conf); - drv_assign_vif_chanctx(local, sdata, ctx); - } - mutex_unlock(&local->chanctx_mtx); + ieee80211_assign_chanctx(local, sdata); } sdata = rtnl_dereference(local->monitor_sdata); - if (sdata && local->use_chanctx && ieee80211_sdata_running(sdata)) { - struct ieee80211_chanctx_conf *ctx_conf; - - mutex_lock(&local->chanctx_mtx); - ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); - if (ctx_conf) { - ctx = container_of(ctx_conf, struct ieee80211_chanctx, - conf); - drv_assign_vif_chanctx(local, sdata, ctx); - } - mutex_unlock(&local->chanctx_mtx); - } + if (sdata && ieee80211_sdata_running(sdata)) + ieee80211_assign_chanctx(local, sdata); /* add STAs back */ mutex_lock(&local->sta_mtx); @@ -1534,11 +1531,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_IDLE | BSS_CHANGED_TXPOWER; -#ifdef CONFIG_PM - if (local->resuming && !reconfig_due_to_wowlan) - sdata->vif.bss_conf = sdata->suspend_bss_conf; -#endif - switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: changed |= BSS_CHANGED_ASSOC | @@ -1678,28 +1670,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) mb(); local->resuming = false; - list_for_each_entry(sdata, &local->interfaces, list) { - switch(sdata->vif.type) { - case NL80211_IFTYPE_STATION: - ieee80211_sta_restart(sdata); - break; - case NL80211_IFTYPE_ADHOC: - ieee80211_ibss_restart(sdata); - break; - case NL80211_IFTYPE_MESH_POINT: - ieee80211_mesh_restart(sdata); - break; - default: - break; - } - } - mod_timer(&local->sta_cleanup, jiffies + 1); - - mutex_lock(&local->sta_mtx); - list_for_each_entry(sta, &local->sta_list, list) - mesh_plink_restart(sta); - mutex_unlock(&local->sta_mtx); #else WARN_ON(1); #endif diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index a2c2258..171344d 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -13,6 +13,104 @@ #include "rate.h" +static void __check_vhtcap_disable(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_vht_cap *vht_cap, + u32 flag) +{ + __le32 le_flag = cpu_to_le32(flag); + + if (sdata->u.mgd.vht_capa_mask.vht_cap_info & le_flag && + !(sdata->u.mgd.vht_capa.vht_cap_info & le_flag)) + vht_cap->cap &= ~flag; +} + +void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta_vht_cap *vht_cap) +{ + int i; + u16 rxmcs_mask, rxmcs_cap, rxmcs_n, txmcs_mask, txmcs_cap, txmcs_n; + + if (!vht_cap->vht_supported) + return; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return; + + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_RXLDPC); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_SHORT_GI_80); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_SHORT_GI_160); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_TXSTBC); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN); + __check_vhtcap_disable(sdata, vht_cap, + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN); + + /* Allow user to decrease AMPDU length exponent */ + if (sdata->u.mgd.vht_capa_mask.vht_cap_info & + cpu_to_le32(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK)) { + u32 cap, n; + + n = le32_to_cpu(sdata->u.mgd.vht_capa.vht_cap_info) & + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + n >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; + cap = vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + cap >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; + + if (n < cap) { + vht_cap->cap &= + ~IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; + vht_cap->cap |= + n << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; + } + } + + /* Allow the user to decrease MCSes */ + rxmcs_mask = + le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.rx_mcs_map); + rxmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.rx_mcs_map); + rxmcs_n &= rxmcs_mask; + rxmcs_cap = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + + txmcs_mask = + le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.tx_mcs_map); + txmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.tx_mcs_map); + txmcs_n &= txmcs_mask; + txmcs_cap = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); + for (i = 0; i < 8; i++) { + u8 m, n, c; + + m = (rxmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + n = (rxmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + c = (rxmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) || + n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) { + rxmcs_cap &= ~(3 << 2*i); + rxmcs_cap |= (rxmcs_n & (3 << 2*i)); + } + + m = (txmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + n = (txmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + c = (txmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) || + n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) { + txmcs_cap &= ~(3 << 2*i); + txmcs_cap |= (txmcs_n & (3 << 2*i)); + } + } + vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(rxmcs_cap); + vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(txmcs_cap); +} + void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, @@ -20,6 +118,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; + struct ieee80211_sta_vht_cap own_cap; + u32 cap_info, i; memset(vht_cap, 0, sizeof(*vht_cap)); @@ -35,12 +135,122 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_supported = true; - vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); + own_cap = sband->vht_cap; + /* + * If user has specified capability overrides, take care + * of that if the station we're setting up is the AP that + * we advertised a restricted capability set to. Override + * our own capabilities and then use those below. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + ieee80211_apply_vhtcap_overrides(sdata, &own_cap); + + /* take some capabilities as-is */ + cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); + vht_cap->cap = cap_info; + vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_RXLDPC | + IEEE80211_VHT_CAP_VHT_TXOP_PS | + IEEE80211_VHT_CAP_HTC_VHT | + IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB | + IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB | + IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | + IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + + /* and some based on our own capabilities */ + switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; + break; + case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; + break; + default: + /* nothing */ + break; + } + + /* symmetric capabilities */ + vht_cap->cap |= cap_info & own_cap.cap & + (IEEE80211_VHT_CAP_SHORT_GI_80 | + IEEE80211_VHT_CAP_SHORT_GI_160); + + /* remaining ones */ + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { + vht_cap->cap |= cap_info & + (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | + IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX | + IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); + } + + if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) + vht_cap->cap |= cap_info & + IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE; + + if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK; + + if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK) + vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC; /* Copy peer MCS info, the driver might need them. */ memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); + /* but also restrict MCSes */ + for (i = 0; i < 8; i++) { + u16 own_rx, own_tx, peer_rx, peer_tx; + + own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map); + own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map); + own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); + peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + + if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_rx < peer_tx) + peer_tx = own_rx; + } + + if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { + if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED) + peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED; + else if (own_tx < peer_rx) + peer_rx = own_tx; + } + + vht_cap->vht_mcs.rx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2); + + vht_cap->vht_mcs.tx_mcs_map &= + ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); + vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); + } + + /* finally set up the bandwidth */ switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index a4dcaf1..21fa386 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -114,5 +114,6 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev); u16 mac802154_dev_get_pan_id(const struct net_device *dev); void mac802154_dev_set_pan_id(struct net_device *dev, u16 val); void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); +u8 mac802154_dev_get_dsn(const struct net_device *dev); #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index d8d2770..a99910d 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -73,4 +73,5 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = { .start_req = mac802154_mlme_start_req, .get_pan_id = mac802154_dev_get_pan_id, .get_short_addr = mac802154_dev_get_short_addr, + .get_dsn = mac802154_dev_get_dsn, }; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index f47781a..f03e55f 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -159,6 +159,15 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val) } } +u8 mac802154_dev_get_dsn(const struct net_device *dev) +{ + struct mac802154_sub_if_data *priv = netdev_priv(dev); + + BUG_ON(dev->type != ARPHRD_IEEE802154); + + return priv->dsn++; +} + static void phy_chan_notify(struct work_struct *work) { struct phy_chan_notify_work *nw = container_of(work, diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index d20c6d3..7d3f659 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -145,6 +145,8 @@ static int mac802154_header_create(struct sk_buff *skb, head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */ fc = mac_cb_type(skb); + if (mac_cb_is_ackreq(skb)) + fc |= IEEE802154_FC_ACK_REQ; if (!saddr) { spin_lock_bh(&priv->mib_lock); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 1ba9dbc..86f5e26 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -15,7 +15,6 @@ #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/spinlock.h> -#include <linux/netlink.h> #include <linux/rculist.h> #include <net/netlink.h> @@ -1085,7 +1084,7 @@ static int dump_init(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; struct nlattr *attr = (void *)nlh + min_len; u32 dump_type; @@ -1301,7 +1300,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, struct sk_buff *skb2; struct nlmsgerr *errmsg; size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX+1]; struct nlattr *cmdattr; u32 *errline; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 61f49d2..2aef23e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); EXPORT_SYMBOL(ip_vs_get_debug_level); #endif -int ip_vs_net_id __read_mostly; -#ifdef IP_VS_GENERIC_NETNS -EXPORT_SYMBOL(ip_vs_net_id); -#endif +static int ip_vs_net_id __read_mostly; /* netns cnt used for uniqueness */ static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); @@ -1181,9 +1178,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) iph.len)))) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - struct net *net = - dev_net(skb_dst(skb)->dev); - if (!skb->dev) skb->dev = net->loopback_dev; icmpv6_send(skb, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 9e2d1cc..8104120 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -271,16 +271,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, { register unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; + __u32 ahash; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif - addr_fold ^= ((size_t)net>>8); + ahash = ntohl(addr_fold); + ahash ^= ((size_t) net >> 8); - return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) - & IP_VS_SVC_TAB_MASK; + return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & + IP_VS_SVC_TAB_MASK; } /* diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 0fac601..6bee6d0 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -56,7 +56,7 @@ * Make a summary from each cpu */ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, - struct ip_vs_cpu_stats *stats) + struct ip_vs_cpu_stats __percpu *stats) { int i; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c8e001a..007e8c4 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -48,6 +48,7 @@ #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_helper.h> #define NF_CONNTRACK_VERSION "0.5.0" @@ -1364,30 +1365,48 @@ void nf_conntrack_cleanup_end(void) */ void nf_conntrack_cleanup_net(struct net *net) { + LIST_HEAD(single); + + list_add(&net->exit_list, &single); + nf_conntrack_cleanup_net_list(&single); +} + +void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) +{ + int busy; + struct net *net; + /* * This makes sure all current packets have passed through * netfilter framework. Roll on, two-stage module * delete... */ synchronize_net(); - i_see_dead_people: - nf_ct_iterate_cleanup(net, kill_all, NULL); - nf_ct_release_dying_list(net); - if (atomic_read(&net->ct.count) != 0) { +i_see_dead_people: + busy = 0; + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_iterate_cleanup(net, kill_all, NULL); + nf_ct_release_dying_list(net); + if (atomic_read(&net->ct.count) != 0) + busy = 1; + } + if (busy) { schedule(); goto i_see_dead_people; } - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); - nf_conntrack_proto_pernet_fini(net); - nf_conntrack_helper_pernet_fini(net); - nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_tstamp_pernet_fini(net); - nf_conntrack_acct_pernet_fini(net); - nf_conntrack_expect_pernet_fini(net); - kmem_cache_destroy(net->ct.nf_conntrack_cachep); - kfree(net->ct.slabname); - free_percpu(net->ct.stat); + list_for_each_entry(net, net_exit_list, exit_list) { + nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); + nf_conntrack_proto_pernet_fini(net); + nf_conntrack_helper_pernet_fini(net); + nf_conntrack_ecache_pernet_fini(net); + nf_conntrack_tstamp_pernet_fini(net); + nf_conntrack_acct_pernet_fini(net); + nf_conntrack_expect_pernet_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); + free_percpu(net->ct.stat); + } } void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9904b15..6d0f8a1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2409,6 +2409,92 @@ out: return skb->len; } +static int +ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conntrack_expect *exp, *last; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + struct nf_conn *ct = cb->data; + struct nf_conn_help *help = nfct_help(ct); + u_int8_t l3proto = nfmsg->nfgen_family; + + if (cb->args[0]) + return 0; + + rcu_read_lock(); + last = (struct nf_conntrack_expect *)cb->args[1]; +restart: + hlist_for_each_entry(exp, &help->expectations, lnode) { + if (l3proto && exp->tuple.src.l3num != l3proto) + continue; + if (cb->args[1]) { + if (exp != last) + continue; + cb->args[1] = 0; + } + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + exp) < 0) { + if (!atomic_inc_not_zero(&exp->use)) + continue; + cb->args[1] = (unsigned long)exp; + goto out; + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } + cb->args[0] = 1; +out: + rcu_read_unlock(); + if (last) + nf_ct_expect_put(last); + + return skb->len; +} + +static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + int err; + struct net *net = sock_net(ctnl); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); + u_int8_t u3 = nfmsg->nfgen_family; + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; + u16 zone = 0; + struct netlink_dump_control c = { + .dump = ctnetlink_exp_ct_dump_table, + .done = ctnetlink_exp_done, + }; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); + if (err < 0) + return err; + + if (cda[CTA_EXPECT_ZONE]) { + err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); + if (err < 0) + return err; + } + + h = nf_conntrack_find_get(net, zone, &tuple); + if (!h) + return -ENOENT; + + ct = nf_ct_tuplehash_to_ctrack(h); + c.data = ct; + + err = netlink_dump_start(ctnl, skb, nlh, &c); + nf_ct_put(ct); + + return err; +} + static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, @@ -2439,11 +2525,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = ctnetlink_exp_dump_table, - .done = ctnetlink_exp_done, - }; - return netlink_dump_start(ctnl, skb, nlh, &c); + if (cda[CTA_EXPECT_MASTER]) + return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda); + else { + struct netlink_dump_control c = { + .dump = ctnetlink_exp_dump_table, + .done = ctnetlink_exp_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } } err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 6bcce40..6c69fbd 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -545,16 +545,20 @@ out_init: return ret; } -static void nf_conntrack_pernet_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct list_head *net_exit_list) { - nf_conntrack_standalone_fini_sysctl(net); - nf_conntrack_standalone_fini_proc(net); - nf_conntrack_cleanup_net(net); + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) { + nf_conntrack_standalone_fini_sysctl(net); + nf_conntrack_standalone_fini_proc(net); + } + nf_conntrack_cleanup_net_list(net_exit_list); } static struct pernet_operations nf_conntrack_net_ops = { - .init = nf_conntrack_pernet_init, - .exit = nf_conntrack_pernet_exit, + .init = nf_conntrack_pernet_init, + .exit_batch = nf_conntrack_pernet_exit, }; static int __init nf_conntrack_standalone_init(void) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 0b1b32c..bc4c499 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -24,10 +24,9 @@ #include <linux/skbuff.h> #include <asm/uaccess.h> #include <net/sock.h> -#include <net/netlink.h> #include <linux/init.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> MODULE_LICENSE("GPL"); @@ -144,7 +143,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; /* All the messages must at least contain nfgenmsg */ - if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg))) + if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; type = nlh->nlmsg_type; @@ -172,7 +171,7 @@ replay: } { - int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); + int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; struct nlattr *attr = (void *)nlh + min_len; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f248db5..4a2593f 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -19,7 +19,7 @@ #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/netfilter.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_log.h> #include <linux/spinlock.h> @@ -609,7 +609,7 @@ nfulnl_log_packet(u_int8_t pf, /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... */ - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 1cb4854..e92c916 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -73,7 +73,7 @@ static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; static inline u_int8_t instance_hashfn(u_int16_t queue_num) { - return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; + return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS; } static struct nfqnl_instance * @@ -217,14 +217,59 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data) spin_unlock_bh(&queue->lock); } +static void +nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ + int i, j = 0; + int plen = 0; /* length of skb->head fragment */ + struct page *page; + unsigned int offset; + + /* dont bother with small payloads */ + if (len <= skb_tailroom(to)) { + skb_copy_bits(from, 0, skb_put(to, len), len); + return; + } + + if (hlen) { + skb_copy_bits(from, 0, skb_put(to, hlen), hlen); + len -= hlen; + } else { + plen = min_t(int, skb_headlen(from), len); + if (plen) { + page = virt_to_head_page(from->head); + offset = from->data - (unsigned char *)page_address(page); + __skb_fill_page_desc(to, 0, page, offset, plen); + get_page(page); + j = 1; + len -= plen; + } + } + + to->truesize += len + plen; + to->len += len + plen; + to->data_len += len + plen; + + for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + if (!len) + break; + skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; + skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); + len -= skb_shinfo(to)->frags[j].size; + skb_frag_ref(to, j); + j++; + } + skb_shinfo(to)->nr_frags = j; +} + static struct sk_buff * nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nf_queue_entry *entry, __be32 **packet_id_ptr) { - sk_buff_data_t old_tail; size_t size; size_t data_len = 0, cap_len = 0; + int hlen = 0; struct sk_buff *skb; struct nlattr *nla; struct nfqnl_msg_packet_hdr *pmsg; @@ -236,7 +281,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); - size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -246,8 +291,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, #endif + nla_total_size(sizeof(u_int32_t)) /* mark */ + nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) - + nla_total_size(sizeof(u_int32_t))); /* cap_len */ + + nla_total_size(sizeof(u_int32_t)); /* cap_len */ + + if (entskb->tstamp.tv64) + size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); outdev = entry->outdev; @@ -265,7 +312,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (data_len == 0 || data_len > entskb->len) data_len = entskb->len; - size += nla_total_size(data_len); + + if (!entskb->head_frag || + skb_headlen(entskb) < L1_CACHE_BYTES || + skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) + hlen = skb_headlen(entskb); + + if (skb_has_frag_list(entskb)) + hlen = entskb->len; + hlen = min_t(int, data_len, hlen); + size += sizeof(struct nlattr) + hlen; cap_len = entskb->len; break; } @@ -277,7 +333,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (!skb) return NULL; - old_tail = skb->tail; nlh = nlmsg_put(skb, 0, 0, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, sizeof(struct nfgenmsg), 0); @@ -382,31 +437,26 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, goto nla_put_failure; } + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) + goto nla_put_failure; + + if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + goto nla_put_failure; + if (data_len) { struct nlattr *nla; - int sz = nla_attr_size(data_len); - if (skb_tailroom(skb) < nla_total_size(data_len)) { - printk(KERN_WARNING "nf_queue: no tailroom!\n"); - kfree_skb(skb); - return NULL; - } + if (skb_tailroom(skb) < sizeof(*nla) + hlen) + goto nla_put_failure; - nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); + nla = (struct nlattr *)skb_put(skb, sizeof(*nla)); nla->nla_type = NFQA_PAYLOAD; - nla->nla_len = sz; + nla->nla_len = nla_attr_size(data_len); - if (skb_copy_bits(entskb, 0, nla_data(nla), data_len)) - BUG(); + nfqnl_zcopy(skb, entskb, data_len, hlen); } - if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) - goto nla_put_failure; - - if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) - goto nla_put_failure; - - nlh->nlmsg_len = skb->tail - old_tail; + nlh->nlmsg_len = skb->len; return skb; nla_put_failure: diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig new file mode 100644 index 0000000..5d6e8c0 --- /dev/null +++ b/net/netlink/Kconfig @@ -0,0 +1,10 @@ +# +# Netlink Sockets +# + +config NETLINK_DIAG + tristate "NETLINK: socket monitoring interface" + default n + ---help--- + Support for NETLINK socket monitoring interface used by the ss tool. + If unsure, say Y. diff --git a/net/netlink/Makefile b/net/netlink/Makefile index bdd6ddf..e837917 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile @@ -3,3 +3,6 @@ # obj-y := af_netlink.o genetlink.o + +obj-$(CONFIG_NETLINK_DIAG) += netlink_diag.o +netlink_diag-y := diag.o diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1e3fd5b..ce2e006 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -61,28 +61,7 @@ #include <net/scm.h> #include <net/netlink.h> -#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) -#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) - -struct netlink_sock { - /* struct sock has to be the first member of netlink_sock */ - struct sock sk; - u32 portid; - u32 dst_portid; - u32 dst_group; - u32 flags; - u32 subscriptions; - u32 ngroups; - unsigned long *groups; - unsigned long state; - wait_queue_head_t wait; - struct netlink_callback *cb; - struct mutex *cb_mutex; - struct mutex cb_def_mutex; - void (*netlink_rcv)(struct sk_buff *skb); - void (*netlink_bind)(int group); - struct module *module; -}; +#include "af_netlink.h" struct listeners { struct rcu_head rcu; @@ -94,48 +73,20 @@ struct listeners { #define NETLINK_BROADCAST_SEND_ERROR 0x4 #define NETLINK_RECV_NO_ENOBUFS 0x8 -static inline struct netlink_sock *nlk_sk(struct sock *sk) -{ - return container_of(sk, struct netlink_sock, sk); -} - static inline int netlink_is_kernel(struct sock *sk) { return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct nl_portid_hash { - struct hlist_head *table; - unsigned long rehash_time; - - unsigned int mask; - unsigned int shift; - - unsigned int entries; - unsigned int max_shift; - - u32 rnd; -}; - -struct netlink_table { - struct nl_portid_hash hash; - struct hlist_head mc_list; - struct listeners __rcu *listeners; - unsigned int flags; - unsigned int groups; - struct mutex *cb_mutex; - struct module *module; - void (*bind)(int group); - int registered; -}; - -static struct netlink_table *nl_table; +struct netlink_table *nl_table; +EXPORT_SYMBOL_GPL(nl_table); static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait); static int netlink_dump(struct sock *sk); -static DEFINE_RWLOCK(nl_table_lock); +DEFINE_RWLOCK(nl_table_lock); +EXPORT_SYMBOL_GPL(nl_table_lock); static atomic_t nl_table_users = ATOMIC_INIT(0); #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); @@ -1695,7 +1646,7 @@ struct nlmsghdr * __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; - int size = NLMSG_LENGTH(len); + int size = nlmsg_msg_size(len); nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size)); nlh->nlmsg_type = type; @@ -1704,7 +1655,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) - memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); + memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size); return nlh; } EXPORT_SYMBOL(__nlmsg_put); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h new file mode 100644 index 0000000..d9acb2a --- /dev/null +++ b/net/netlink/af_netlink.h @@ -0,0 +1,62 @@ +#ifndef _AF_NETLINK_H +#define _AF_NETLINK_H + +#include <net/sock.h> + +#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) +#define NLGRPLONGS(x) (NLGRPSZ(x)/sizeof(unsigned long)) + +struct netlink_sock { + /* struct sock has to be the first member of netlink_sock */ + struct sock sk; + u32 portid; + u32 dst_portid; + u32 dst_group; + u32 flags; + u32 subscriptions; + u32 ngroups; + unsigned long *groups; + unsigned long state; + wait_queue_head_t wait; + struct netlink_callback *cb; + struct mutex *cb_mutex; + struct mutex cb_def_mutex; + void (*netlink_rcv)(struct sk_buff *skb); + void (*netlink_bind)(int group); + struct module *module; +}; + +static inline struct netlink_sock *nlk_sk(struct sock *sk) +{ + return container_of(sk, struct netlink_sock, sk); +} + +struct nl_portid_hash { + struct hlist_head *table; + unsigned long rehash_time; + + unsigned int mask; + unsigned int shift; + + unsigned int entries; + unsigned int max_shift; + + u32 rnd; +}; + +struct netlink_table { + struct nl_portid_hash hash; + struct hlist_head mc_list; + struct listeners __rcu *listeners; + unsigned int flags; + unsigned int groups; + struct mutex *cb_mutex; + struct module *module; + void (*bind)(int group); + int registered; +}; + +extern struct netlink_table *nl_table; +extern rwlock_t nl_table_lock; + +#endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c new file mode 100644 index 0000000..5ffb1d1 --- /dev/null +++ b/net/netlink/diag.c @@ -0,0 +1,188 @@ +#include <linux/module.h> + +#include <net/sock.h> +#include <linux/netlink.h> +#include <linux/sock_diag.h> +#include <linux/netlink_diag.h> + +#include "af_netlink.h" + +static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) +{ + struct netlink_sock *nlk = nlk_sk(sk); + + if (nlk->groups == NULL) + return 0; + + return nla_put(nlskb, NETLINK_DIAG_GROUPS, NLGRPSZ(nlk->ngroups), + nlk->groups); +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + struct netlink_diag_req *req, + u32 portid, u32 seq, u32 flags, int sk_ino) +{ + struct nlmsghdr *nlh; + struct netlink_diag_msg *rep; + struct netlink_sock *nlk = nlk_sk(sk); + + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); + if (!nlh) + return -EMSGSIZE; + + rep = nlmsg_data(nlh); + rep->ndiag_family = AF_NETLINK; + rep->ndiag_type = sk->sk_type; + rep->ndiag_protocol = sk->sk_protocol; + rep->ndiag_state = sk->sk_state; + + rep->ndiag_ino = sk_ino; + rep->ndiag_portid = nlk->portid; + rep->ndiag_dst_portid = nlk->dst_portid; + rep->ndiag_dst_group = nlk->dst_group; + sock_diag_save_cookie(sk, rep->ndiag_cookie); + + if ((req->ndiag_show & NDIAG_SHOW_GROUPS) && + sk_diag_dump_groups(sk, skb)) + goto out_nlmsg_trim; + + if ((req->ndiag_show & NDIAG_SHOW_MEMINFO) && + sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) + goto out_nlmsg_trim; + + return nlmsg_end(skb, nlh); + +out_nlmsg_trim: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + int protocol, int s_num) +{ + struct netlink_table *tbl = &nl_table[protocol]; + struct nl_portid_hash *hash = &tbl->hash; + struct net *net = sock_net(skb->sk); + struct netlink_diag_req *req; + struct sock *sk; + int ret = 0, num = 0, i; + + req = nlmsg_data(cb->nlh); + + for (i = 0; i <= hash->mask; i++) { + sk_for_each(sk, &hash->table[i]) { + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { + num++; + continue; + } + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + goto done; + } + + num++; + } + } + + sk_for_each_bound(sk, &tbl->mc_list) { + if (sk_hashed(sk)) + continue; + if (!net_eq(sock_net(sk), net)) + continue; + if (num < s_num) { + num++; + continue; + } + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + goto done; + } + num++; + } +done: + cb->args[0] = num; + cb->args[1] = protocol; + + return ret; +} + +static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct netlink_diag_req *req; + int s_num = cb->args[0]; + + req = nlmsg_data(cb->nlh); + + read_lock(&nl_table_lock); + + if (req->sdiag_protocol == NDIAG_PROTO_ALL) { + int i; + + for (i = cb->args[1]; i < MAX_LINKS; i++) { + if (__netlink_diag_dump(skb, cb, i, s_num)) + break; + s_num = 0; + } + } else { + if (req->sdiag_protocol >= MAX_LINKS) { + read_unlock(&nl_table_lock); + return -ENOENT; + } + + __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); + } + + read_unlock(&nl_table_lock); + + return skb->len; +} + +static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct netlink_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = netlink_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } else + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler netlink_diag_handler = { + .family = AF_NETLINK, + .dump = netlink_diag_handler_dump, +}; + +static int __init netlink_diag_init(void) +{ + return sock_diag_register(&netlink_diag_handler); +} + +static void __exit netlink_diag_exit(void) +{ + sock_diag_unregister(&netlink_diag_handler); +} + +module_init(netlink_diag_init); +module_exit(netlink_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index c6bc3bd..b75a9b3 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -117,6 +117,88 @@ u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length) return tlv; } +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) +{ + struct nfc_llcp_sdp_tlv *sdres; + u8 value[2]; + + sdres = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); + if (sdres == NULL) + return NULL; + + value[0] = tid; + value[1] = sap; + + sdres->tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, value, 2, + &sdres->tlv_len); + if (sdres->tlv == NULL) { + kfree(sdres); + return NULL; + } + + sdres->tid = tid; + sdres->sap = sap; + + INIT_HLIST_NODE(&sdres->node); + + return sdres; +} + +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, + size_t uri_len) +{ + struct nfc_llcp_sdp_tlv *sdreq; + + pr_debug("uri: %s, len: %zu\n", uri, uri_len); + + sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); + if (sdreq == NULL) + return NULL; + + sdreq->tlv_len = uri_len + 3; + + if (uri[uri_len - 1] == 0) + sdreq->tlv_len--; + + sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL); + if (sdreq->tlv == NULL) { + kfree(sdreq); + return NULL; + } + + sdreq->tlv[0] = LLCP_TLV_SDREQ; + sdreq->tlv[1] = sdreq->tlv_len - 2; + sdreq->tlv[2] = tid; + + sdreq->tid = tid; + sdreq->uri = sdreq->tlv + 3; + memcpy(sdreq->uri, uri, uri_len); + + sdreq->time = jiffies; + + INIT_HLIST_NODE(&sdreq->node); + + return sdreq; +} + +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp) +{ + kfree(sdp->tlv); + kfree(sdp); +} + +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) +{ + struct nfc_llcp_sdp_tlv *sdp; + struct hlist_node *n; + + hlist_for_each_entry_safe(sdp, n, head, node) { + hlist_del(&sdp->node); + + nfc_llcp_free_sdp_tlv(sdp); + } +} + int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local, u8 *tlv_array, u16 tlv_array_len) { @@ -184,10 +266,10 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, switch (type) { case LLCP_TLV_MIUX: - sock->miu = llcp_tlv_miux(tlv) + 128; + sock->remote_miu = llcp_tlv_miux(tlv) + 128; break; case LLCP_TLV_RW: - sock->rw = llcp_tlv_rw(tlv); + sock->remote_rw = llcp_tlv_rw(tlv); break; case LLCP_TLV_SN: break; @@ -200,7 +282,8 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, tlv += length + 2; } - pr_debug("sock %p rw %d miu %d\n", sock, sock->rw, sock->miu); + pr_debug("sock %p rw %d miu %d\n", sock, + sock->remote_rw, sock->remote_miu); return 0; } @@ -318,9 +401,9 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) struct sk_buff *skb; u8 *service_name_tlv = NULL, service_name_tlv_length; u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length; + u8 *rw_tlv = NULL, rw_tlv_length, rw; int err; - u16 size = 0; + u16 size = 0, miux; pr_debug("Sending CONNECT\n"); @@ -336,11 +419,15 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) size += service_name_tlv_length; } - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, + /* If the socket parameters are not set, use the local ones */ + miux = sock->miux > LLCP_MAX_MIUX ? local->miux : sock->miux; + rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw; + + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); size += miux_tlv_length; - rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); + rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -377,9 +464,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) struct nfc_llcp_local *local; struct sk_buff *skb; u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length; + u8 *rw_tlv = NULL, rw_tlv_length, rw; int err; - u16 size = 0; + u16 size = 0, miux; pr_debug("Sending CC\n"); @@ -387,11 +474,15 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) if (local == NULL) return -ENODEV; - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, + /* If the socket parameters are not set, use the local ones */ + miux = sock->miux > LLCP_MAX_MIUX ? local->miux : sock->miux; + rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw; + + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, &miux_tlv_length); size += miux_tlv_length; - rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); + rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); @@ -416,48 +507,90 @@ error_tlv: return err; } -int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap) +static struct sk_buff *nfc_llcp_allocate_snl(struct nfc_llcp_local *local, + size_t tlv_length) { struct sk_buff *skb; struct nfc_dev *dev; - u8 *sdres_tlv = NULL, sdres_tlv_length, sdres[2]; u16 size = 0; - pr_debug("Sending SNL tid 0x%x sap 0x%x\n", tid, sap); - if (local == NULL) - return -ENODEV; + return ERR_PTR(-ENODEV); dev = local->dev; if (dev == NULL) - return -ENODEV; - - sdres[0] = tid; - sdres[1] = sap; - sdres_tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, sdres, 0, - &sdres_tlv_length); - if (sdres_tlv == NULL) - return -ENOMEM; + return ERR_PTR(-ENODEV); size += LLCP_HEADER_SIZE; size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; - size += sdres_tlv_length; + size += tlv_length; skb = alloc_skb(size, GFP_KERNEL); - if (skb == NULL) { - kfree(sdres_tlv); - return -ENOMEM; - } + if (skb == NULL) + return ERR_PTR(-ENOMEM); skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE); skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL); - memcpy(skb_put(skb, sdres_tlv_length), sdres_tlv, sdres_tlv_length); + return skb; +} + +int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len) +{ + struct nfc_llcp_sdp_tlv *sdp; + struct hlist_node *n; + struct sk_buff *skb; + + skb = nfc_llcp_allocate_snl(local, tlvs_len); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + hlist_for_each_entry_safe(sdp, n, tlv_list, node) { + memcpy(skb_put(skb, sdp->tlv_len), sdp->tlv, sdp->tlv_len); + + hlist_del(&sdp->node); + + nfc_llcp_free_sdp_tlv(sdp); + } skb_queue_tail(&local->tx_queue, skb); - kfree(sdres_tlv); + return 0; +} + +int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len) +{ + struct nfc_llcp_sdp_tlv *sdreq; + struct hlist_node *n; + struct sk_buff *skb; + + skb = nfc_llcp_allocate_snl(local, tlvs_len); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + mutex_lock(&local->sdreq_lock); + + if (hlist_empty(&local->pending_sdreqs)) + mod_timer(&local->sdreq_timer, + jiffies + msecs_to_jiffies(3 * local->remote_lto)); + + hlist_for_each_entry_safe(sdreq, n, tlv_list, node) { + pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri); + + memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv, + sdreq->tlv_len); + + hlist_del(&sdreq->node); + + hlist_add_head(&sdreq->node, &local->pending_sdreqs); + } + + mutex_unlock(&local->sdreq_lock); + + skb_queue_tail(&local->tx_queue, skb); return 0; } @@ -532,8 +665,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, /* Remote is ready but has not acknowledged our frames */ if((sock->remote_ready && - skb_queue_len(&sock->tx_pending_queue) >= sock->rw && - skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { + skb_queue_len(&sock->tx_pending_queue) >= sock->remote_rw && + skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) { pr_err("Pending queue is full %d frames\n", skb_queue_len(&sock->tx_pending_queue)); return -ENOBUFS; @@ -541,7 +674,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, /* Remote is not ready and we've been queueing enough frames */ if ((!sock->remote_ready && - skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { + skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) { pr_err("Tx queue is full %d frames\n", skb_queue_len(&sock->tx_queue)); return -ENOBUFS; @@ -561,7 +694,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, while (remaining_len > 0) { - frag_len = min_t(size_t, sock->miu, remaining_len); + frag_len = min_t(size_t, sock->remote_miu, remaining_len); pr_debug("Fragment %zd bytes remaining %zd", frag_len, remaining_len); @@ -621,7 +754,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap, while (remaining_len > 0) { - frag_len = min_t(size_t, sock->miu, remaining_len); + frag_len = min_t(size_t, sock->remote_miu, remaining_len); pr_debug("Fragment %zd bytes remaining %zd", frag_len, remaining_len); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index b530afa..bb67b98 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -188,6 +188,9 @@ static void local_cleanup(struct nfc_llcp_local *local, bool listen) cancel_work_sync(&local->rx_work); cancel_work_sync(&local->timeout_work); kfree_skb(local->rx_pending); + del_timer_sync(&local->sdreq_timer); + cancel_work_sync(&local->sdreq_timeout_work); + nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs); } static void local_release(struct kref *ref) @@ -265,6 +268,47 @@ static void nfc_llcp_symm_timer(unsigned long data) schedule_work(&local->timeout_work); } +static void nfc_llcp_sdreq_timeout_work(struct work_struct *work) +{ + unsigned long time; + HLIST_HEAD(nl_sdres_list); + struct hlist_node *n; + struct nfc_llcp_sdp_tlv *sdp; + struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, + sdreq_timeout_work); + + mutex_lock(&local->sdreq_lock); + + time = jiffies - msecs_to_jiffies(3 * local->remote_lto); + + hlist_for_each_entry_safe(sdp, n, &local->pending_sdreqs, node) { + if (time_after(sdp->time, time)) + continue; + + sdp->sap = LLCP_SDP_UNBOUND; + + hlist_del(&sdp->node); + + hlist_add_head(&sdp->node, &nl_sdres_list); + } + + if (!hlist_empty(&local->pending_sdreqs)) + mod_timer(&local->sdreq_timer, + jiffies + msecs_to_jiffies(3 * local->remote_lto)); + + mutex_unlock(&local->sdreq_lock); + + if (!hlist_empty(&nl_sdres_list)) + nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); +} + +static void nfc_llcp_sdreq_timer(unsigned long data) +{ + struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + + schedule_work(&local->sdreq_timeout_work); +} + struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) { struct nfc_llcp_local *local, *n; @@ -808,8 +852,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local, ui_cb->dsap = dsap; ui_cb->ssap = ssap; - printk("%s %d %d\n", __func__, dsap, ssap); - pr_debug("%d %d\n", dsap, ssap); /* We're looking for a bound socket, not a client one */ @@ -907,7 +949,9 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local, new_sock = nfc_llcp_sock(new_sk); new_sock->dev = local->dev; new_sock->local = nfc_llcp_local_get(local); - new_sock->miu = local->remote_miu; + new_sock->rw = sock->rw; + new_sock->miux = sock->miux; + new_sock->remote_miu = local->remote_miu; new_sock->nfc_protocol = sock->nfc_protocol; new_sock->dsap = ssap; new_sock->target_idx = local->target_idx; @@ -961,11 +1005,11 @@ int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock) pr_debug("Remote ready %d tx queue len %d remote rw %d", sock->remote_ready, skb_queue_len(&sock->tx_pending_queue), - sock->rw); + sock->remote_rw); /* Try to queue some I frames for transmission */ while (sock->remote_ready && - skb_queue_len(&sock->tx_pending_queue) < sock->rw) { + skb_queue_len(&sock->tx_pending_queue) < sock->remote_rw) { struct sk_buff *pdu; pdu = skb_dequeue(&sock->tx_queue); @@ -1186,6 +1230,10 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, u16 tlv_len, offset; char *service_name; size_t service_name_len; + struct nfc_llcp_sdp_tlv *sdp; + HLIST_HEAD(llc_sdres_list); + size_t sdres_tlvs_len; + HLIST_HEAD(nl_sdres_list); dsap = nfc_llcp_dsap(skb); ssap = nfc_llcp_ssap(skb); @@ -1200,6 +1248,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, tlv = &skb->data[LLCP_HEADER_SIZE]; tlv_len = skb->len - LLCP_HEADER_SIZE; offset = 0; + sdres_tlvs_len = 0; while (offset < tlv_len) { type = tlv[0]; @@ -1217,14 +1266,14 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, !strncmp(service_name, "urn:nfc:sn:sdp", service_name_len)) { sap = 1; - goto send_snl; + goto add_snl; } llcp_sock = nfc_llcp_sock_from_sn(local, service_name, service_name_len); if (!llcp_sock) { sap = 0; - goto send_snl; + goto add_snl; } /* @@ -1241,7 +1290,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, if (sap == LLCP_SAP_MAX) { sap = 0; - goto send_snl; + goto add_snl; } client_count = @@ -1258,8 +1307,37 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local, pr_debug("%p %d\n", llcp_sock, sap); -send_snl: - nfc_llcp_send_snl(local, tid, sap); +add_snl: + sdp = nfc_llcp_build_sdres_tlv(tid, sap); + if (sdp == NULL) + goto exit; + + sdres_tlvs_len += sdp->tlv_len; + hlist_add_head(&sdp->node, &llc_sdres_list); + break; + + case LLCP_TLV_SDRES: + mutex_lock(&local->sdreq_lock); + + pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]); + + hlist_for_each_entry(sdp, &local->pending_sdreqs, node) { + if (sdp->tid != tlv[2]) + continue; + + sdp->sap = tlv[3]; + + pr_debug("Found: uri=%s, sap=%d\n", + sdp->uri, sdp->sap); + + hlist_del(&sdp->node); + + hlist_add_head(&sdp->node, &nl_sdres_list); + + break; + } + + mutex_unlock(&local->sdreq_lock); break; default: @@ -1270,6 +1348,13 @@ send_snl: offset += length + 2; tlv += length + 2; } + +exit: + if (!hlist_empty(&nl_sdres_list)) + nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); + + if (!hlist_empty(&llc_sdres_list)) + nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len); } static void nfc_llcp_rx_work(struct work_struct *work) @@ -1455,6 +1540,13 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) local->remote_miu = LLCP_DEFAULT_MIU; local->remote_lto = LLCP_DEFAULT_LTO; + mutex_init(&local->sdreq_lock); + INIT_HLIST_HEAD(&local->pending_sdreqs); + init_timer(&local->sdreq_timer); + local->sdreq_timer.data = (unsigned long) local; + local->sdreq_timer.function = nfc_llcp_sdreq_timer; + INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); + list_add(&local->list, &llcp_devices); return 0; diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 0eae5c5..7e87a66 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -46,6 +46,19 @@ struct llcp_sock_list { rwlock_t lock; }; +struct nfc_llcp_sdp_tlv { + u8 *tlv; + u8 tlv_len; + + char *uri; + u8 tid; + u8 sap; + + unsigned long time; + + struct hlist_node node; +}; + struct nfc_llcp_local { struct list_head list; struct nfc_dev *dev; @@ -86,6 +99,12 @@ struct nfc_llcp_local { u8 remote_opt; u16 remote_wks; + struct mutex sdreq_lock; + struct hlist_head pending_sdreqs; + struct timer_list sdreq_timer; + struct work_struct sdreq_timeout_work; + u8 sdreq_next_tid; + /* sockets array */ struct llcp_sock_list sockets; struct llcp_sock_list connecting_sockets; @@ -105,7 +124,12 @@ struct nfc_llcp_sock { char *service_name; size_t service_name_len; u8 rw; - u16 miu; + u16 miux; + + + /* Remote link parameters */ + u8 remote_rw; + u16 remote_miu; /* Link variables */ u8 send_n; @@ -213,12 +237,20 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock, /* Commands API */ void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap); +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, + size_t uri_len); +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); int nfc_llcp_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_symm(struct nfc_dev *dev); int nfc_llcp_send_connect(struct nfc_llcp_sock *sock); int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); -int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap); +int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len); +int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, + struct hlist_head *tlv_list, size_t tlvs_len); int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason); int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c index 5c7cdf3f..6b32544 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp/sock.c @@ -223,6 +223,124 @@ error: return ret; } +static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + u32 opt; + int err = 0; + + pr_debug("%p optname %d\n", sk, optname); + + if (level != SOL_NFC) + return -ENOPROTOOPT; + + lock_sock(sk); + + switch (optname) { + case NFC_LLCP_RW: + if (sk->sk_state == LLCP_CONNECTED || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > LLCP_MAX_RW) { + err = -EINVAL; + break; + } + + llcp_sock->rw = (u8) opt; + + break; + + case NFC_LLCP_MIUX: + if (sk->sk_state == LLCP_CONNECTED || + sk->sk_state == LLCP_BOUND || + sk->sk_state == LLCP_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + if (opt > LLCP_MAX_MIUX) { + err = -EINVAL; + break; + } + + llcp_sock->miux = (u16) opt; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + + pr_debug("%p rw %d miux %d\n", llcp_sock, + llcp_sock->rw, llcp_sock->miux); + + return err; +} + +static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); + int len, err = 0; + + pr_debug("%p optname %d\n", sk, optname); + + if (level != SOL_NFC) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(u32, len, sizeof(u32)); + + lock_sock(sk); + + switch (optname) { + case NFC_LLCP_RW: + if (put_user(llcp_sock->rw, (u32 __user *) optval)) + err = -EFAULT; + + break; + + case NFC_LLCP_MIUX: + if (put_user(llcp_sock->miux, (u32 __user *) optval)) + err = -EFAULT; + + break; + + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + + if (put_user(len, optlen)) + return -EFAULT; + + return err; +} + void nfc_llcp_accept_unlink(struct sock *sk) { struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -403,7 +521,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, return llcp_accept_poll(sk); if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; @@ -543,7 +662,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->dev = dev; llcp_sock->local = nfc_llcp_local_get(local); - llcp_sock->miu = llcp_sock->local->remote_miu; + llcp_sock->remote_miu = llcp_sock->local->remote_miu; llcp_sock->ssap = nfc_llcp_get_local_ssap(local); if (llcp_sock->ssap == LLCP_SAP_MAX) { ret = -ENOMEM; @@ -737,8 +856,8 @@ static const struct proto_ops llcp_sock_ops = { .ioctl = sock_no_ioctl, .listen = llcp_sock_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = nfc_llcp_setsockopt, + .getsockopt = nfc_llcp_getsockopt, .sendmsg = llcp_sock_sendmsg, .recvmsg = llcp_sock_recvmsg, .mmap = sock_no_mmap, @@ -802,8 +921,10 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp) llcp_sock->ssap = 0; llcp_sock->dsap = LLCP_SAP_SDP; - llcp_sock->rw = LLCP_DEFAULT_RW; - llcp_sock->miu = LLCP_DEFAULT_MIU; + llcp_sock->rw = LLCP_MAX_RW + 1; + llcp_sock->miux = LLCP_MAX_MIUX + 1; + llcp_sock->remote_rw = LLCP_DEFAULT_RW; + llcp_sock->remote_miu = LLCP_DEFAULT_MIU; llcp_sock->send_n = llcp_sock->send_ack_n = 0; llcp_sock->recv_n = llcp_sock->recv_ack_n = 0; llcp_sock->remote_ready = 1; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 504b883..73fd510 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -53,6 +53,15 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 }, [NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 }, [NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 }, + [NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 }, + [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, + [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, + [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { + [NFC_SDP_ATTR_URI] = { .type = NLA_STRING }, + [NFC_SDP_ATTR_SAP] = { .type = NLA_U8 }, }; static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, @@ -348,6 +357,74 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) +{ + struct sk_buff *msg; + struct nlattr *sdp_attr, *uri_attr; + struct nfc_llcp_sdp_tlv *sdres; + struct hlist_node *n; + void *hdr; + int rc = -EMSGSIZE; + int i; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_LLC_SDRES); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP); + if (sdp_attr == NULL) { + rc = -ENOMEM; + goto nla_put_failure; + } + + i = 1; + hlist_for_each_entry_safe(sdres, n, sdres_list, node) { + pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap); + + uri_attr = nla_nest_start(msg, i++); + if (uri_attr == NULL) { + rc = -ENOMEM; + goto nla_put_failure; + } + + if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap)) + goto nla_put_failure; + + if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri)) + goto nla_put_failure; + + nla_nest_end(msg, uri_attr); + + hlist_del(&sdres->node); + + nfc_llcp_free_sdp_tlv(sdres); + } + + nla_nest_end(msg, sdp_attr); + + genlmsg_end(msg, hdr); + + return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC); + +nla_put_failure: + genlmsg_cancel(msg, hdr); + +free_msg: + nlmsg_free(msg); + + nfc_llcp_free_sdp_tlv_list(sdres_list); + + return rc; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, @@ -859,6 +936,96 @@ exit: return rc; } +static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_llcp_local *local; + struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1]; + u32 idx; + u8 tid; + char *uri; + int rc = 0, rem; + size_t uri_len, tlvs_len; + struct hlist_head sdreq_list; + struct nfc_llcp_sdp_tlv *sdreq; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_LLC_SDP]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) { + rc = -ENODEV; + goto exit; + } + + device_lock(&dev->dev); + + if (dev->dep_link_up == false) { + rc = -ENOLINK; + goto exit; + } + + local = nfc_llcp_find_local(dev); + if (!local) { + nfc_put_device(dev); + rc = -ENODEV; + goto exit; + } + + INIT_HLIST_HEAD(&sdreq_list); + + tlvs_len = 0; + + nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { + rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr, + nfc_sdp_genl_policy); + + if (rc != 0) { + rc = -EINVAL; + goto exit; + } + + if (!sdp_attrs[NFC_SDP_ATTR_URI]) + continue; + + uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]); + if (uri_len == 0) + continue; + + uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]); + if (uri == NULL || *uri == 0) + continue; + + tid = local->sdreq_next_tid++; + + sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); + if (sdreq == NULL) { + rc = -ENOMEM; + goto exit; + } + + tlvs_len += sdreq->tlv_len; + + hlist_add_head(&sdreq->node, &sdreq_list); + } + + if (hlist_empty(&sdreq_list)) { + rc = -EINVAL; + goto exit; + } + + rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); +exit: + device_unlock(&dev->dev); + + nfc_put_device(dev); + + return rc; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -913,6 +1080,11 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_llc_set_params, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_LLC_SDREQ, + .doit = nfc_genl_llc_sdreq, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 87d914d..94bfe19 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -46,6 +46,8 @@ struct nfc_rawsock { #define to_rawsock_sk(_tx_work) \ ((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) +struct nfc_llcp_sdp_tlv; + #ifdef CONFIG_NFC_LLCP void nfc_llcp_mac_is_down(struct nfc_dev *dev); @@ -59,6 +61,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head); #else @@ -112,6 +116,14 @@ static inline void nfc_llcp_exit(void) { } +static inline void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp) +{ +} + +static inline void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head) +{ +} + #endif int __init rawsock_init(void); @@ -144,6 +156,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev); int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol); int nfc_genl_tm_deactivated(struct nfc_dev *dev); +int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a4b7247..8759265 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -369,8 +369,8 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, len = sizeof(struct ovs_header); len += nla_total_size(skb->len); len += nla_total_size(FLOW_BUFSIZE); - if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) - len += nla_total_size(8); + if (upcall_info->userdata) + len += NLA_ALIGN(upcall_info->userdata->nla_len); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { @@ -387,8 +387,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, nla_nest_end(user_skb, nla); if (upcall_info->userdata) - nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, - nla_get_u64(upcall_info->userdata)); + __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, + nla_len(upcall_info->userdata), + nla_data(upcall_info->userdata)); nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); @@ -544,7 +545,7 @@ static int validate_userspace(const struct nlattr *attr) { static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = { [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, - [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, + [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC }, }; struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; @@ -680,7 +681,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) /* Normally, setting the skb 'protocol' field would be handled by a * call to eth_type_trans(), but it assumes there's a sending * device, which we may not have. */ - if (ntohs(eth->h_proto) >= 1536) + if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) packet->protocol = eth->h_proto; else packet->protocol = htons(ETH_P_802_2); @@ -1628,7 +1629,7 @@ static struct vport *lookup_vport(struct net *net, vport = ovs_vport_rtnl_rcu(dp, port_no); if (!vport) - return ERR_PTR(-ENOENT); + return ERR_PTR(-ENODEV); return vport; } else return ERR_PTR(-EINVAL); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 031dfbf..9125ad5 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -119,7 +119,7 @@ struct ovs_skb_cb { * struct dp_upcall - metadata to include with a packet to send to userspace * @cmd: One of %OVS_PACKET_CMD_*. * @key: Becomes %OVS_PACKET_ATTR_KEY. Must be nonnull. - * @userdata: If nonnull, its u64 value is extracted and passed to userspace as + * @userdata: If nonnull, its variable-length value is passed to userspace as * %OVS_PACKET_ATTR_USERDATA. * @pid: Netlink PID to which packet should be sent. If @pid is 0 then no * packet is sent and the packet is accounted in the datapath's @n_lost diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index fe0e421..3324868 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) proto = *(__be16 *) skb->data; __skb_pull(skb, sizeof(__be16)); - if (ntohs(proto) >= 1536) + if (ntohs(proto) >= ETH_P_802_3_MIN) return proto; if (skb->len < sizeof(struct llc_snap_hdr)) @@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) __skb_pull(skb, sizeof(struct llc_snap_hdr)); - if (ntohs(llc->ethertype) >= 1536) + if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN) return llc->ethertype; return htons(ETH_P_802_2); @@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - if (ntohs(swkey->eth.type) < 1536) + if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN) return -EINVAL; attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); } else { diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 0531de6..40f8a24 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -63,16 +63,6 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde return stats; } -static int internal_dev_mac_addr(struct net_device *dev, void *p) -{ - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); - return 0; -} - /* Called with rcu_read_lock_bh. */ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { @@ -126,7 +116,7 @@ static const struct net_device_ops internal_dev_netdev_ops = { .ndo_open = internal_dev_open, .ndo_stop = internal_dev_stop, .ndo_start_xmit = internal_dev_xmit, - .ndo_set_mac_address = internal_dev_mac_addr, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = internal_dev_change_mtu, .ndo_get_stats64 = internal_dev_get_stats, }; @@ -138,6 +128,7 @@ static void do_setup(struct net_device *netdev) netdev->netdev_ops = &internal_dev_netdev_ops; netdev->priv_flags &= ~IFF_TX_SKB_SHARING; + netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netdev->destructor = internal_dev_destructor; SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops); netdev->tx_queue_len = 0; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3f7961e..aee7d43 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -68,10 +68,10 @@ struct vport_err_stats { /** * struct vport - one port within a datapath * @rcu: RCU callback head for deferred destruction. - * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. * @upcall_portid: The Netlink port to use for packets received on this port that * miss the flow table. + * @port_no: Index into @dp's @ports array. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. @@ -81,9 +81,9 @@ struct vport_err_stats { */ struct vport { struct rcu_head rcu; - u16 port_no; struct datapath *dp; u32 upcall_portid; + u16 port_no; struct hlist_node hash_node; struct hlist_node dp_hash_node; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1d6793d..8e4644f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -181,6 +181,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, struct packet_sock; static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); static void *packet_previous_frame(struct packet_sock *po, struct packet_ring_buffer *rb, @@ -973,11 +975,11 @@ static void *packet_current_rx_frame(struct packet_sock *po, static void *prb_lookup_block(struct packet_sock *po, struct packet_ring_buffer *rb, - unsigned int previous, + unsigned int idx, int status) { struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); - struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); + struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); if (status != BLOCK_STATUS(pbd)) return NULL; @@ -1041,6 +1043,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff) buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; } +static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +{ + struct sock *sk = &po->sk; + bool has_room; + + if (po->prot_hook.func != tpacket_rcv) + return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) + <= sk->sk_rcvbuf; + + spin_lock(&sk->sk_receive_queue.lock); + if (po->tp_version == TPACKET_V3) + has_room = prb_lookup_block(po, &po->rx_ring, + po->rx_ring.prb_bdqc.kactive_blk_num, + TP_STATUS_KERNEL); + else + has_room = packet_lookup_frame(po, &po->rx_ring, + po->rx_ring.head, + TP_STATUS_KERNEL); + spin_unlock(&sk->sk_receive_queue.lock); + + return has_room; +} + static void packet_sock_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_error_queue); @@ -1066,16 +1091,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num) return x; } -static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_hash(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { - u32 idx, hash = skb->rxhash; - - idx = ((u64)hash * num) >> 32; - - return f->arr[idx]; + return (((u64)skb->rxhash) * num) >> 32; } -static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_lb(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) { int cur, old; @@ -1083,14 +1108,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb while ((old = atomic_cmpxchg(&f->rr_cur, cur, fanout_rr_next(f, num))) != cur) cur = old; - return f->arr[cur]; + return cur; +} + +static unsigned int fanout_demux_cpu(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int num) +{ + return smp_processor_id() % num; } -static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_rollover(struct packet_fanout *f, + struct sk_buff *skb, + unsigned int idx, unsigned int skip, + unsigned int num) { - unsigned int cpu = smp_processor_id(); + unsigned int i, j; + + i = j = min_t(int, f->next[idx], num - 1); + do { + if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { + if (i != j) + f->next[idx] = i; + return i; + } + if (++i == num) + i = 0; + } while (i != j); - return f->arr[cpu % num]; + return idx; +} + +static bool fanout_has_flag(struct packet_fanout *f, u16 flag) +{ + return f->flags & (flag >> 8); } static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, @@ -1099,7 +1150,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_fanout *f = pt->af_packet_priv; unsigned int num = f->num_members; struct packet_sock *po; - struct sock *sk; + unsigned int idx; if (!net_eq(dev_net(dev), read_pnet(&f->net)) || !num) { @@ -1110,23 +1161,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, switch (f->type) { case PACKET_FANOUT_HASH: default: - if (f->defrag) { + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } skb_get_rxhash(skb); - sk = fanout_demux_hash(f, skb, num); + idx = fanout_demux_hash(f, skb, num); break; case PACKET_FANOUT_LB: - sk = fanout_demux_lb(f, skb, num); + idx = fanout_demux_lb(f, skb, num); break; case PACKET_FANOUT_CPU: - sk = fanout_demux_cpu(f, skb, num); + idx = fanout_demux_cpu(f, skb, num); + break; + case PACKET_FANOUT_ROLLOVER: + idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); break; } - po = pkt_sk(sk); + po = pkt_sk(f->arr[idx]); + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && + unlikely(!packet_rcv_has_room(po, skb))) { + idx = fanout_demux_rollover(f, skb, idx, idx, num); + po = pkt_sk(f->arr[idx]); + } return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } @@ -1175,10 +1234,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f, *match; u8 type = type_flags & 0xff; - u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; + u8 flags = type_flags >> 8; int err; switch (type) { + case PACKET_FANOUT_ROLLOVER: + if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) + return -EINVAL; case PACKET_FANOUT_HASH: case PACKET_FANOUT_LB: case PACKET_FANOUT_CPU: @@ -1203,7 +1265,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } } err = -EINVAL; - if (match && match->defrag != defrag) + if (match && match->flags != flags) goto out; if (!match) { err = -ENOMEM; @@ -1213,7 +1275,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) write_pnet(&match->net, sock_net(sk)); match->id = id; match->type = type; - match->defrag = defrag; + match->flags = flags; atomic_set(&match->rr_cur, 0); INIT_LIST_HEAD(&match->list); spin_lock_init(&match->lock); @@ -1450,6 +1512,8 @@ retry: if (unlikely(extra_len == 4)) skb->no_fcs = 1; + skb_probe_transport_header(skb, 0); + dev_queue_xmit(skb); rcu_read_unlock(); return len; @@ -1880,6 +1944,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); + skb_probe_transport_header(skb, 0); if (po->tp_tx_has_off) { int off_min, off_max, off; @@ -2289,6 +2354,8 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } + skb_probe_transport_header(skb, reserve); + if (unlikely(extra_len == 4)) skb->no_fcs = 1; @@ -3240,7 +3307,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, case PACKET_FANOUT: val = (po->fanout ? ((u32)po->fanout->id | - ((u32)po->fanout->type << 16)) : + ((u32)po->fanout->type << 16) | + ((u32)po->fanout->flags << 24)) : 0); break; case PACKET_TX_HAS_OFF: diff --git a/net/packet/internal.h b/net/packet/internal.h index e84cab8..e891f02 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -77,10 +77,11 @@ struct packet_fanout { unsigned int num_members; u16 id; u8 type; - u8 defrag; + u8 flags; atomic_t rr_cur; struct list_head list; struct sock *arr[PACKET_FANOUT_MAX]; + int next[PACKET_FANOUT_MAX]; spinlock_t lock; atomic_t sk_ref; struct packet_type prot_hook ____cacheline_aligned_in_smp; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 0193630..dc15f43 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U8 }, }; -static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[IFA_MAX+1]; @@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = { [RTA_OIF] = { .type = NLA_U32 }, }; -static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[RTA_MAX+1]; diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c index 4b5ab21..d11ac79 100644 --- a/net/rfkill/rfkill-regulator.c +++ b/net/rfkill/rfkill-regulator.c @@ -51,7 +51,7 @@ static int rfkill_regulator_set_block(void *data, bool blocked) return 0; } -struct rfkill_ops rfkill_regulator_ops = { +static struct rfkill_ops rfkill_regulator_ops = { .set_block = rfkill_regulator_set_block, }; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8579c4b..fd70728 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -982,7 +982,7 @@ done: return ret; } -static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 964f5e4..5c81b26 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,7 +22,7 @@ #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/err.h> #include <linux/slab.h> #include <net/net_namespace.h> @@ -118,7 +118,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) /* Add/change/delete/get a filter node */ -static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; @@ -141,7 +141,12 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN)) return -EPERM; + replay: + err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); + if (err < 0) + return err; + t = nlmsg_data(n); protocol = TC_H_MIN(t->tcm_info); prio = TC_H_MAJ(t->tcm_info); @@ -164,10 +169,6 @@ replay: if (dev == NULL) return -ENODEV; - err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); - if (err < 0) - return err; - /* Find qdisc */ if (!parent) { q = dev->qdisc; @@ -427,7 +428,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; struct tcf_dump_args arg; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return skb->len; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c297e2a..2b935e7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -971,13 +971,13 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) * Delete/get qdisc. */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; - u32 clid = tcm->tcm_parent; + u32 clid; struct Qdisc *q = NULL; struct Qdisc *p = NULL; int err; @@ -985,14 +985,15 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN)) return -EPERM; - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); if (err < 0) return err; + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + + clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { @@ -1038,7 +1039,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) * Create/change qdisc. */ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm; @@ -1053,6 +1054,10 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) replay: /* Reinit, just in case something touches this. */ + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); + if (err < 0) + return err; + tcm = nlmsg_data(n); clid = tcm->tcm_parent; q = p = NULL; @@ -1061,9 +1066,6 @@ replay: if (!dev) return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); - if (err < 0) - return err; if (clid) { if (clid != TC_H_ROOT) { @@ -1372,7 +1374,7 @@ done: -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); @@ -1382,22 +1384,22 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; - u32 portid = tcm->tcm_parent; - u32 clid = tcm->tcm_handle; - u32 qid = TC_H_MAJ(clid); + u32 portid; + u32 clid; + u32 qid; int err; if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN)) return -EPERM; - dev = __dev_get_by_index(net, tcm->tcm_ifindex); - if (!dev) - return -ENODEV; - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); if (err < 0) return err; + dev = __dev_get_by_index(net, tcm->tcm_ifindex); + if (!dev) + return -ENODEV; + /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. @@ -1413,6 +1415,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ + portid = tcm->tcm_parent; + clid = tcm->tcm_handle; + qid = TC_H_MAJ(clid); + if (portid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(portid); @@ -1636,7 +1642,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int t, s_t; - if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return 0; dev = dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 571f1d2..79b1876 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -981,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) }, [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, + [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, }; static void htb_work_func(struct work_struct *work) @@ -994,7 +995,7 @@ static void htb_work_func(struct work_struct *work) static int htb_init(struct Qdisc *sch, struct nlattr *opt) { struct htb_sched *q = qdisc_priv(sch); - struct nlattr *tb[TCA_HTB_INIT + 1]; + struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_glob *gopt; int err; int i; @@ -1002,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy); + err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); if (err < 0) return err; - if (tb[TCA_HTB_INIT] == NULL) { - pr_err("HTB: hey probably you have bad tc tool ?\n"); + if (!tb[TCA_HTB_INIT]) return -EINVAL; - } + gopt = nla_data(tb[TCA_HTB_INIT]); - if (gopt->version != HTB_VER >> 16) { - pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n", - HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); + if (gopt->version != HTB_VER >> 16) return -EINVAL; - } err = qdisc_class_hash_init(&q->clhash); if (err < 0) @@ -1027,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) INIT_WORK(&q->work, htb_work_func); skb_queue_head_init(&q->direct_queue); - q->direct_qlen = qdisc_dev(sch)->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ - q->direct_qlen = 2; - + if (tb[TCA_HTB_DIRECT_QLEN]) + q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); + else { + q->direct_qlen = qdisc_dev(sch)->tx_queue_len; + if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ + q->direct_qlen = 2; + } if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; @@ -1056,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; - if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt)) + if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || + nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -1311,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; - struct nlattr *tb[__TCA_HTB_MAX]; + struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; /* extract all subattrs from opt attr */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b907073..dd21ae3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6185,7 +6185,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 6675914..8bcd498 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -44,7 +44,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) struct nlmsghdr *rep_nlh; struct nlmsghdr *req_nlh = info->nlhdr; struct tipc_genlmsghdr *req_userhdr = info->userhdr; - int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); + int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); u16 cmd; if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) @@ -53,8 +53,8 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) cmd = req_userhdr->cmd; rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, - NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), + nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, + nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), hdr_space); if (rep_buf) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 971282b..2e4d900 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2196,7 +2196,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) - mask |= POLLERR; + mask |= POLLERR | + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); + if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 1f6508e..9caa91c 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -123,6 +123,14 @@ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) return err > 0 ? -err : err; } +static u32 vmci_transport_peer_rid(u32 peer_cid) +{ + if (VMADDR_CID_HYPERVISOR == peer_cid) + return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID; + + return VMCI_TRANSPORT_PACKET_RID; +} + static inline void vmci_transport_packet_init(struct vmci_transport_packet *pkt, struct sockaddr_vm *src, @@ -140,7 +148,7 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt, pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY, VMCI_TRANSPORT_PACKET_RID); pkt->dg.dst = vmci_make_handle(dst->svm_cid, - VMCI_TRANSPORT_PACKET_RID); + vmci_transport_peer_rid(dst->svm_cid)); pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg); pkt->version = VMCI_TRANSPORT_PACKET_VERSION; pkt->type = type; @@ -508,6 +516,9 @@ static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid) static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid) { + if (VMADDR_CID_HYPERVISOR == peer_cid) + return true; + if (vsock->cached_peer != peer_cid) { vsock->cached_peer = peer_cid; if (!vmci_transport_is_trusted(vsock, peer_cid) && @@ -628,7 +639,6 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) static bool vmci_transport_stream_allow(u32 cid, u32 port) { static const u32 non_socket_contexts[] = { - VMADDR_CID_HYPERVISOR, VMADDR_CID_RESERVED, }; int i; @@ -667,7 +677,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) */ if (!vmci_transport_stream_allow(dg->src.context, -1) - || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) + || vmci_transport_peer_rid(dg->src.context) != dg->src.resource) return VMCI_ERROR_NO_ACCESS; if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index 1bf9918..fd88ea8 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -28,6 +28,9 @@ /* The resource ID on which control packets are sent. */ #define VMCI_TRANSPORT_PACKET_RID 1 +/* The resource ID on which control packets are sent to the hypervisor. */ +#define VMCI_TRANSPORT_HYPERVISOR_PACKET_RID 15 + #define VSOCK_PROTO_INVALID 0 #define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0) #define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) diff --git a/net/wireless/ap.c b/net/wireless/ap.c index a4a14e8..324e8d8 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -46,65 +46,3 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return err; } - -void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_ch_switch_notify(dev, chandef); - - wdev_lock(wdev); - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) - goto out; - - wdev->channel = chandef->chan; - nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); -out: - wdev_unlock(wdev); - return; -} -EXPORT_SYMBOL(cfg80211_ch_switch_notify); - -bool cfg80211_rx_spurious_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; - - trace_cfg80211_rx_spurious_frame(dev, addr); - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) { - trace_cfg80211_return_bool(false); - return false; - } - ret = nl80211_unexpected_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; -} -EXPORT_SYMBOL(cfg80211_rx_spurious_frame); - -bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - bool ret; - - trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); - - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO && - wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { - trace_cfg80211_return_bool(false); - return false; - } - ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); - trace_cfg80211_return_bool(ret); - return ret; -} -EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); diff --git a/net/wireless/core.c b/net/wireless/core.c index 6ddf74f..00be555 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -842,6 +842,46 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + struct net_device *dev = wdev->netdev; + + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + cfg80211_leave_ibss(rdev, dev, true); + break; + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_STATION: + mutex_lock(&rdev->sched_scan_mtx); + __cfg80211_stop_sched_scan(rdev, false); + mutex_unlock(&rdev->sched_scan_mtx); + + wdev_lock(wdev); +#ifdef CONFIG_CFG80211_WEXT + kfree(wdev->wext.ie); + wdev->wext.ie = NULL; + wdev->wext.ie_len = 0; + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; +#endif + __cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, true); + cfg80211_mlme_down(rdev, dev); + wdev_unlock(wdev); + break; + case NL80211_IFTYPE_MESH_POINT: + cfg80211_leave_mesh(rdev, dev); + break; + case NL80211_IFTYPE_AP: + cfg80211_stop_ap(rdev, dev); + break; + default: + break; + } + + wdev->beacon_interval = 0; +} + static int cfg80211_netdev_notifier_call(struct notifier_block *nb, unsigned long state, void *ndev) @@ -910,38 +950,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, dev->priv_flags |= IFF_DONT_BRIDGE; break; case NETDEV_GOING_DOWN: - switch (wdev->iftype) { - case NL80211_IFTYPE_ADHOC: - cfg80211_leave_ibss(rdev, dev, true); - break; - case NL80211_IFTYPE_P2P_CLIENT: - case NL80211_IFTYPE_STATION: - mutex_lock(&rdev->sched_scan_mtx); - __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); - - wdev_lock(wdev); -#ifdef CONFIG_CFG80211_WEXT - kfree(wdev->wext.ie); - wdev->wext.ie = NULL; - wdev->wext.ie_len = 0; - wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; -#endif - __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, true); - cfg80211_mlme_down(rdev, dev); - wdev_unlock(wdev); - break; - case NL80211_IFTYPE_MESH_POINT: - cfg80211_leave_mesh(rdev, dev); - break; - case NL80211_IFTYPE_AP: - cfg80211_stop_ap(rdev, dev); - break; - default: - break; - } - wdev->beacon_interval = 0; + cfg80211_leave(rdev, wdev); break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); diff --git a/net/wireless/core.h b/net/wireless/core.h index 5845c2b..b5174f6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -330,20 +330,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *prev_bssid, + const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt, - u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask); + struct cfg80211_assoc_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *prev_bssid, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt, - u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask); + struct cfg80211_assoc_request *req); int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, @@ -375,6 +370,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, bool no_cck, bool dont_wait_for_ack, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, + const struct ieee80211_vht_cap *vht_capa_mask); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, @@ -503,9 +500,14 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); + +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 55957a2..0bb93f3 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -85,6 +85,7 @@ const struct mesh_setup default_mesh_setup = { .ie = NULL, .ie_len = 0, .is_secure = false, + .user_mpm = false, .beacon_interval = MESH_DEFAULT_BEACON_INTERVAL, .dtim_period = MESH_DEFAULT_DTIM_PERIOD, }; @@ -233,20 +234,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, return 0; } -void cfg80211_notify_new_peer_candidate(struct net_device *dev, - const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - - trace_cfg80211_notify_new_peer_candidate(dev, macaddr); - if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) - return; - - nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev, - macaddr, ie, ie_len, gfp); -} -EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); - static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev) { diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index caddca3..390198b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -187,30 +187,6 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_disassoc); -void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, - size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_send_unprot_deauth(dev); - nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_deauth); - -void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, - size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_send_unprot_disassoc(dev); - nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); - void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -367,27 +343,38 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, p1[i] &= p2[i]; } +/* Do a logical ht_capa &= ht_capa_mask. */ +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, + const struct ieee80211_vht_cap *vht_capa_mask) +{ + int i; + u8 *p1, *p2; + if (!vht_capa_mask) { + memset(vht_capa, 0, sizeof(*vht_capa)); + return; + } + + p1 = (u8*)(vht_capa); + p2 = (u8*)(vht_capa_mask); + for (i = 0; i < sizeof(*vht_capa); i++) + p1[i] &= p2[i]; +} + int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *prev_bssid, + const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt, - u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask) + struct cfg80211_assoc_request *req) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_assoc_request req; int err; bool was_connected = false; ASSERT_WDEV_LOCK(wdev); - memset(&req, 0, sizeof(req)); - - if (wdev->current_bss && prev_bssid && - ether_addr_equal(wdev->current_bss->pub.bssid, prev_bssid)) { + if (wdev->current_bss && req->prev_bssid && + ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) { /* * Trying to reassociate: Allow this to proceed and let the old * association to be dropped when the new one is completed. @@ -399,40 +386,30 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, } else if (wdev->current_bss) return -EALREADY; - req.ie = ie; - req.ie_len = ie_len; - memcpy(&req.crypto, crypt, sizeof(req.crypto)); - req.use_mfp = use_mfp; - req.prev_bssid = prev_bssid; - req.flags = assoc_flags; - if (ht_capa) - memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa)); - if (ht_capa_mask) - memcpy(&req.ht_capa_mask, ht_capa_mask, - sizeof(req.ht_capa_mask)); - cfg80211_oper_and_ht_capa(&req.ht_capa_mask, + cfg80211_oper_and_ht_capa(&req->ht_capa_mask, rdev->wiphy.ht_capa_mod_mask); + cfg80211_oper_and_vht_capa(&req->vht_capa_mask, + rdev->wiphy.vht_capa_mod_mask); - req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, - WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (!req.bss) { + req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, + WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + if (!req->bss) { if (was_connected) wdev->sme_state = CFG80211_SME_CONNECTED; return -ENOENT; } - err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel, - CHAN_MODE_SHARED); + err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); if (err) goto out; - err = rdev_assoc(rdev, dev, &req); + err = rdev_assoc(rdev, dev, req); out: if (err) { if (was_connected) wdev->sme_state = CFG80211_SME_CONNECTED; - cfg80211_put_bss(&rdev->wiphy, req.bss); + cfg80211_put_bss(&rdev->wiphy, req->bss); } return err; @@ -441,21 +418,17 @@ out: int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *prev_bssid, + const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt, - u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, - struct ieee80211_ht_cap *ht_capa_mask) + struct cfg80211_assoc_request *req) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); - err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, - ssid, ssid_len, ie, ie_len, use_mfp, crypt, - assoc_flags, ht_capa, ht_capa_mask); + err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, + ssid, ssid_len, req); wdev_unlock(wdev); mutex_unlock(&rdev->devlist_mtx); @@ -577,62 +550,6 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, } } -void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - unsigned int duration, gfp_t gfp) -{ - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); - nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, duration, gfp); -} -EXPORT_SYMBOL(cfg80211_ready_on_channel); - -void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - gfp_t gfp) -{ - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); - nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, gfp); -} -EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); - -void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, - struct station_info *sinfo, gfp_t gfp) -{ - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_new_sta(dev, mac_addr, sinfo); - nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp); -} -EXPORT_SYMBOL(cfg80211_new_sta); - -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) -{ - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_del_sta(dev, mac_addr); - nl80211_send_sta_del_event(rdev, dev, mac_addr, gfp); -} -EXPORT_SYMBOL(cfg80211_del_sta); - -void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, - enum nl80211_connect_failed_reason reason, - gfp_t gfp) -{ - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp); -} -EXPORT_SYMBOL(cfg80211_conn_failed); - struct cfg80211_mgmt_registration { struct list_head list; @@ -909,85 +826,6 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, } EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) -{ - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); - - /* Indicate TX status of the Action frame to user space */ - nl80211_send_mgmt_tx_status(rdev, wdev, cookie, buf, len, ack, gfp); -} -EXPORT_SYMBOL(cfg80211_mgmt_tx_status); - -void cfg80211_cqm_rssi_notify(struct net_device *dev, - enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_cqm_rssi_notify(dev, rssi_event); - - /* Indicate roaming trigger event to user space */ - nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); -} -EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); - -void cfg80211_cqm_pktloss_notify(struct net_device *dev, - const u8 *peer, u32 num_packets, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets); - - /* Indicate roaming trigger event to user space */ - nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp); -} -EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); - -void cfg80211_cqm_txe_notify(struct net_device *dev, - const u8 *peer, u32 num_packets, - u32 rate, u32 intvl, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - nl80211_send_cqm_txe_notify(rdev, dev, peer, num_packets, - rate, intvl, gfp); -} -EXPORT_SYMBOL(cfg80211_cqm_txe_notify); - -void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, - const u8 *replay_ctr, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_gtk_rekey_notify(dev, bssid); - nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); -} -EXPORT_SYMBOL(cfg80211_gtk_rekey_notify); - -void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, - const u8 *bssid, bool preauth, gfp_t gfp) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); - nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); -} -EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); - void cfg80211_dfs_channels_update_work(struct work_struct *work) { struct delayed_work *delayed_work; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 58e13a8..671b69a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -370,6 +370,14 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, + [NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, }, + [NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG }, + [NL80211_ATTR_VHT_CAPABILITY_MASK] = { + .len = NL80211_VHT_CAPABILITY_LEN, + }, + [NL80211_ATTR_MDID] = { .type = NLA_U16 }, + [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, }; /* policy for the key attributes */ @@ -539,7 +547,8 @@ static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq, } static int nl80211_msg_put_channel(struct sk_buff *msg, - struct ieee80211_channel *chan) + struct ieee80211_channel *chan, + bool large) { if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ, chan->center_freq)) @@ -554,9 +563,37 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS)) goto nla_put_failure; - if ((chan->flags & IEEE80211_CHAN_RADAR) && - nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) - goto nla_put_failure; + if (chan->flags & IEEE80211_CHAN_RADAR) { + if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) + goto nla_put_failure; + if (large) { + u32 time; + + time = elapsed_jiffies_msecs(chan->dfs_state_entered); + + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, + chan->dfs_state)) + goto nla_put_failure; + if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, + time)) + goto nla_put_failure; + } + } + + if (large) { + if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ)) + goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) + goto nla_put_failure; + } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, DBM_TO_MBM(chan->max_power))) @@ -832,7 +869,8 @@ nla_put_failure: } static int nl80211_put_iface_combinations(struct wiphy *wiphy, - struct sk_buff *msg) + struct sk_buff *msg, + bool large) { struct nlattr *nl_combis; int i, j; @@ -881,6 +919,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy, nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; + if (large && + nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, + c->radar_detect_widths)) + goto nla_put_failure; nla_nest_end(msg, nl_combi); } @@ -892,412 +934,611 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, - struct cfg80211_registered_device *dev) +#ifdef CONFIG_PM +static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) { - void *hdr; - struct nlattr *nl_bands, *nl_band; - struct nlattr *nl_freqs, *nl_freq; - struct nlattr *nl_rates, *nl_rate; - struct nlattr *nl_cmds; - enum ieee80211_band band; - struct ieee80211_channel *chan; - struct ieee80211_rate *rate; - int i; - const struct ieee80211_txrx_stypes *mgmt_stypes = - dev->wiphy.mgmt_stypes; + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + struct nlattr *nl_tcp; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); - if (!hdr) - return -1; + if (!tcp) + return 0; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || - nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)) || - nla_put_u32(msg, NL80211_ATTR_GENERATION, - cfg80211_rdev_list_generation) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, - dev->wiphy.retry_short) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, - dev->wiphy.retry_long) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, - dev->wiphy.frag_threshold) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, - dev->wiphy.rts_threshold) || - nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, - dev->wiphy.coverage_class) || - nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, - dev->wiphy.max_scan_ssids) || - nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, - dev->wiphy.max_sched_scan_ssids) || - nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, - dev->wiphy.max_scan_ie_len) || - nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, - dev->wiphy.max_sched_scan_ie_len) || - nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, - dev->wiphy.max_match_sets)) - goto nla_put_failure; + nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); + if (!nl_tcp) + return -ENOBUFS; - if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && - nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && - nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && - nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) - goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && - nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; - if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, - sizeof(u32) * dev->wiphy.n_cipher_suites, - dev->wiphy.cipher_suites)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, + tcp->data_payload_max)) + return -ENOBUFS; - if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, - dev->wiphy.max_num_pmkids)) - goto nla_put_failure; + if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) + return -ENOBUFS; - if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && - nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) - goto nla_put_failure; + if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, + sizeof(*tcp->tok), tcp->tok)) + return -ENOBUFS; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, - dev->wiphy.available_antennas_tx) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, - dev->wiphy.available_antennas_rx)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, + tcp->data_interval_max)) + return -ENOBUFS; - if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && - nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, - dev->wiphy.probe_resp_offload)) - goto nla_put_failure; + if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, + tcp->wake_payload_max)) + return -ENOBUFS; - if ((dev->wiphy.available_antennas_tx || - dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) { - u32 tx_ant = 0, rx_ant = 0; - int res; - res = rdev_get_antenna(dev, &tx_ant, &rx_ant); - if (!res) { - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, - tx_ant) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, - rx_ant)) - goto nla_put_failure; - } + nla_nest_end(msg, nl_tcp); + return 0; +} + +static int nl80211_send_wowlan(struct sk_buff *msg, + struct cfg80211_registered_device *dev, + bool large) +{ + struct nlattr *nl_wowlan; + + if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns) + return 0; + + nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); + if (!nl_wowlan) + return -ENOBUFS; + + if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || + ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) + return -ENOBUFS; + + if (dev->wiphy.wowlan.n_patterns) { + struct nl80211_wowlan_pattern_support pat = { + .max_patterns = dev->wiphy.wowlan.n_patterns, + .min_pattern_len = dev->wiphy.wowlan.pattern_min_len, + .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, + .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset, + }; + + if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, + sizeof(pat), &pat)) + return -ENOBUFS; } - if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, - dev->wiphy.interface_modes)) - goto nla_put_failure; + if (large && nl80211_send_wowlan_tcp_caps(dev, msg)) + return -ENOBUFS; - nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); - if (!nl_bands) - goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); - for (band = 0; band < IEEE80211_NUM_BANDS; band++) { - if (!dev->wiphy.bands[band]) - continue; + return 0; +} +#endif - nl_band = nla_nest_start(msg, band); - if (!nl_band) - goto nla_put_failure; +static int nl80211_send_band_rateinfo(struct sk_buff *msg, + struct ieee80211_supported_band *sband) +{ + struct nlattr *nl_rates, *nl_rate; + struct ieee80211_rate *rate; + int i; - /* add HT info */ - if (dev->wiphy.bands[band]->ht_cap.ht_supported && - (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, - sizeof(dev->wiphy.bands[band]->ht_cap.mcs), - &dev->wiphy.bands[band]->ht_cap.mcs) || - nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, - dev->wiphy.bands[band]->ht_cap.cap) || - nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, - dev->wiphy.bands[band]->ht_cap.ampdu_factor) || - nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, - dev->wiphy.bands[band]->ht_cap.ampdu_density))) - goto nla_put_failure; + /* add HT info */ + if (sband->ht_cap.ht_supported && + (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, + sizeof(sband->ht_cap.mcs), + &sband->ht_cap.mcs) || + nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, + sband->ht_cap.cap) || + nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + sband->ht_cap.ampdu_factor) || + nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + sband->ht_cap.ampdu_density))) + return -ENOBUFS; - /* add VHT info */ - if (dev->wiphy.bands[band]->vht_cap.vht_supported && - (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, - sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs), - &dev->wiphy.bands[band]->vht_cap.vht_mcs) || - nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, - dev->wiphy.bands[band]->vht_cap.cap))) - goto nla_put_failure; + /* add VHT info */ + if (sband->vht_cap.vht_supported && + (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, + sizeof(sband->vht_cap.vht_mcs), + &sband->vht_cap.vht_mcs) || + nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, + sband->vht_cap.cap))) + return -ENOBUFS; - /* add frequencies */ - nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); - if (!nl_freqs) - goto nla_put_failure; + /* add bitrates */ + nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); + if (!nl_rates) + return -ENOBUFS; - for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) { - nl_freq = nla_nest_start(msg, i); - if (!nl_freq) - goto nla_put_failure; + for (i = 0; i < sband->n_bitrates; i++) { + nl_rate = nla_nest_start(msg, i); + if (!nl_rate) + return -ENOBUFS; - chan = &dev->wiphy.bands[band]->channels[i]; + rate = &sband->bitrates[i]; + if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, + rate->bitrate)) + return -ENOBUFS; + if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && + nla_put_flag(msg, + NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) + return -ENOBUFS; - if (nl80211_msg_put_channel(msg, chan)) - goto nla_put_failure; + nla_nest_end(msg, nl_rate); + } - nla_nest_end(msg, nl_freq); - } + nla_nest_end(msg, nl_rates); - nla_nest_end(msg, nl_freqs); + return 0; +} - /* add bitrates */ - nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); - if (!nl_rates) - goto nla_put_failure; +static int +nl80211_send_mgmt_stypes(struct sk_buff *msg, + const struct ieee80211_txrx_stypes *mgmt_stypes) +{ + u16 stypes; + struct nlattr *nl_ftypes, *nl_ifs; + enum nl80211_iftype ift; + int i; - for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) { - nl_rate = nla_nest_start(msg, i); - if (!nl_rate) - goto nla_put_failure; + if (!mgmt_stypes) + return 0; - rate = &dev->wiphy.bands[band]->bitrates[i]; - if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, - rate->bitrate)) - goto nla_put_failure; - if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && - nla_put_flag(msg, - NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) - goto nla_put_failure; + nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + if (!nl_ifs) + return -ENOBUFS; - nla_nest_end(msg, nl_rate); + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + return -ENOBUFS; + i = 0; + stypes = mgmt_stypes[ift].tx; + while (stypes) { + if ((stypes & 1) && + nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT)) + return -ENOBUFS; + stypes >>= 1; + i++; } + nla_nest_end(msg, nl_ftypes); + } - nla_nest_end(msg, nl_rates); + nla_nest_end(msg, nl_ifs); - nla_nest_end(msg, nl_band); + nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + if (!nl_ifs) + return -ENOBUFS; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + return -ENOBUFS; + i = 0; + stypes = mgmt_stypes[ift].rx; + while (stypes) { + if ((stypes & 1) && + nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT)) + return -ENOBUFS; + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); } - nla_nest_end(msg, nl_bands); + nla_nest_end(msg, nl_ifs); - nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); - if (!nl_cmds) - goto nla_put_failure; + return 0; +} - i = 0; -#define CMD(op, n) \ - do { \ - if (dev->ops->op) { \ - i++; \ - if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ - goto nla_put_failure; \ - } \ - } while (0) - - CMD(add_virtual_intf, NEW_INTERFACE); - CMD(change_virtual_intf, SET_INTERFACE); - CMD(add_key, NEW_KEY); - CMD(start_ap, START_AP); - CMD(add_station, NEW_STATION); - CMD(add_mpath, NEW_MPATH); - CMD(update_mesh_config, SET_MESH_CONFIG); - CMD(change_bss, SET_BSS); - CMD(auth, AUTHENTICATE); - CMD(assoc, ASSOCIATE); - CMD(deauth, DEAUTHENTICATE); - CMD(disassoc, DISASSOCIATE); - CMD(join_ibss, JOIN_IBSS); - CMD(join_mesh, JOIN_MESH); - CMD(set_pmksa, SET_PMKSA); - CMD(del_pmksa, DEL_PMKSA); - CMD(flush_pmksa, FLUSH_PMKSA); - if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) - CMD(remain_on_channel, REMAIN_ON_CHANNEL); - CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(mgmt_tx, FRAME); - CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); - if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) - goto nla_put_failure; +static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, + struct sk_buff *msg, u32 portid, u32 seq, + int flags, bool split, long *split_start, + long *band_start, long *chan_start) +{ + void *hdr; + struct nlattr *nl_bands, *nl_band; + struct nlattr *nl_freqs, *nl_freq; + struct nlattr *nl_cmds; + enum ieee80211_band band; + struct ieee80211_channel *chan; + int i; + const struct ieee80211_txrx_stypes *mgmt_stypes = + dev->wiphy.mgmt_stypes; + long start = 0, start_chan = 0, start_band = 0; + u32 features; + + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); + if (!hdr) + return -ENOBUFS; + + /* allow always using the variables */ + if (!split) { + split_start = &start; + band_start = &start_band; + chan_start = &start_chan; } - if (dev->ops->set_monitor_channel || dev->ops->start_ap || - dev->ops->join_mesh) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || + nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, + wiphy_name(&dev->wiphy)) || + nla_put_u32(msg, NL80211_ATTR_GENERATION, + cfg80211_rdev_list_generation)) + goto nla_put_failure; + + switch (*split_start) { + case 0: + if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, + dev->wiphy.retry_short) || + nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, + dev->wiphy.retry_long) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, + dev->wiphy.frag_threshold) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, + dev->wiphy.rts_threshold) || + nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, + dev->wiphy.coverage_class) || + nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, + dev->wiphy.max_scan_ssids) || + nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, + dev->wiphy.max_sched_scan_ssids) || + nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, + dev->wiphy.max_scan_ie_len) || + nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, + dev->wiphy.max_sched_scan_ie_len) || + nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, + dev->wiphy.max_match_sets)) goto nla_put_failure; - } - CMD(set_wds_peer, SET_WDS_PEER); - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { - CMD(tdls_mgmt, TDLS_MGMT); - CMD(tdls_oper, TDLS_OPER); - } - if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) - CMD(sched_scan_start, START_SCHED_SCAN); - CMD(probe_client, PROBE_CLIENT); - CMD(set_noack_map, SET_NOACK_MAP); - if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) + + if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && + nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && + nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && + nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && + nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && + nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) + goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && + nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; - } - CMD(start_p2p_device, START_P2P_DEVICE); - CMD(set_mcast_rate, SET_MCAST_RATE); -#ifdef CONFIG_NL80211_TESTMODE - CMD(testmode_cmd, TESTMODE); -#endif + (*split_start)++; + if (split) + break; + case 1: + if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, + sizeof(u32) * dev->wiphy.n_cipher_suites, + dev->wiphy.cipher_suites)) + goto nla_put_failure; -#undef CMD + if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, + dev->wiphy.max_num_pmkids)) + goto nla_put_failure; - if (dev->ops->connect || dev->ops->auth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) + if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) goto nla_put_failure; - } - if (dev->ops->disconnect || dev->ops->deauth) { - i++; - if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, + dev->wiphy.available_antennas_tx) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, + dev->wiphy.available_antennas_rx)) goto nla_put_failure; - } - nla_nest_end(msg, nl_cmds); + if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && + nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, + dev->wiphy.probe_resp_offload)) + goto nla_put_failure; - if (dev->ops->remain_on_channel && - (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && - nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, - dev->wiphy.max_remain_on_channel_duration)) - goto nla_put_failure; + if ((dev->wiphy.available_antennas_tx || + dev->wiphy.available_antennas_rx) && + dev->ops->get_antenna) { + u32 tx_ant = 0, rx_ant = 0; + int res; + res = rdev_get_antenna(dev, &tx_ant, &rx_ant); + if (!res) { + if (nla_put_u32(msg, + NL80211_ATTR_WIPHY_ANTENNA_TX, + tx_ant) || + nla_put_u32(msg, + NL80211_ATTR_WIPHY_ANTENNA_RX, + rx_ant)) + goto nla_put_failure; + } + } - if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && - nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) - goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 2: + if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, + dev->wiphy.interface_modes)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 3: + nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); + if (!nl_bands) + goto nla_put_failure; - if (mgmt_stypes) { - u16 stypes; - struct nlattr *nl_ftypes, *nl_ifs; - enum nl80211_iftype ift; + for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; - nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); - if (!nl_ifs) - goto nla_put_failure; + sband = dev->wiphy.bands[band]; - for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); - if (!nl_ftypes) + if (!sband) + continue; + + nl_band = nla_nest_start(msg, band); + if (!nl_band) goto nla_put_failure; - i = 0; - stypes = mgmt_stypes[ift].tx; - while (stypes) { - if ((stypes & 1) && - nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, - (i << 4) | IEEE80211_FTYPE_MGMT)) + + switch (*chan_start) { + case 0: + if (nl80211_send_band_rateinfo(msg, sband)) goto nla_put_failure; - stypes >>= 1; - i++; + (*chan_start)++; + if (split) + break; + default: + /* add frequencies */ + nl_freqs = nla_nest_start( + msg, NL80211_BAND_ATTR_FREQS); + if (!nl_freqs) + goto nla_put_failure; + + for (i = *chan_start - 1; + i < sband->n_channels; + i++) { + nl_freq = nla_nest_start(msg, i); + if (!nl_freq) + goto nla_put_failure; + + chan = &sband->channels[i]; + + if (nl80211_msg_put_channel(msg, chan, + split)) + goto nla_put_failure; + + nla_nest_end(msg, nl_freq); + if (split) + break; + } + if (i < sband->n_channels) + *chan_start = i + 2; + else + *chan_start = 0; + nla_nest_end(msg, nl_freqs); + } + + nla_nest_end(msg, nl_band); + + if (split) { + /* start again here */ + if (*chan_start) + band--; + break; } - nla_nest_end(msg, nl_ftypes); } + nla_nest_end(msg, nl_bands); - nla_nest_end(msg, nl_ifs); + if (band < IEEE80211_NUM_BANDS) + *band_start = band + 1; + else + *band_start = 0; - nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); - if (!nl_ifs) + /* if bands & channels are done, continue outside */ + if (*band_start == 0 && *chan_start == 0) + (*split_start)++; + if (split) + break; + case 4: + nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); + if (!nl_cmds) goto nla_put_failure; - for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { - nl_ftypes = nla_nest_start(msg, ift); - if (!nl_ftypes) + i = 0; +#define CMD(op, n) \ + do { \ + if (dev->ops->op) { \ + i++; \ + if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ + goto nla_put_failure; \ + } \ + } while (0) + + CMD(add_virtual_intf, NEW_INTERFACE); + CMD(change_virtual_intf, SET_INTERFACE); + CMD(add_key, NEW_KEY); + CMD(start_ap, START_AP); + CMD(add_station, NEW_STATION); + CMD(add_mpath, NEW_MPATH); + CMD(update_mesh_config, SET_MESH_CONFIG); + CMD(change_bss, SET_BSS); + CMD(auth, AUTHENTICATE); + CMD(assoc, ASSOCIATE); + CMD(deauth, DEAUTHENTICATE); + CMD(disassoc, DISASSOCIATE); + CMD(join_ibss, JOIN_IBSS); + CMD(join_mesh, JOIN_MESH); + CMD(set_pmksa, SET_PMKSA); + CMD(del_pmksa, DEL_PMKSA); + CMD(flush_pmksa, FLUSH_PMKSA); + if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) + CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); + CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); + if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; - i = 0; - stypes = mgmt_stypes[ift].rx; - while (stypes) { - if ((stypes & 1) && - nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, - (i << 4) | IEEE80211_FTYPE_MGMT)) - goto nla_put_failure; - stypes >>= 1; - i++; - } - nla_nest_end(msg, nl_ftypes); } - nla_nest_end(msg, nl_ifs); - } + if (dev->ops->set_monitor_channel || dev->ops->start_ap || + dev->ops->join_mesh) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) + goto nla_put_failure; + } + CMD(set_wds_peer, SET_WDS_PEER); + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { + CMD(tdls_mgmt, TDLS_MGMT); + CMD(tdls_oper, TDLS_OPER); + } + if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + CMD(sched_scan_start, START_SCHED_SCAN); + CMD(probe_client, PROBE_CLIENT); + CMD(set_noack_map, SET_NOACK_MAP); + if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) + goto nla_put_failure; + } + CMD(start_p2p_device, START_P2P_DEVICE); + CMD(set_mcast_rate, SET_MCAST_RATE); -#ifdef CONFIG_PM - if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { - struct nlattr *nl_wowlan; +#ifdef CONFIG_NL80211_TESTMODE + CMD(testmode_cmd, TESTMODE); +#endif - nl_wowlan = nla_nest_start(msg, - NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); - if (!nl_wowlan) - goto nla_put_failure; +#undef CMD - if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && - nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) - goto nla_put_failure; - if (dev->wiphy.wowlan.n_patterns) { - struct nl80211_wowlan_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan.n_patterns, - .min_pattern_len = - dev->wiphy.wowlan.pattern_min_len, - .max_pattern_len = - dev->wiphy.wowlan.pattern_max_len, - .max_pkt_offset = - dev->wiphy.wowlan.max_pkt_offset, - }; - if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, - sizeof(pat), &pat)) + if (dev->ops->connect || dev->ops->auth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) goto nla_put_failure; } - nla_nest_end(msg, nl_wowlan); - } + if (dev->ops->disconnect || dev->ops->deauth) { + i++; + if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) + goto nla_put_failure; + } + + nla_nest_end(msg, nl_cmds); + (*split_start)++; + if (split) + break; + case 5: + if (dev->ops->remain_on_channel && + (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && + nla_put_u32(msg, + NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + dev->wiphy.max_remain_on_channel_duration)) + goto nla_put_failure; + + if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && + nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) + goto nla_put_failure; + + if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 6: +#ifdef CONFIG_PM + if (nl80211_send_wowlan(msg, dev, split)) + goto nla_put_failure; + (*split_start)++; + if (split) + break; +#else + (*split_start)++; #endif + case 7: + if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, + dev->wiphy.software_iftypes)) + goto nla_put_failure; - if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, - dev->wiphy.software_iftypes)) - goto nla_put_failure; + if (nl80211_put_iface_combinations(&dev->wiphy, msg, split)) + goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg)) - goto nla_put_failure; + (*split_start)++; + if (split) + break; + case 8: + if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && + nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, + dev->wiphy.ap_sme_capa)) + goto nla_put_failure; - if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && - nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, - dev->wiphy.ap_sme_capa)) - goto nla_put_failure; + features = dev->wiphy.features; + /* + * We can only add the per-channel limit information if the + * dump is split, otherwise it makes it too big. Therefore + * only advertise it in that case. + */ + if (split) + features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; + if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) + goto nla_put_failure; - if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, - dev->wiphy.features)) - goto nla_put_failure; + if (dev->wiphy.ht_capa_mod_mask && + nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, + sizeof(*dev->wiphy.ht_capa_mod_mask), + dev->wiphy.ht_capa_mod_mask)) + goto nla_put_failure; - if (dev->wiphy.ht_capa_mod_mask && - nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, - sizeof(*dev->wiphy.ht_capa_mod_mask), - dev->wiphy.ht_capa_mod_mask)) - goto nla_put_failure; + if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && + dev->wiphy.max_acl_mac_addrs && + nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, + dev->wiphy.max_acl_mac_addrs)) + goto nla_put_failure; - if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && - dev->wiphy.max_acl_mac_addrs && - nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, - dev->wiphy.max_acl_mac_addrs)) - goto nla_put_failure; + /* + * Any information below this point is only available to + * applications that can deal with it being split. This + * helps ensure that newly added capabilities don't break + * older tools by overrunning their buffers. + * + * We still increment split_start so that in the split + * case we'll continue with more data in the next round, + * but break unconditionally so unsplit data stops here. + */ + (*split_start)++; + break; + case 9: + if (dev->wiphy.extended_capabilities && + (nla_put(msg, NL80211_ATTR_EXT_CAPA, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities) || + nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, + dev->wiphy.extended_capabilities_len, + dev->wiphy.extended_capabilities_mask))) + goto nla_put_failure; + if (dev->wiphy.vht_capa_mod_mask && + nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, + sizeof(*dev->wiphy.vht_capa_mod_mask), + dev->wiphy.vht_capa_mod_mask)) + goto nla_put_failure; + + /* done */ + *split_start = 0; + break; + } return genlmsg_end(msg, hdr); nla_put_failure: @@ -1310,39 +1551,80 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) int idx = 0, ret; int start = cb->args[0]; struct cfg80211_registered_device *dev; + s64 filter_wiphy = -1; + bool split = false; + struct nlattr **tb = nl80211_fam.attrbuf; + int res; mutex_lock(&cfg80211_mutex); + res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, nl80211_policy); + if (res == 0) { + split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; + if (tb[NL80211_ATTR_WIPHY]) + filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); + if (tb[NL80211_ATTR_WDEV]) + filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; + if (tb[NL80211_ATTR_IFINDEX]) { + struct net_device *netdev; + int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); + + netdev = dev_get_by_index(sock_net(skb->sk), ifidx); + if (!netdev) { + mutex_unlock(&cfg80211_mutex); + return -ENODEV; + } + if (netdev->ieee80211_ptr) { + dev = wiphy_to_dev( + netdev->ieee80211_ptr->wiphy); + filter_wiphy = dev->wiphy_idx; + } + dev_put(netdev); + } + } + list_for_each_entry(dev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) continue; if (++idx <= start) continue; - ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev); - if (ret < 0) { - /* - * If sending the wiphy data didn't fit (ENOBUFS or - * EMSGSIZE returned), this SKB is still empty (so - * it's not too big because another wiphy dataset is - * already in the skb) and we've not tried to adjust - * the dump allocation yet ... then adjust the alloc - * size to be bigger, and return 1 but with the empty - * skb. This results in an empty message being RX'ed - * in userspace, but that is ignored. - * - * We can then retry with the larger buffer. - */ - if ((ret == -ENOBUFS || ret == -EMSGSIZE) && - !skb->len && - cb->min_dump_alloc < 4096) { - cb->min_dump_alloc = 4096; - mutex_unlock(&cfg80211_mutex); - return 1; + if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy) + continue; + /* attempt to fit multiple wiphy data chunks into the skb */ + do { + ret = nl80211_send_wiphy(dev, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + split, &cb->args[1], + &cb->args[2], + &cb->args[3]); + if (ret < 0) { + /* + * If sending the wiphy data didn't fit (ENOBUFS + * or EMSGSIZE returned), this SKB is still + * empty (so it's not too big because another + * wiphy dataset is already in the skb) and + * we've not tried to adjust the dump allocation + * yet ... then adjust the alloc size to be + * bigger, and return 1 but with the empty skb. + * This results in an empty message being RX'ed + * in userspace, but that is ignored. + * + * We can then retry with the larger buffer. + */ + if ((ret == -ENOBUFS || ret == -EMSGSIZE) && + !skb->len && + cb->min_dump_alloc < 4096) { + cb->min_dump_alloc = 4096; + mutex_unlock(&cfg80211_mutex); + return 1; + } + idx--; + break; } - idx--; - break; - } + } while (cb->args[1] > 0); + break; } mutex_unlock(&cfg80211_mutex); @@ -1360,7 +1642,8 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { + if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, + false, NULL, NULL, NULL) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -2967,6 +3250,7 @@ static int parse_station_flags(struct genl_info *info, sta_flags = nla_data(nla); params->sta_flags_mask = sta_flags->mask; params->sta_flags_set = sta_flags->set; + params->sta_flags_set &= params->sta_flags_mask; if ((params->sta_flags_mask | params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) return -EINVAL; @@ -3320,6 +3604,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) return genlmsg_reply(msg, info); } +int cfg80211_check_station_change(struct wiphy *wiphy, + struct station_parameters *params, + enum cfg80211_station_type statype) +{ + if (params->listen_interval != -1) + return -EINVAL; + if (params->aid) + return -EINVAL; + + /* When you run into this, adjust the code below for the new flag */ + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + + switch (statype) { + case CFG80211_STA_MESH_PEER_KERNEL: + case CFG80211_STA_MESH_PEER_USER: + /* + * No ignoring the TDLS flag here -- the userspace mesh + * code doesn't have the bug of including TDLS in the + * mask everywhere. + */ + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_AUTHORIZED))) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_SETUP: + case CFG80211_STA_TDLS_PEER_ACTIVE: + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) + return -EINVAL; + /* ignore since it can't change */ + params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + break; + default: + /* disallow mesh-specific things */ + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + return -EINVAL; + if (params->local_pm) + return -EINVAL; + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; + } + + if (statype != CFG80211_STA_TDLS_PEER_SETUP && + statype != CFG80211_STA_TDLS_PEER_ACTIVE) { + /* TDLS can't be set, ... */ + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + return -EINVAL; + /* + * ... but don't bother the driver with it. This works around + * a hostapd/wpa_supplicant issue -- it always includes the + * TLDS_PEER flag in the mask even for AP mode. + */ + params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); + } + + if (statype != CFG80211_STA_TDLS_PEER_SETUP) { + /* reject other things that can't change */ + if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) + return -EINVAL; + if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY) + return -EINVAL; + if (params->supported_rates) + return -EINVAL; + if (params->ext_capab || params->ht_capa || params->vht_capa) + return -EINVAL; + } + + if (statype != CFG80211_STA_AP_CLIENT) { + if (params->vlan) + return -EINVAL; + } + + switch (statype) { + case CFG80211_STA_AP_MLME_CLIENT: + /* Use this only for authorizing/unauthorizing a station */ + if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))) + return -EOPNOTSUPP; + break; + case CFG80211_STA_AP_CLIENT: + /* accept only the listed bits */ + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | + BIT(NL80211_STA_FLAG_WME) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + + /* but authenticated/associated only if driver handles it */ + if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && + params->sta_flags_mask & + (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED))) + return -EINVAL; + break; + case CFG80211_STA_IBSS: + case CFG80211_STA_AP_STA: + /* reject any changes other than AUTHORIZED */ + if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_SETUP: + /* reject any changes other than AUTHORIZED or WME */ + if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_WME))) + return -EINVAL; + /* force (at least) rates when authorizing */ + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) && + !params->supported_rates) + return -EINVAL; + break; + case CFG80211_STA_TDLS_PEER_ACTIVE: + /* reject any changes */ + return -EINVAL; + case CFG80211_STA_MESH_PEER_KERNEL: + if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) + return -EINVAL; + break; + case CFG80211_STA_MESH_PEER_USER: + if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) + return -EINVAL; + break; + } + + return 0; +} +EXPORT_SYMBOL(cfg80211_check_station_change); + /* * Get vlan interface making sure it is running and on the right wiphy. */ @@ -3342,6 +3756,13 @@ static struct net_device *get_vlan(struct genl_info *info, goto error; } + if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { + ret = -EINVAL; + goto error; + } + if (!netif_running(v)) { ret = -ENETDOWN; goto error; @@ -3359,21 +3780,13 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = { [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, }; -static int nl80211_set_station_tdls(struct genl_info *info, - struct station_parameters *params) +static int nl80211_parse_sta_wme(struct genl_info *info, + struct station_parameters *params) { struct nlattr *tb[NL80211_STA_WME_MAX + 1]; struct nlattr *nla; int err; - /* Dummy STA entry gets updated once the peer capabilities are known */ - if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) - params->ht_capa = - nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); - if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - params->vht_capa = - nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); - /* parse WME attributes if present */ if (!info->attrs[NL80211_ATTR_STA_WME]) return 0; @@ -3401,18 +3814,34 @@ static int nl80211_set_station_tdls(struct genl_info *info, return 0; } +static int nl80211_set_station_tdls(struct genl_info *info, + struct station_parameters *params) +{ + /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params->ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) + params->vht_capa = + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + + return nl80211_parse_sta_wme(info, params); +} + static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int err; struct net_device *dev = info->user_ptr[1]; struct station_parameters params; - u8 *mac_addr = NULL; + u8 *mac_addr; + int err; memset(¶ms, 0, sizeof(params)); params.listen_interval = -1; - params.plink_state = -1; + + if (!rdev->ops->change_station) + return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_STA_AID]) return -EINVAL; @@ -3445,19 +3874,23 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) return -EINVAL; - if (!rdev->ops->change_station) - return -EOPNOTSUPP; - if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) + return -EINVAL; + } - if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) + if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) { params.plink_state = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); + if (params.plink_state >= NUM_NL80211_PLINK_STATES) + return -EINVAL; + params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE; + } if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) { enum nl80211_mesh_power_mode pm = nla_get_u32( @@ -3470,127 +3903,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.local_pm = pm; } + /* Include parameters for TDLS peer (will check later) */ + err = nl80211_set_station_tdls(info, ¶ms); + if (err) + return err; + + params.vlan = get_vlan(info, rdev); + if (IS_ERR(params.vlan)) + return PTR_ERR(params.vlan); + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: - /* disallow mesh-specific things */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - - /* TDLS can't be set, ... */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) - return -EINVAL; - /* - * ... but don't bother the driver with it. This works around - * a hostapd/wpa_supplicant issue -- it always includes the - * TLDS_PEER flag in the mask even for AP mode. - */ - params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - - /* accept only the listed bits */ - if (params.sta_flags_mask & - ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | - BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED) | - BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | - BIT(NL80211_STA_FLAG_WME) | - BIT(NL80211_STA_FLAG_MFP))) - return -EINVAL; - - /* but authenticated/associated only if driver handles it */ - if (!(rdev->wiphy.features & - NL80211_FEATURE_FULL_AP_CLIENT_STATE) && - params.sta_flags_mask & - (BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_ASSOCIATED))) - return -EINVAL; - - /* reject other things that can't change */ - if (params.supported_rates) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - - /* must be last in here for error handling */ - params.vlan = get_vlan(info, rdev); - if (IS_ERR(params.vlan)) - return PTR_ERR(params.vlan); - break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - /* - * Don't allow userspace to change the TDLS_PEER flag, - * but silently ignore attempts to change it since we - * don't have state here to verify that it doesn't try - * to change the flag. - */ - params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - /* Include parameters for TDLS peer (driver will check) */ - err = nl80211_set_station_tdls(info, ¶ms); - if (err) - return err; - /* disallow things sta doesn't support */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - /* reject any changes other than AUTHORIZED or WME (for TDLS) */ - if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | - BIT(NL80211_STA_FLAG_WME))) - return -EINVAL; - break; case NL80211_IFTYPE_ADHOC: - /* disallow things sta doesn't support */ - if (params.plink_action) - return -EINVAL; - if (params.local_pm) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - /* reject any changes other than AUTHORIZED */ - if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) - return -EINVAL; - break; case NL80211_IFTYPE_MESH_POINT: - /* disallow things mesh doesn't support */ - if (params.vlan) - return -EINVAL; - if (params.supported_rates) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) - return -EINVAL; - if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || - info->attrs[NL80211_ATTR_VHT_CAPABILITY]) - return -EINVAL; - /* - * No special handling for TDLS here -- the userspace - * mesh code doesn't have this bug. - */ - if (params.sta_flags_mask & - ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | - BIT(NL80211_STA_FLAG_MFP) | - BIT(NL80211_STA_FLAG_AUTHORIZED))) - return -EINVAL; break; default: - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto out_put_vlan; } - /* be aware of params.vlan when changing code here */ - + /* driver will call cfg80211_check_station_change() */ err = rdev_change_station(rdev, dev, mac_addr, ¶ms); + out_put_vlan: if (params.vlan) dev_put(params.vlan); @@ -3607,6 +3946,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); + if (!rdev->ops->add_station) + return -EOPNOTSUPP; + if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -3652,50 +3994,32 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); - if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) { params.plink_action = - nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) + return -EINVAL; + } - if (!rdev->ops->add_station) - return -EOPNOTSUPP; + err = nl80211_parse_sta_wme(info, ¶ms); + if (err) + return err; if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; + /* When you run into this, adjust the code below for the new flag */ + BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: - /* parse WME attributes if sta is WME capable */ - if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && - (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) && - info->attrs[NL80211_ATTR_STA_WME]) { - struct nlattr *tb[NL80211_STA_WME_MAX + 1]; - struct nlattr *nla; - - nla = info->attrs[NL80211_ATTR_STA_WME]; - err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, - nl80211_sta_wme_policy); - if (err) - return err; + /* ignore WME attributes if iface/sta is not capable */ + if (!(rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) || + !(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) + params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; - if (tb[NL80211_STA_WME_UAPSD_QUEUES]) - params.uapsd_queues = - nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]); - if (params.uapsd_queues & - ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) - return -EINVAL; - - if (tb[NL80211_STA_WME_MAX_SP]) - params.max_sp = - nla_get_u8(tb[NL80211_STA_WME_MAX_SP]); - - if (params.max_sp & - ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK) - return -EINVAL; - - params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD; - } /* TDLS peers cannot be added */ if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) return -EINVAL; @@ -3716,6 +4040,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(params.vlan); break; case NL80211_IFTYPE_MESH_POINT: + /* ignore uAPSD data */ + params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; + /* associated is disallowed */ if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) return -EINVAL; @@ -3724,8 +4051,14 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; break; case NL80211_IFTYPE_STATION: - /* associated is disallowed */ - if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) + case NL80211_IFTYPE_P2P_CLIENT: + /* ignore uAPSD data */ + params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; + + /* these are disallowed */ + if (params.sta_flags_mask & + (BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED))) return -EINVAL; /* Only TDLS peers can be added */ if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) @@ -3736,6 +4069,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* ... with external setup is supported */ if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)) return -EOPNOTSUPP; + /* + * Older wpa_supplicant versions always mark the TDLS peer + * as authorized, but it shouldn't yet be. + */ + params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED); break; default: return -EOPNOTSUPP; @@ -4280,6 +4618,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, @@ -4418,6 +4757,7 @@ do { \ static int nl80211_parse_mesh_setup(struct genl_info *info, struct mesh_setup *setup) { + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; if (!info->attrs[NL80211_ATTR_MESH_SETUP]) @@ -4454,8 +4794,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, setup->ie = nla_data(ieattr); setup->ie_len = nla_len(ieattr); } + if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] && + !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM)) + return -EINVAL; + setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]); setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]); setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); + if (setup->is_secure) + setup->user_mpm = true; return 0; } @@ -5663,14 +6009,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct cfg80211_crypto_settings crypto; struct ieee80211_channel *chan; - const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; - int err, ssid_len, ie_len = 0; - bool use_mfp = false; - u32 flags = 0; - struct ieee80211_ht_cap *ht_capa = NULL; - struct ieee80211_ht_cap *ht_capa_mask = NULL; + struct cfg80211_assoc_request req = {}; + const u8 *bssid, *ssid; + int err, ssid_len = 0; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -5698,41 +6040,58 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); if (info->attrs[NL80211_ATTR_IE]) { - ie = nla_data(info->attrs[NL80211_ATTR_IE]); - ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } if (info->attrs[NL80211_ATTR_USE_MFP]) { enum nl80211_mfp mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); if (mfp == NL80211_MFP_REQUIRED) - use_mfp = true; + req.use_mfp = true; else if (mfp != NL80211_MFP_NO) return -EINVAL; } if (info->attrs[NL80211_ATTR_PREV_BSSID]) - prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); + req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT])) - flags |= ASSOC_REQ_DISABLE_HT; + req.flags |= ASSOC_REQ_DISABLE_HT; if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) - ht_capa_mask = - nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]); + memcpy(&req.ht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]), + sizeof(req.ht_capa_mask)); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { - if (!ht_capa_mask) + if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) return -EINVAL; - ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + memcpy(&req.ht_capa, + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]), + sizeof(req.ht_capa)); } - err = nl80211_crypto_settings(rdev, info, &crypto, 1); + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) + req.flags |= ASSOC_REQ_DISABLE_VHT; + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + memcpy(&req.vht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), + sizeof(req.vht_capa_mask)); + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { + if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + return -EINVAL; + memcpy(&req.vht_capa, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]), + sizeof(req.vht_capa)); + } + + err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); if (!err) - err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, - ssid, ssid_len, ie, ie_len, use_mfp, - &crypto, flags, ht_capa, - ht_capa_mask); + err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, + ssid, ssid_len, &req); return err; } @@ -6312,6 +6671,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.ht_capa)); } + if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) + connect.flags |= ASSOC_REQ_DISABLE_VHT; + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) + memcpy(&connect.vht_capa_mask, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), + sizeof(connect.vht_capa_mask)); + + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { + if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) { + kfree(connkeys); + return -EINVAL; + } + memcpy(&connect.vht_capa, + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]), + sizeof(connect.vht_capa)); + } + err = cfg80211_connect(rdev, dev, &connect, connkeys); if (err) kfree(connkeys); @@ -7085,6 +7462,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (setup.user_mpm) + cfg.auto_open_plinks = false; + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { err = nl80211_parse_chandef(rdev, info, &setup.chandef); if (err) @@ -7284,7 +7664,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, return -EINVAL; if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > - rdev->wiphy.wowlan.tcp->data_interval_max) + rdev->wiphy.wowlan.tcp->data_interval_max || + nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0) return -EINVAL; wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); @@ -7769,6 +8150,54 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) return 0; } +static int nl80211_get_protocol_features(struct sk_buff *skb, + struct genl_info *info) +{ + void *hdr; + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_GET_PROTOCOL_FEATURES); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES, + NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + + nla_put_failure: + kfree_skb(msg); + return -ENOBUFS; +} + +static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_update_ft_ies_params ft_params; + struct net_device *dev = info->user_ptr[1]; + + if (!rdev->ops->update_ft_ies) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MDID] || + !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + memset(&ft_params, 0, sizeof(ft_params)); + ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]); + ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + + return rdev_update_ft_ies(rdev, dev, &ft_params); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -8445,6 +8874,19 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, + .doit = nl80211_get_protocol_features, + .policy = nl80211_policy, + }, + { + .cmd = NL80211_CMD_UPDATE_FT_IES, + .doit = nl80211_update_ft_ies, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -8472,7 +8914,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) if (!msg) return; - if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) { + if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, + false, NULL, NULL, NULL) < 0) { nlmsg_free(msg); return; } @@ -8796,21 +9239,31 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE, gfp); } -void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *buf, - size_t len, gfp_t gfp) +void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, + size_t len) { - nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_send_unprot_deauth(dev); + nl80211_send_mlme_event(rdev, dev, buf, len, + NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC); } +EXPORT_SYMBOL(cfg80211_send_unprot_deauth); -void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *buf, - size_t len, gfp_t gfp) +void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, + size_t len) { - nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_UNPROT_DISASSOCIATE, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_send_unprot_disassoc(dev); + nl80211_send_mlme_event(rdev, dev, buf, len, + NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC); } +EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, @@ -9013,14 +9466,19 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *macaddr, const u8* ie, u8 ie_len, - gfp_t gfp) +void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, + const u8* ie, u8 ie_len, gfp_t gfp) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) + return; + + trace_cfg80211_notify_new_peer_candidate(dev, addr); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -9032,8 +9490,8 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || (ie_len && ie && nla_put(msg, NL80211_ATTR_IE, ie_len , ie))) goto nla_put_failure; @@ -9048,6 +9506,7 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, @@ -9116,7 +9575,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE); if (!nl_freq) goto nla_put_failure; - if (nl80211_msg_put_channel(msg, channel_before)) + if (nl80211_msg_put_channel(msg, channel_before, false)) goto nla_put_failure; nla_nest_end(msg, nl_freq); @@ -9124,7 +9583,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER); if (!nl_freq) goto nla_put_failure; - if (nl80211_msg_put_channel(msg, channel_after)) + if (nl80211_msg_put_channel(msg, channel_after, false)) goto nla_put_failure; nla_nest_end(msg, nl_freq); @@ -9186,31 +9645,42 @@ static void nl80211_send_remain_on_chan_event( nlmsg_free(msg); } -void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - unsigned int duration, gfp_t gfp) +void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, + struct ieee80211_channel *chan, + unsigned int duration, gfp_t gfp) { + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, rdev, wdev, cookie, chan, duration, gfp); } +EXPORT_SYMBOL(cfg80211_ready_on_channel); -void nl80211_send_remain_on_channel_cancel( - struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - u64 cookie, struct ieee80211_channel *chan, gfp_t gfp) +void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, + struct ieee80211_channel *chan, + gfp_t gfp) { + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, rdev, wdev, cookie, chan, 0, gfp); } +EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); -void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - struct station_info *sinfo, gfp_t gfp) +void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) { + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; + trace_cfg80211_new_sta(dev, mac_addr, sinfo); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -9224,14 +9694,17 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); } +EXPORT_SYMBOL(cfg80211_new_sta); -void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - gfp_t gfp) +void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) { + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; + trace_cfg80211_del_sta(dev, mac_addr); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -9256,12 +9729,14 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_del_sta); -void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - enum nl80211_connect_failed_reason reason, - gfp_t gfp) +void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, + enum nl80211_connect_failed_reason reason, + gfp_t gfp) { + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; @@ -9290,6 +9765,7 @@ void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_conn_failed); static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, const u8 *addr, gfp_t gfp) @@ -9334,19 +9810,47 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, return true; } -bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp) +bool cfg80211_rx_spurious_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) { - return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, - addr, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_spurious_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, + addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; } +EXPORT_SYMBOL(cfg80211_rx_spurious_frame); -bool nl80211_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp) +bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, + const u8 *addr, gfp_t gfp) { - return __nl80211_unexpected_frame(dev, - NL80211_CMD_UNEXPECTED_4ADDR_FRAME, - addr, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + bool ret; + + trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { + trace_cfg80211_return_bool(false); + return false; + } + ret = __nl80211_unexpected_frame(dev, + NL80211_CMD_UNEXPECTED_4ADDR_FRAME, + addr, gfp); + trace_cfg80211_return_bool(ret); + return ret; } +EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u32 nlportid, @@ -9386,15 +9890,17 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, return -ENOBUFS; } -void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp) +void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) { + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct net_device *netdev = wdev->netdev; struct sk_buff *msg; void *hdr; + trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -9422,17 +9928,21 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_mgmt_tx_status); -void -nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp) +void cfg80211_cqm_rssi_notify(struct net_device *dev, + enum nl80211_cqm_rssi_threshold_event rssi_event, + gfp_t gfp) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; + trace_cfg80211_cqm_rssi_notify(dev, rssi_event); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -9444,7 +9954,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); @@ -9467,10 +9977,11 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); -void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, - const u8 *replay_ctr, gfp_t gfp) +static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *replay_ctr, gfp_t gfp) { struct sk_buff *msg; struct nlattr *rekey_attr; @@ -9512,9 +10023,22 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int index, - const u8 *bssid, bool preauth, gfp_t gfp) +void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, + const u8 *replay_ctr, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_gtk_rekey_notify(dev, bssid); + nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); +} +EXPORT_SYMBOL(cfg80211_gtk_rekey_notify); + +static void +nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, int index, + const u8 *bssid, bool preauth, gfp_t gfp) { struct sk_buff *msg; struct nlattr *attr; @@ -9557,9 +10081,22 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - struct cfg80211_chan_def *chandef, gfp_t gfp) +void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, + const u8 *bssid, bool preauth, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); + nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); +} +EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); + +static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + struct cfg80211_chan_def *chandef, + gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -9591,11 +10128,36 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void -nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *peer, - u32 num_packets, u32 rate, u32 intvl, gfp_t gfp) +void cfg80211_ch_switch_notify(struct net_device *dev, + struct cfg80211_chan_def *chandef) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_ch_switch_notify(dev, chandef); + + wdev_lock(wdev); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && + wdev->iftype != NL80211_IFTYPE_P2P_GO)) + goto out; + + wdev->channel = chandef->chan; + nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); +out: + wdev_unlock(wdev); + return; +} +EXPORT_SYMBOL(cfg80211_ch_switch_notify); + +void cfg80211_cqm_txe_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, + u32 rate, u32 intvl, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; @@ -9611,7 +10173,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) goto nla_put_failure; @@ -9640,6 +10202,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_cqm_txe_notify); void nl80211_radar_notify(struct cfg80211_registered_device *rdev, @@ -9692,15 +10255,18 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } -void -nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *peer, - u32 num_packets, gfp_t gfp) +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; struct nlattr *pinfoattr; void *hdr; + trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -9712,7 +10278,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer)) goto nla_put_failure; @@ -9735,6 +10301,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, genlmsg_cancel(msg, hdr); nlmsg_free(msg); } +EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); void cfg80211_probe_status(struct net_device *dev, const u8 *addr, u64 cookie, bool acked, gfp_t gfp) @@ -10021,6 +10588,50 @@ static struct notifier_block nl80211_netlink_notifier = { .notifier_call = nl80211_netlink_notify, }; +void cfg80211_ft_event(struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event) +{ + struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct sk_buff *msg; + void *hdr; + int err; + + trace_cfg80211_ft_event(wiphy, netdev, ft_event); + + if (!ft_event->target_ap) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); + if (ft_event->ies) + nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); + if (ft_event->ric_ies) + nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, + ft_event->ric_ies); + + err = genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, GFP_KERNEL); +} +EXPORT_SYMBOL(cfg80211_ft_event); + /* initialisation/exit functions */ int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b061da4..a4073e8 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -29,12 +29,6 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev, void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp); @@ -54,10 +48,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); -void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - const u8 *macaddr, const u8* ie, u8 ie_len, - gfp_t gfp); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, @@ -73,41 +63,10 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp); -void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - unsigned int duration, gfp_t gfp); -void nl80211_send_remain_on_channel_cancel( - struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - u64 cookie, struct ieee80211_channel *chan, gfp_t gfp); - -void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - struct station_info *sinfo, gfp_t gfp); -void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - gfp_t gfp); - -void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac_addr, - enum nl80211_connect_failed_reason reason, - gfp_t gfp); - int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u32 nlpid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp); - -void -nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, - enum nl80211_cqm_rssi_threshold_event rssi_event, - gfp_t gfp); void nl80211_radar_notify(struct cfg80211_registered_device *rdev, @@ -115,31 +74,4 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, enum nl80211_radar_event event, struct net_device *netdev, gfp_t gfp); -void -nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *peer, - u32 num_packets, gfp_t gfp); - -void -nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *peer, - u32 num_packets, u32 rate, u32 intvl, gfp_t gfp); - -void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *bssid, - const u8 *replay_ctr, gfp_t gfp); - -void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int index, - const u8 *bssid, bool preauth, gfp_t gfp); - -void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_chan_def *chandef, gfp_t gfp); - -bool nl80211_unexpected_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp); -bool nl80211_unexpected_4addr_frame(struct net_device *dev, - const u8 *addr, gfp_t gfp); - #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 422d382..d77e1c1 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -6,11 +6,12 @@ #include "core.h" #include "trace.h" -static inline int rdev_suspend(struct cfg80211_registered_device *rdev) +static inline int rdev_suspend(struct cfg80211_registered_device *rdev, + struct cfg80211_wowlan *wowlan) { int ret; - trace_rdev_suspend(&rdev->wiphy, rdev->wowlan); - ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); + trace_rdev_suspend(&rdev->wiphy, wowlan); + ret = rdev->ops->suspend(&rdev->wiphy, wowlan); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -887,4 +888,17 @@ static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, trace_rdev_return_int(&rdev->wiphy, ret); return ret; } + +static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_update_ft_ies_params *ftie) +{ + int ret; + + trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie); + ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 98532c00..e6df52d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -184,14 +184,14 @@ static const struct ieee80211_regdomain world_regdom = { NL80211_RRF_NO_IBSS | NL80211_RRF_NO_OFDM), /* IEEE 802.11a, channel 36..48 */ - REG_RULE(5180-10, 5240+10, 40, 6, 20, + REG_RULE(5180-10, 5240+10, 80, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), - /* NB: 5260 MHz - 5700 MHz requies DFS */ + /* NB: 5260 MHz - 5700 MHz requires DFS */ /* IEEE 802.11a, channel 149..165 */ - REG_RULE(5745-10, 5825+10, 40, 6, 20, + REG_RULE(5745-10, 5825+10, 80, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 09d994d..ff6f7ae 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -160,7 +160,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_connect_params *params; - const u8 *prev_bssid = NULL; + struct cfg80211_assoc_request req = {}; int err; ASSERT_WDEV_LOCK(wdev); @@ -187,16 +187,20 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; if (wdev->conn->prev_bssid_valid) - prev_bssid = wdev->conn->prev_bssid; - err = __cfg80211_mlme_assoc(rdev, wdev->netdev, - params->channel, params->bssid, - prev_bssid, - params->ssid, params->ssid_len, - params->ie, params->ie_len, - params->mfp != NL80211_MFP_NO, - ¶ms->crypto, - params->flags, ¶ms->ht_capa, - ¶ms->ht_capa_mask); + req.prev_bssid = wdev->conn->prev_bssid; + req.ie = params->ie; + req.ie_len = params->ie_len; + req.use_mfp = params->mfp != NL80211_MFP_NO; + req.crypto = params->crypto; + req.flags = params->flags; + req.ht_capa = params->ht_capa; + req.ht_capa_mask = params->ht_capa_mask; + req.vht_capa = params->vht_capa; + req.vht_capa_mask = params->vht_capa_mask; + + err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, + params->bssid, params->ssid, + params->ssid_len, &req); if (err) __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, NULL, 0, diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 238ee49..8f28b9f 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -83,6 +83,14 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) +{ + struct wireless_dev *wdev; + + list_for_each_entry(wdev, &rdev->wdev_list, list) + cfg80211_leave(rdev, wdev); +} + static int wiphy_suspend(struct device *dev, pm_message_t state) { struct cfg80211_registered_device *rdev = dev_to_rdev(dev); @@ -90,12 +98,19 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) rdev->suspend_at = get_seconds(); - if (rdev->ops->suspend) { - rtnl_lock(); - if (rdev->wiphy.registered) - ret = rdev_suspend(rdev); - rtnl_unlock(); + rtnl_lock(); + if (rdev->wiphy.registered) { + if (!rdev->wowlan) + cfg80211_leave_all(rdev); + if (rdev->ops->suspend) + ret = rdev_suspend(rdev, rdev->wowlan); + if (ret == 1) { + /* Driver refuse to configure wowlan */ + cfg80211_leave_all(rdev); + ret = rdev_suspend(rdev, NULL); + } } + rtnl_unlock(); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 7586de77..3c2033b 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1786,6 +1786,26 @@ TRACE_EVENT(rdev_set_mac_acl, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy) ); +TRACE_EVENT(rdev_update_ft_ies, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_update_ft_ies_params *ftie), + TP_ARGS(wiphy, netdev, ftie), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u16, md) + __dynamic_array(u8, ie, ftie->ie_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->md = ftie->md; + memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ @@ -2414,6 +2434,32 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup, TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG) ); +TRACE_EVENT(cfg80211_ft_event, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct cfg80211_ft_event_params *ft_event), + TP_ARGS(wiphy, netdev, ft_event), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __dynamic_array(u8, ies, ft_event->ies_len) + MAC_ENTRY(target_ap) + __dynamic_array(u8, ric_ies, ft_event->ric_ies_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + if (ft_event->ies) + memcpy(__get_dynamic_array(ies), ft_event->ies, + ft_event->ies_len); + MAC_ASSIGN(target_ap, ft_event->target_ap); + if (ft_event->ric_ies) + memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies, + ft_event->ric_ies_len); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap)) +); + #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index 37a56ee..6cbac99 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, encaps_data = bridge_tunnel_header; encaps_len = sizeof(bridge_tunnel_header); skip_header_bytes -= 2; - } else if (ethertype > 0x600) { + } else if (ethertype >= ETH_P_802_3_MIN) { encaps_data = rfc1042_header; encaps_len = sizeof(rfc1042_header); skip_header_bytes -= 2; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 167c67d..23cea0f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1037,6 +1037,24 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } +static int flow_to_policy_dir(int dir) +{ + if (XFRM_POLICY_IN == FLOW_DIR_IN && + XFRM_POLICY_OUT == FLOW_DIR_OUT && + XFRM_POLICY_FWD == FLOW_DIR_FWD) + return dir; + + switch (dir) { + default: + case FLOW_DIR_IN: + return XFRM_POLICY_IN; + case FLOW_DIR_OUT: + return XFRM_POLICY_OUT; + case FLOW_DIR_FWD: + return XFRM_POLICY_FWD; + } +} + static struct flow_cache_object * xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *old_obj, void *ctx) @@ -1046,7 +1064,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, if (old_obj) xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - pol = __xfrm_policy_lookup(net, fl, family, dir); + pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); if (IS_ERR_OR_NULL(pol)) return ERR_CAST(pol); @@ -1932,7 +1950,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, * previous cache entry */ if (xdst == NULL) { num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, dir); + pols[0] = __xfrm_policy_lookup(net, fl, family, + flow_to_policy_dir(dir)); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) |