diff options
Diffstat (limited to 'net')
66 files changed, 4286 insertions, 1130 deletions
diff --git a/net/Kconfig b/net/Kconfig index c2cdbce..7b6cd34 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -369,6 +369,7 @@ source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" source "net/kcm/Kconfig" +source "net/strparser/Kconfig" config FIB_RULES bool diff --git a/net/Makefile b/net/Makefile index 9bd20bb..4cafaa2 100644 --- a/net/Makefile +++ b/net/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_AF_KCM) += kcm/ +obj-$(CONFIG_STREAM_PARSER) += strparser/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_L2TP) += l2tp/ obj-$(CONFIG_DECNET) += decnet/ diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 81dbbf5..f2cc50d3 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -20,12 +20,18 @@ #include <linux/errno.h> #include <linux/list.h> #include <linux/moduleparam.h> +#include <linux/netlink.h> #include <linux/printk.h> #include <linux/seq_file.h> +#include <linux/skbuff.h> #include <linux/stddef.h> #include <linux/string.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" +#include "netlink.h" char batadv_routing_algo[20] = "BATMAN_IV"; static struct hlist_head batadv_algo_list; @@ -138,3 +144,65 @@ static struct kparam_string batadv_param_string_ra = { module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, 0644); + +/** + * batadv_algo_dump_entry - fill in information about one supported routing + * algorithm + * @msg: netlink message to be sent back + * @portid: Port to reply to + * @seq: Sequence number of message + * @bat_algo_ops: Algorithm to be dumped + * + * Return: Error number, or 0 on success + */ +static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_algo_ops *bat_algo_ops) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_algo_dump - fill in information about supported routing + * algorithms + * @msg: netlink message to be sent back + * @cb: Parameters to the netlink request + * + * Return: Length of reply message. + */ +int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_algo_ops *bat_algo_ops; + int skip = cb->args[0]; + int i = 0; + + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { + if (i++ < skip) + continue; + + if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_algo_ops)) { + i--; + break; + } + } + + cb->args[0] = i; + + return msg->len; +} diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 860d773..3b5b69c 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -22,7 +22,9 @@ #include <linux/types.h> +struct netlink_callback; struct seq_file; +struct sk_buff; extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; @@ -31,5 +33,6 @@ void batadv_algo_init(void); int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); +int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb); #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 19b0abd..9ed4f1f 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -35,6 +35,7 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/pkt_sched.h> #include <linux/printk.h> #include <linux/random.h> @@ -48,12 +49,17 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/workqueue.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bitarray.h" +#include "gateway_client.h" #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "network-coding.h" #include "originator.h" #include "packet.h" @@ -528,36 +534,25 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { struct net_device *soft_iface; - struct batadv_priv *bat_priv; - struct batadv_hard_iface *primary_if = NULL; if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not specified\n"); - goto out; + return; } soft_iface = forw_packet->if_incoming->soft_iface; - bat_priv = netdev_priv(soft_iface); if (WARN_ON(!forw_packet->if_outgoing)) - goto out; + return; if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) - goto out; + return; if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) - goto out; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; + return; /* only for one specific outgoing interface */ batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing); - -out: - if (primary_if) - batadv_hardif_put(primary_if); } /** @@ -685,19 +680,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, struct batadv_forw_packet *forw_packet_aggr; unsigned char *skb_buff; unsigned int skb_size; + atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - /* own packet should always be scheduled */ - if (!own_packet) { - if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "batman packet queue full\n"); - return; - } - } - - forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); + forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, + queue_left, bat_priv); if (!forw_packet_aggr) - goto out_nomem; + return; if (atomic_read(&bat_priv->aggregated_ogms) && packet_len < BATADV_MAX_AGGREGATION_BYTES) @@ -708,8 +696,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb_size += ETH_HLEN; forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); - if (!forw_packet_aggr->skb) - goto out_free_forw_packet; + if (!forw_packet_aggr->skb) { + batadv_forw_packet_free(forw_packet_aggr); + return; + } + forw_packet_aggr->skb->priority = TC_PRIO_CONTROL; skb_reserve(forw_packet_aggr->skb, ETH_HLEN); @@ -717,12 +708,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->packet_len = packet_len; memcpy(skb_buff, packet_buff, packet_len); - kref_get(&if_incoming->refcount); - kref_get(&if_outgoing->refcount); forw_packet_aggr->own = own_packet; - forw_packet_aggr->if_incoming = if_incoming; - forw_packet_aggr->if_outgoing = if_outgoing; - forw_packet_aggr->num_packets = 0; forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; forw_packet_aggr->send_time = send_time; @@ -741,13 +727,6 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, queue_delayed_work(batadv_event_workqueue, &forw_packet_aggr->delayed_work, send_time - jiffies); - - return; -out_free_forw_packet: - kfree(forw_packet_aggr); -out_nomem: - if (!own_packet) - atomic_inc(&bat_priv->batman_queue_left); } /* aggregate a new packet into the existing ogm packet */ @@ -1830,10 +1809,6 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) batadv_iv_ogm_schedule(forw_packet->if_incoming); out: - /* don't count own packet */ - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - batadv_forw_packet_free(forw_packet); } @@ -1978,6 +1953,235 @@ next: } /** + * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a + * given outgoing interface. + * @neigh_node: Neighbour of interest + * @if_outgoing: Outgoing interface of interest + * @tq_avg: Pointer of where to store the TQ average + * + * Return: False if no average TQ available, otherwise true. + */ +static bool +batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node, + struct batadv_hard_iface *if_outgoing, + u8 *tq_avg) +{ + struct batadv_neigh_ifinfo *n_ifinfo; + + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + return false; + + *tq_avg = n_ifinfo->bat_iv.tq_avg; + batadv_neigh_ifinfo_put(n_ifinfo); + + return true; +} + +/** + * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @neigh_node: Single hops neighbour + * @best: Is the best originator + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, + bool best) +{ + void *hdr; + u8 tq_avg; + unsigned int last_seen_msecs; + + last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); + + if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg)) + return 0; + + if (if_outgoing != BATADV_IF_DEFAULT && + if_outgoing != neigh_node->if_incoming) + return 0; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig_node->orig) || + nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + neigh_node->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + neigh_node->if_incoming->net_dev->ifindex) || + nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @sub_s: Number of sub entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, int *sub_s) +{ + struct batadv_neigh_node *neigh_node_best; + struct batadv_neigh_node *neigh_node; + int sub = 0; + bool best; + u8 tq_avg_best; + + neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); + if (!neigh_node_best) + goto out; + + if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing, + &tq_avg_best)) + goto out; + + if (tq_avg_best == 0) + goto out; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + if (sub++ < *sub_s) + continue; + + best = (neigh_node == neigh_node_best); + + if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq, + bat_priv, if_outgoing, + orig_node, neigh_node, + best)) { + batadv_neigh_node_put(neigh_node_best); + + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + out: + if (neigh_node_best) + batadv_neigh_node_put(neigh_node_best); + + *sub_s = 0; + return 0; +} + +/** + * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @head: Bucket to be dumped + * @idx_s: Number of entries to be skipped + * @sub: Number of sub entries to be skipped + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_orig_node *orig_node; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, + sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_iv_ogm_orig_dump - Dump the originators into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + */ +static void +batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_iv_ogm_orig_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, if_outgoing, head, + &idx, &sub)) + break; + + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; +} + +/** * batadv_iv_hardif_neigh_print - print a single hop neighbour node * @seq: neighbour table seq_file struct * @hardif_neigh: hardif neighbour information @@ -2029,35 +2233,40 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, } /** - * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors + * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor + * @diff: pointer to integer receiving the calculated difference * - * Return: a value less, equal to or greater than 0 if the metric via neigh1 is - * lower, the same as or higher than the metric via neigh2 + * The content of *@diff is only valid when this function returns true. + * It is less, equal to or greater than 0 if the metric via neigh1 is lower, + * the same as or higher than the metric via neigh2 + * + * Return: true when the difference could be calculated, false otherwise */ -static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, - struct batadv_hard_iface *if_outgoing1, - struct batadv_neigh_node *neigh2, - struct batadv_hard_iface *if_outgoing2) +static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2, + int *diff) { struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; u8 tq1, tq2; - int diff; + bool ret = true; neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); if (!neigh1_ifinfo || !neigh2_ifinfo) { - diff = 0; + ret = false; goto out; } tq1 = neigh1_ifinfo->bat_iv.tq_avg; tq2 = neigh2_ifinfo->bat_iv.tq_avg; - diff = tq1 - tq2; + *diff = (int)tq1 - (int)tq2; out: if (neigh1_ifinfo) @@ -2065,6 +2274,162 @@ out: if (neigh2_ifinfo) batadv_neigh_ifinfo_put(neigh2_ifinfo); + return ret; +} + +/** + * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hardif_neigh: Neighbour to be dumped + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + void *hdr; + unsigned int last_seen_msecs; + + last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + hardif_neigh->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hardif_neigh->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface + * into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @hard_iface: Hard interface to dump the neighbours for + * @idx_s: Number of entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + int *idx_s) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + int idx = 0; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + if (idx++ < *idx_s) + continue; + + if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq, + hardif_neigh)) { + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + + *idx_s = 0; + return 0; +} + +/** + * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @single_hardif: Limit dump to this hard interfaace + */ +static void +batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *single_hardif) +{ + struct batadv_hard_iface *hard_iface; + int i_hardif = 0; + int i_hardif_s = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + rcu_read_lock(); + if (single_hardif) { + if (i_hardif_s == 0) { + if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, + single_hardif, + &idx) == 0) + i_hardif++; + } + } else { + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, + list) { + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (i_hardif++ < i_hardif_s) + continue; + + if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, + hard_iface, &idx)) { + i_hardif--; + break; + } + } + } + rcu_read_unlock(); + + cb->args[0] = i_hardif; + cb->args[1] = idx; +} + +/** + * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors + * @neigh1: the first neighbor object of the comparison + * @if_outgoing1: outgoing interface for the first neighbor + * @neigh2: the second neighbor object of the comparison + * @if_outgoing2: outgoing interface for the second neighbor + * + * Return: a value less, equal to or greater than 0 if the metric via neigh1 is + * lower, the same as or higher than the metric via neigh2 + */ +static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2) +{ + bool ret; + int diff; + + ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, + if_outgoing2, &diff); + if (!ret) + return 0; + return diff; } @@ -2085,36 +2450,339 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { - struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; - u8 tq1, tq2; bool ret; + int diff; - neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, + if_outgoing2, &diff); + if (!ret) + return false; - /* we can't say that the metric is better */ - if (!neigh1_ifinfo || !neigh2_ifinfo) { - ret = false; + ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD; + return ret; +} + +static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +{ + /* begin scheduling originator messages on that interface */ + batadv_iv_ogm_schedule(hard_iface); +} + +static struct batadv_gw_node * +batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) +{ + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo; + struct batadv_gw_node *gw_node, *curr_gw = NULL; + u64 max_gw_factor = 0; + u64 tmp_gw_factor = 0; + u8 max_tq = 0; + u8 tq_avg; + struct batadv_orig_node *orig_node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + orig_node = gw_node->orig_node; + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router) + continue; + + router_ifinfo = batadv_neigh_ifinfo_get(router, + BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto next; + + if (!kref_get_unless_zero(&gw_node->refcount)) + goto next; + + tq_avg = router_ifinfo->bat_iv.tq_avg; + + switch (atomic_read(&bat_priv->gw.sel_class)) { + case 1: /* fast connection */ + tmp_gw_factor = tq_avg * tq_avg; + tmp_gw_factor *= gw_node->bandwidth_down; + tmp_gw_factor *= 100 * 100; + tmp_gw_factor >>= 18; + + if ((tmp_gw_factor > max_gw_factor) || + ((tmp_gw_factor == max_gw_factor) && + (tq_avg > max_tq))) { + if (curr_gw) + batadv_gw_node_put(curr_gw); + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + } + break; + + default: /* 2: stable connection (use best statistic) + * 3: fast-switch (use best statistic but change as + * soon as a better gateway appears) + * XX: late-switch (use best statistic but change as + * soon as a better gateway appears which has + * $routing_class more tq points) + */ + if (tq_avg > max_tq) { + if (curr_gw) + batadv_gw_node_put(curr_gw); + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + } + break; + } + + if (tq_avg > max_tq) + max_tq = tq_avg; + + if (tmp_gw_factor > max_gw_factor) + max_gw_factor = tmp_gw_factor; + + batadv_gw_node_put(gw_node); + +next: + batadv_neigh_node_put(router); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + } + rcu_read_unlock(); + + return curr_gw; +} + +static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node) +{ + struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL; + struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL; + struct batadv_neigh_node *router_gw = NULL; + struct batadv_neigh_node *router_orig = NULL; + u8 gw_tq_avg, orig_tq_avg; + bool ret = false; + + /* dynamic re-election is performed only on fast or late switch */ + if (atomic_read(&bat_priv->gw.sel_class) <= 2) + return false; + + router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); + if (!router_gw) { + ret = true; goto out; } - tq1 = neigh1_ifinfo->bat_iv.tq_avg; - tq2 = neigh2_ifinfo->bat_iv.tq_avg; - ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD; + router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw, + BATADV_IF_DEFAULT); + if (!router_gw_ifinfo) { + ret = true; + goto out; + } + + router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router_orig) + goto out; + + router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig, + BATADV_IF_DEFAULT); + if (!router_orig_ifinfo) + goto out; + + gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg; + orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg; + + /* the TQ value has to be better */ + if (orig_tq_avg < gw_tq_avg) + goto out; + + /* if the routing class is greater than 3 the value tells us how much + * greater the TQ value of the new gateway must be + */ + if ((atomic_read(&bat_priv->gw.sel_class) > 3) && + (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) + goto out; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", + gw_tq_avg, orig_tq_avg); + ret = true; out: - if (neigh1_ifinfo) - batadv_neigh_ifinfo_put(neigh1_ifinfo); - if (neigh2_ifinfo) - batadv_neigh_ifinfo_put(neigh2_ifinfo); + if (router_gw_ifinfo) + batadv_neigh_ifinfo_put(router_gw_ifinfo); + if (router_orig_ifinfo) + batadv_neigh_ifinfo_put(router_orig_ifinfo); + if (router_gw) + batadv_neigh_node_put(router_gw); + if (router_orig) + batadv_neigh_node_put(router_orig); return ret; } -static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +/* fails if orig_node has no router */ +static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv, + struct seq_file *seq, + const struct batadv_gw_node *gw_node) { - /* begin scheduling originator messages on that interface */ - batadv_iv_ogm_schedule(hard_iface); + struct batadv_gw_node *curr_gw; + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + int ret = -1; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_iv.tq_avg, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; + + if (curr_gw) + batadv_gw_node_put(curr_gw); +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +static void batadv_iv_gw_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_gw_node *gw_node; + int gw_count = 0; + + seq_puts(seq, + " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + /* fails if orig_node has no router */ + if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) + continue; + + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_puts(seq, "No gateways in range ...\n"); +} + +/** + * batadv_iv_gw_dump_entry - Dump a gateway into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @gw_node: Gateway to be dumped + * + * Return: Error code, or 0 on success + */ +static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_gw_node *gw_node) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_neigh_node *router; + struct batadv_gw_node *curr_gw; + int ret = -EINVAL; + void *hdr; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + ret = -EMSGSIZE; + + if (curr_gw == gw_node) + if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + gw_node->orig_node->orig) || + nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) || + nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, + router->addr) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + router->if_incoming->net_dev->name) || + nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, + gw_node->bandwidth_down) || + nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, + gw_node->bandwidth_up)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_iv_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + */ +static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_gw_node *gw_node; + int idx_skip = cb->args[0]; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (idx++ < idx_skip) + continue; + + if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, gw_node)) { + idx_skip = idx - 1; + goto unlock; + } + } + + idx_skip = idx; +unlock: + rcu_read_unlock(); + + cb->args[0] = idx_skip; } static struct batadv_algo_ops batadv_batman_iv __read_mostly = { @@ -2130,13 +2798,21 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .cmp = batadv_iv_ogm_neigh_cmp, .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, .print = batadv_iv_neigh_print, + .dump = batadv_iv_ogm_neigh_dump, }, .orig = { .print = batadv_iv_ogm_orig_print, + .dump = batadv_iv_ogm_orig_dump, .free = batadv_iv_ogm_orig_free, .add_if = batadv_iv_ogm_orig_add_if, .del_if = batadv_iv_ogm_orig_del_if, }, + .gw = { + .get_best_gw_node = batadv_iv_gw_get_best_gw_node, + .is_eligible = batadv_iv_gw_is_eligible, + .print = batadv_iv_gw_print, + .dump = batadv_iv_gw_dump, + }, }; int __init batadv_iv_init(void) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 0366cbf..9e872dc 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -21,24 +21,38 @@ #include <linux/atomic.h> #include <linux/bug.h> #include <linux/cache.h> +#include <linux/errno.h> +#include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/kref.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/workqueue.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bat_v_elp.h" #include "bat_v_ogm.h" +#include "gateway_client.h" +#include "gateway_common.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" +struct sk_buff; + static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -200,6 +214,138 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv, } /** + * batadv_v_neigh_dump_neigh - Dump a neighbour into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hardif_neigh: Neighbour to dump + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + void *hdr; + unsigned int last_seen_msecs; + u32 throughput; + + last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); + throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput); + throughput = throughput * 100; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_NEIGHBORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + hardif_neigh->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hardif_neigh->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs) || + nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into + * a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @hard_iface: The hard interface to be dumped + * @idx_s: Entries to be skipped + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + int *idx_s) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + int idx = 0; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + if (idx++ < *idx_s) + continue; + + if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) { + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + + *idx_s = 0; + return 0; +} + +/** + * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a + * message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @single_hardif: Limit dumping to this hard interface + */ +static void +batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *single_hardif) +{ + struct batadv_hard_iface *hard_iface; + int i_hardif = 0; + int i_hardif_s = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + rcu_read_lock(); + if (single_hardif) { + if (i_hardif_s == 0) { + if (batadv_v_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, single_hardif, + &idx) == 0) + i_hardif++; + } + } else { + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (i_hardif++ < i_hardif_s) + continue; + + if (batadv_v_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, hard_iface, + &idx)) { + i_hardif--; + break; + } + } + } + rcu_read_unlock(); + + cb->args[0] = i_hardif; + cb->args[1] = idx; +} + +/** * batadv_v_orig_print - print the originator table * @bat_priv: the bat priv with all the soft interface information * @seq: debugfs table seq_file struct @@ -266,6 +412,204 @@ next: seq_puts(seq, "No batman nodes in range ...\n"); } +/** + * batadv_v_orig_dump_subentry - Dump an originator subentry into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @neigh_node: Single hops neighbour + * @best: Is the best originator + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, + bool best) +{ + struct batadv_neigh_ifinfo *n_ifinfo; + unsigned int last_seen_msecs; + u32 throughput; + void *hdr; + + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + return 0; + + throughput = n_ifinfo->bat_v.throughput * 100; + + batadv_neigh_ifinfo_put(n_ifinfo); + + last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); + + if (if_outgoing != BATADV_IF_DEFAULT && + if_outgoing != neigh_node->if_incoming) + return 0; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_ORIGINATORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) || + nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + neigh_node->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + neigh_node->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_v_orig_dump_entry - Dump an originator entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @sub_s: Number of sub entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, int *sub_s) +{ + struct batadv_neigh_node *neigh_node_best; + struct batadv_neigh_node *neigh_node; + int sub = 0; + bool best; + + neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); + if (!neigh_node_best) + goto out; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + if (sub++ < *sub_s) + continue; + + best = (neigh_node == neigh_node_best); + + if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, + neigh_node, best)) { + batadv_neigh_node_put(neigh_node_best); + + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + out: + if (neigh_node_best) + batadv_neigh_node_put(neigh_node_best); + + *sub_s = 0; + return 0; +} + +/** + * batadv_v_orig_dump_bucket - Dump an originator bucket into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @head: Bucket to be dumped + * @idx_s: Number of entries to be skipped + * @sub: Number of sub entries to be skipped + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_orig_node *orig_node; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_v_orig_dump - Dump the originators into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + */ +static void +batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_v_orig_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, if_outgoing, head, &idx, + &sub)) + break; + + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; +} + static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, @@ -320,6 +664,363 @@ err_ifinfo1: return ret; } +static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv, + char *buff, size_t count) +{ + u32 old_class, class; + + if (!batadv_parse_throughput(bat_priv->soft_iface, buff, + "B.A.T.M.A.N. V GW selection class", + &class)) + return -EINVAL; + + old_class = atomic_read(&bat_priv->gw.sel_class); + atomic_set(&bat_priv->gw.sel_class, class); + + if (old_class != class) + batadv_gw_reselect(bat_priv); + + return count; +} + +static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff) +{ + u32 class = atomic_read(&bat_priv->gw.sel_class); + + return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10); +} + +/** + * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW + * @gw_node: the GW to retrieve the metric for + * @bw: the pointer where the metric will be stored. The metric is computed as + * the minimum between the GW advertised throughput and the path throughput to + * it in the mesh + * + * Return: 0 on success, -1 on failure + */ +static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_orig_node *orig_node; + struct batadv_neigh_node *router; + int ret = -1; + + orig_node = gw_node->orig_node; + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + /* the GW metric is computed as the minimum between the path throughput + * to reach the GW itself and the advertised bandwidth. + * This gives us an approximation of the effective throughput that the + * client can expect via this particular GW node + */ + *bw = router_ifinfo->bat_v.throughput; + *bw = min_t(u32, *bw, gw_node->bandwidth_down); + + ret = 0; +out: + if (router) + batadv_neigh_node_put(router); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + + return ret; +} + +/** + * batadv_v_gw_get_best_gw_node - retrieve the best GW node + * @bat_priv: the bat priv with all the soft interface information + * + * Return: the GW node having the best GW-metric, NULL if no GW is known + */ +static struct batadv_gw_node * +batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) +{ + struct batadv_gw_node *gw_node, *curr_gw = NULL; + u32 max_bw = 0, bw; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (!kref_get_unless_zero(&gw_node->refcount)) + continue; + + if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) + goto next; + + if (curr_gw && (bw <= max_bw)) + goto next; + + if (curr_gw) + batadv_gw_node_put(curr_gw); + + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + max_bw = bw; + +next: + batadv_gw_node_put(gw_node); + } + rcu_read_unlock(); + + return curr_gw; +} + +/** + * batadv_v_gw_is_eligible - check if a originator would be selected as GW + * @bat_priv: the bat priv with all the soft interface information + * @curr_gw_orig: originator representing the currently selected GW + * @orig_node: the originator representing the new candidate + * + * Return: true if orig_node can be selected as current GW, false otherwise + */ +static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node) +{ + struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL; + u32 gw_throughput, orig_throughput, threshold; + bool ret = false; + + threshold = atomic_read(&bat_priv->gw.sel_class); + + curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig); + if (!curr_gw) { + ret = true; + goto out; + } + + if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) { + ret = true; + goto out; + } + + orig_gw = batadv_gw_node_get(bat_priv, orig_node); + if (!orig_node) + goto out; + + if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) + goto out; + + if (orig_throughput < gw_throughput) + goto out; + + if ((orig_throughput - gw_throughput) < threshold) + goto out; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n", + gw_throughput, orig_throughput); + + ret = true; +out: + if (curr_gw) + batadv_gw_node_put(curr_gw); + if (orig_gw) + batadv_gw_node_put(orig_gw); + + return ret; +} + +/* fails if orig_node has no router */ +static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv, + struct seq_file *seq, + const struct batadv_gw_node *gw_node) +{ + struct batadv_gw_node *curr_gw; + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + int ret = -1; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_v.throughput / 10, + router_ifinfo->bat_v.throughput % 10, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; + + if (curr_gw) + batadv_gw_node_put(curr_gw); +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_v_gw_print - print the gateway list + * @bat_priv: the bat priv with all the soft interface information + * @seq: gateway table seq_file struct + */ +static void batadv_v_gw_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_gw_node *gw_node; + int gw_count = 0; + + seq_puts(seq, + " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + /* fails if orig_node has no router */ + if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) + continue; + + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_puts(seq, "No gateways in range ...\n"); +} + +/** + * batadv_v_gw_dump_entry - Dump a gateway into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @gw_node: Gateway to be dumped + * + * Return: Error code, or 0 on success + */ +static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_gw_node *gw_node) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_neigh_node *router; + struct batadv_gw_node *curr_gw; + int ret = -EINVAL; + void *hdr; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + ret = -EMSGSIZE; + + if (curr_gw == gw_node) { + if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { + genlmsg_cancel(msg, hdr); + goto out; + } + } + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + gw_node->orig_node->orig)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, + router_ifinfo->bat_v.throughput)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + router->if_incoming->net_dev->name)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, + gw_node->bandwidth_down)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_v_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + */ +static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_gw_node *gw_node; + int idx_skip = cb->args[0]; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (idx++ < idx_skip) + continue; + + if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, gw_node)) { + idx_skip = idx - 1; + goto unlock; + } + } + + idx_skip = idx; +unlock: + rcu_read_unlock(); + + cb->args[0] = idx_skip; +} + static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", .iface = { @@ -334,9 +1035,19 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .cmp = batadv_v_neigh_cmp, .is_similar_or_better = batadv_v_neigh_is_sob, .print = batadv_v_neigh_print, + .dump = batadv_v_neigh_dump, }, .orig = { .print = batadv_v_orig_print, + .dump = batadv_v_orig_dump, + }, + .gw = { + .store_sel_class = batadv_v_store_sel_class, + .show_sel_class = batadv_v_show_sel_class, + .get_best_gw_node = batadv_v_gw_get_best_gw_node, + .is_eligible = batadv_v_gw_is_eligible, + .print = batadv_v_gw_print, + .dump = batadv_v_gw_dump, }, }; @@ -363,7 +1074,16 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) */ int batadv_v_mesh_init(struct batadv_priv *bat_priv) { - return batadv_v_ogm_init(bat_priv); + int ret = 0; + + ret = batadv_v_ogm_init(bat_priv); + if (ret < 0) + return ret; + + /* set default throughput difference threshold to 5Mbps */ + atomic_set(&bat_priv->gw.sel_class, 50); + + return 0; } /** diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ad2ffe1..35ed1d3 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -35,6 +35,7 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> @@ -45,12 +46,18 @@ #include <linux/string.h> #include <linux/workqueue.h> #include <net/arp.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" +#include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" @@ -1148,7 +1155,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, /* Let the loopdetect frames on the mesh in any case. */ if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT) - return 0; + return false; /* check if it is a claim frame. */ ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, @@ -2052,6 +2059,168 @@ out: } /** + * batadv_bla_claim_dump_entry - dump one entry of the claim table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @claim: entry to dump + * + * Return: 0 or error code. + */ +static int +batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct batadv_bla_claim *claim) +{ + u8 *primary_addr = primary_if->net_dev->dev_addr; + u16 backbone_crc; + bool is_own; + void *hdr; + int ret = -EINVAL; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + is_own = batadv_compare_eth(claim->backbone_gw->orig, + primary_addr); + + spin_lock_bh(&claim->backbone_gw->crc_lock); + backbone_crc = claim->backbone_gw->crc; + spin_unlock_bh(&claim->backbone_gw->crc_lock); + + if (is_own) + if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) || + nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) || + nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, + claim->backbone_gw->orig) || + nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + backbone_crc)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + return ret; +} + +/** + * batadv_bla_claim_dump_bucket - dump one bucket of the claim table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: always 0. + */ +static int +batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_bla_claim *claim; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(claim, head, hash_entry) { + if (idx++ < *idx_skip) + continue; + if (batadv_bla_claim_dump_entry(msg, portid, seq, + primary_if, claim)) { + *idx_skip = idx - 1; + goto unlock; + } + } + + *idx_skip = idx; +unlock: + rcu_read_unlock(); + return 0; +} + +/** + * batadv_bla_claim_dump - dump claim table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->bla.claim_hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_bla_claim_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + primary_if, head, &idx)) + break; + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + +/** * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq * file * @seq: seq file to print on @@ -2114,3 +2283,167 @@ out: batadv_hardif_put(primary_if); return 0; } + +/** + * batadv_bla_backbone_dump_entry - dump one entry of the backbone table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @backbone_gw: entry to dump + * + * Return: 0 or error code. + */ +static int +batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct batadv_bla_backbone_gw *backbone_gw) +{ + u8 *primary_addr = primary_if->net_dev->dev_addr; + u16 backbone_crc; + bool is_own; + int msecs; + void *hdr; + int ret = -EINVAL; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + is_own = batadv_compare_eth(backbone_gw->orig, primary_addr); + + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); + + msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); + + if (is_own) + if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, + backbone_gw->orig) || + nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) || + nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + backbone_crc) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + return ret; +} + +/** + * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: always 0. + */ +static int +batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_bla_backbone_gw *backbone_gw; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { + if (idx++ < *idx_skip) + continue; + if (batadv_bla_backbone_dump_entry(msg, portid, seq, + primary_if, backbone_gw)) { + *idx_skip = idx - 1; + goto unlock; + } + } + + *idx_skip = idx; +unlock: + rcu_read_unlock(); + return 0; +} + +/** + * batadv_bla_backbone_dump - dump backbone table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->bla.backbone_hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_bla_backbone_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + primary_if, head, &idx)) + break; + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 0f01dae..1ae93e4 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -23,6 +23,7 @@ #include <linux/types.h> struct net_device; +struct netlink_callback; struct seq_file; struct sk_buff; @@ -35,8 +36,10 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid); bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, @@ -47,7 +50,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); - +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); #define BATADV_BLA_CRC_INIT 0 #else /* ifdef CONFIG_BATMAN_ADV_BLA */ @@ -112,6 +115,18 @@ static inline void batadv_bla_free(struct batadv_priv *bat_priv) { } +static inline int batadv_bla_claim_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + +static inline int batadv_bla_backbone_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + #endif /* ifdef CONFIG_BATMAN_ADV_BLA */ #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */ diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 1d68b6e..b4ffba7 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -31,6 +31,7 @@ #include <linux/stddef.h> #include <linux/stringify.h> #include <linux/sysfs.h> +#include <net/net_namespace.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -305,12 +306,16 @@ void batadv_debugfs_destroy(void) */ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) { + struct net *net = dev_net(hard_iface->net_dev); struct batadv_debuginfo **bat_debug; struct dentry *file; if (!batadv_debugfs) goto out; + if (net != &init_net) + return 0; + hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name, batadv_debugfs); if (!hard_iface->debug_dir) @@ -341,6 +346,11 @@ out: */ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) { + struct net *net = dev_net(hard_iface->net_dev); + + if (net != &init_net) + return; + if (batadv_debugfs) { debugfs_remove_recursive(hard_iface->debug_dir); hard_iface->debug_dir = NULL; @@ -351,11 +361,15 @@ int batadv_debugfs_add_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_debuginfo **bat_debug; + struct net *net = dev_net(dev); struct dentry *file; if (!batadv_debugfs) goto out; + if (net != &init_net) + return 0; + bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs); if (!bat_priv->debug_dir) goto out; @@ -392,6 +406,10 @@ out: void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); + struct net *net = dev_net(dev); + + if (net != &init_net) + return; batadv_debug_log_cleanup(bat_priv); diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 63a805d..c2928c2 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -20,6 +20,7 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> +#include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> #include <linux/if_ether.h> @@ -31,6 +32,7 @@ #include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> @@ -39,13 +41,17 @@ #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/udp.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" #include "routing.h" +#include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" @@ -80,12 +86,12 @@ static void batadv_gw_node_release(struct kref *ref) * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it * @gw_node: gateway node to free */ -static void batadv_gw_node_put(struct batadv_gw_node *gw_node) +void batadv_gw_node_put(struct batadv_gw_node *gw_node) { kref_put(&gw_node->refcount, batadv_gw_node_release); } -static struct batadv_gw_node * +struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node; @@ -164,86 +170,6 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv) atomic_set(&bat_priv->gw.reselect, 1); } -static struct batadv_gw_node * -batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) -{ - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo; - struct batadv_gw_node *gw_node, *curr_gw = NULL; - u64 max_gw_factor = 0; - u64 tmp_gw_factor = 0; - u8 max_tq = 0; - u8 tq_avg; - struct batadv_orig_node *orig_node; - - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - orig_node = gw_node->orig_node; - router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); - if (!router) - continue; - - router_ifinfo = batadv_neigh_ifinfo_get(router, - BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto next; - - if (!kref_get_unless_zero(&gw_node->refcount)) - goto next; - - tq_avg = router_ifinfo->bat_iv.tq_avg; - - switch (atomic_read(&bat_priv->gw.sel_class)) { - case 1: /* fast connection */ - tmp_gw_factor = tq_avg * tq_avg; - tmp_gw_factor *= gw_node->bandwidth_down; - tmp_gw_factor *= 100 * 100; - tmp_gw_factor >>= 18; - - if ((tmp_gw_factor > max_gw_factor) || - ((tmp_gw_factor == max_gw_factor) && - (tq_avg > max_tq))) { - if (curr_gw) - batadv_gw_node_put(curr_gw); - curr_gw = gw_node; - kref_get(&curr_gw->refcount); - } - break; - - default: /* 2: stable connection (use best statistic) - * 3: fast-switch (use best statistic but change as - * soon as a better gateway appears) - * XX: late-switch (use best statistic but change as - * soon as a better gateway appears which has - * $routing_class more tq points) - */ - if (tq_avg > max_tq) { - if (curr_gw) - batadv_gw_node_put(curr_gw); - curr_gw = gw_node; - kref_get(&curr_gw->refcount); - } - break; - } - - if (tq_avg > max_tq) - max_tq = tq_avg; - - if (tmp_gw_factor > max_gw_factor) - max_gw_factor = tmp_gw_factor; - - batadv_gw_node_put(gw_node); - -next: - batadv_neigh_node_put(router); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - } - rcu_read_unlock(); - - return curr_gw; -} - /** * batadv_gw_check_client_stop - check if client mode has been switched off * @bat_priv: the bat priv with all the soft interface information @@ -287,12 +213,19 @@ void batadv_gw_election(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT) goto out; + if (!bat_priv->algo_ops->gw.get_best_gw_node) + goto out; + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw) goto out; - next_gw = batadv_gw_get_best_gw_node(bat_priv); + /* if gw.reselect is set to 1 it means that a previous call to + * gw.is_eligible() said that we have a new best GW, therefore it can + * now be picked from the list and selected + */ + next_gw = bat_priv->algo_ops->gw.get_best_gw_node(bat_priv); if (curr_gw == next_gw) goto out; @@ -360,70 +293,31 @@ out: void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { - struct batadv_neigh_ifinfo *router_orig_tq = NULL; - struct batadv_neigh_ifinfo *router_gw_tq = NULL; struct batadv_orig_node *curr_gw_orig; - struct batadv_neigh_node *router_gw = NULL; - struct batadv_neigh_node *router_orig = NULL; - u8 gw_tq_avg, orig_tq_avg; + + /* abort immediately if the routing algorithm does not support gateway + * election + */ + if (!bat_priv->algo_ops->gw.is_eligible) + return; curr_gw_orig = batadv_gw_get_selected_orig(bat_priv); if (!curr_gw_orig) goto reselect; - router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); - if (!router_gw) - goto reselect; - - router_gw_tq = batadv_neigh_ifinfo_get(router_gw, - BATADV_IF_DEFAULT); - if (!router_gw_tq) - goto reselect; - /* this node already is the gateway */ if (curr_gw_orig == orig_node) goto out; - router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); - if (!router_orig) - goto out; - - router_orig_tq = batadv_neigh_ifinfo_get(router_orig, - BATADV_IF_DEFAULT); - if (!router_orig_tq) - goto out; - - gw_tq_avg = router_gw_tq->bat_iv.tq_avg; - orig_tq_avg = router_orig_tq->bat_iv.tq_avg; - - /* the TQ value has to be better */ - if (orig_tq_avg < gw_tq_avg) - goto out; - - /* if the routing class is greater than 3 the value tells us how much - * greater the TQ value of the new gateway must be - */ - if ((atomic_read(&bat_priv->gw.sel_class) > 3) && - (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) + if (!bat_priv->algo_ops->gw.is_eligible(bat_priv, curr_gw_orig, + orig_node)) goto out; - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", - gw_tq_avg, orig_tq_avg); - reselect: batadv_gw_reselect(bat_priv); out: if (curr_gw_orig) batadv_orig_node_put(curr_gw_orig); - if (router_gw) - batadv_neigh_node_put(router_gw); - if (router_orig) - batadv_neigh_node_put(router_orig); - if (router_gw_tq) - batadv_neigh_ifinfo_put(router_gw_tq); - if (router_orig_tq) - batadv_neigh_ifinfo_put(router_orig_tq); } /** @@ -472,9 +366,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, * * Return: gateway node if found or NULL otherwise. */ -static struct batadv_gw_node * -batadv_gw_node_get(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node) +struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node) { struct batadv_gw_node *gw_node_tmp, *gw_node = NULL; @@ -585,81 +478,85 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->gw.list_lock); } -/* fails if orig_node has no router */ -static int batadv_write_buffer_text(struct batadv_priv *bat_priv, - struct seq_file *seq, - const struct batadv_gw_node *gw_node) +int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) { - struct batadv_gw_node *curr_gw; - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo = NULL; - int ret = -1; + struct net_device *net_dev = (struct net_device *)seq->private; + struct batadv_priv *bat_priv = netdev_priv(net_dev); + struct batadv_hard_iface *primary_if; - router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); - if (!router) - goto out; + primary_if = batadv_seq_print_text_primary_if_get(seq); + if (!primary_if) + return 0; - router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto out; + seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", + BATADV_SOURCE_VERSION, primary_if->net_dev->name, + primary_if->net_dev->dev_addr, net_dev->name, + bat_priv->algo_ops->name); - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + batadv_hardif_put(primary_if); - seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", - (curr_gw == gw_node ? "=>" : " "), - gw_node->orig_node->orig, - router_ifinfo->bat_iv.tq_avg, router->addr, - router->if_incoming->net_dev->name, - gw_node->bandwidth_down / 10, - gw_node->bandwidth_down % 10, - gw_node->bandwidth_up / 10, - gw_node->bandwidth_up % 10); - ret = seq_has_overflowed(seq) ? -1 : 0; + if (!bat_priv->algo_ops->gw.print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + return 0; + } - if (curr_gw) - batadv_gw_node_put(curr_gw); -out: - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); - return ret; + bat_priv->algo_ops->gw.print(bat_priv, seq); + + return 0; } -int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) +/** + * batadv_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * + * Return: Error code, or length of message + */ +int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) { - struct net_device *net_dev = (struct net_device *)seq->private; - struct batadv_priv *bat_priv = netdev_priv(net_dev); - struct batadv_hard_iface *primary_if; - struct batadv_gw_node *gw_node; - int gw_count = 0; - - primary_if = batadv_seq_print_text_primary_if_get(seq); - if (!primary_if) + struct batadv_hard_iface *primary_if = NULL; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + int ifindex; + int ret; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; goto out; + } - seq_printf(seq, - " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", - BATADV_SOURCE_VERSION, primary_if->net_dev->name, - primary_if->net_dev->dev_addr, net_dev->name); + bat_priv = netdev_priv(soft_iface); - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - /* fails if orig_node has no router */ - if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0) - continue; + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } - gw_count++; + if (!bat_priv->algo_ops->gw.dump) { + ret = -EOPNOTSUPP; + goto out; } - rcu_read_unlock(); - if (gw_count == 0) - seq_puts(seq, "No gateways in range ...\n"); + bat_priv->algo_ops->gw.dump(msg, cb, bat_priv); + + ret = msg->len; out: if (primary_if) batadv_hardif_put(primary_if); - return 0; + if (soft_iface) + dev_put(soft_iface); + + return ret; } /** diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 582dd8c..859166d 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -23,6 +23,7 @@ #include <linux/types.h> struct batadv_tvlv_gateway_data; +struct netlink_callback; struct seq_file; struct sk_buff; @@ -39,10 +40,16 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_free(struct batadv_priv *bat_priv); +void batadv_gw_node_put(struct batadv_gw_node *gw_node); +struct batadv_gw_node * +batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); +int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, u8 *chaddr); +struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index d7bc6a8..2118481 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -241,10 +241,9 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig, &gateway); - /* restart gateway selection if fast or late switching was enabled */ + /* restart gateway selection */ if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw.sel_class) > 2)) + (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 1f90808..43c9a3e 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -35,7 +35,8 @@ #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/workqueue.h> +#include <net/net_namespace.h> +#include <net/rtnetlink.h> #include "bat_v.h" #include "bridge_loop_avoidance.h" @@ -85,25 +86,55 @@ out: } /** + * batadv_getlink_net - return link net namespace (of use fallback) + * @netdev: net_device to check + * @fallback_net: return in case get_link_net is not available for @netdev + * + * Return: result of rtnl_link_ops->get_link_net or @fallback_net + */ +static const struct net *batadv_getlink_net(const struct net_device *netdev, + const struct net *fallback_net) +{ + if (!netdev->rtnl_link_ops) + return fallback_net; + + if (!netdev->rtnl_link_ops->get_link_net) + return fallback_net; + + return netdev->rtnl_link_ops->get_link_net(netdev); +} + +/** * batadv_mutual_parents - check if two devices are each others parent - * @dev1: 1st net_device - * @dev2: 2nd net_device + * @dev1: 1st net dev + * @net1: 1st devices netns + * @dev2: 2nd net dev + * @net2: 2nd devices netns * * veth devices come in pairs and each is the parent of the other! * * Return: true if the devices are each others parent, otherwise false */ static bool batadv_mutual_parents(const struct net_device *dev1, - const struct net_device *dev2) + const struct net *net1, + const struct net_device *dev2, + const struct net *net2) { int dev1_parent_iflink = dev_get_iflink(dev1); int dev2_parent_iflink = dev_get_iflink(dev2); + const struct net *dev1_parent_net; + const struct net *dev2_parent_net; + + dev1_parent_net = batadv_getlink_net(dev1, net1); + dev2_parent_net = batadv_getlink_net(dev2, net2); if (!dev1_parent_iflink || !dev2_parent_iflink) return false; return (dev1_parent_iflink == dev2->ifindex) && - (dev2_parent_iflink == dev1->ifindex); + (dev2_parent_iflink == dev1->ifindex) && + net_eq(dev1_parent_net, net2) && + net_eq(dev2_parent_net, net1); } /** @@ -121,8 +152,9 @@ static bool batadv_mutual_parents(const struct net_device *dev1, */ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) { - struct net_device *parent_dev; struct net *net = dev_net(net_dev); + struct net_device *parent_dev; + const struct net *parent_net; bool ret; /* check if this is a batman-adv mesh interface */ @@ -134,13 +166,16 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) dev_get_iflink(net_dev) == net_dev->ifindex) return false; + parent_net = batadv_getlink_net(net_dev, net); + /* recurse over the parent device */ - parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index((struct net *)parent_net, + dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ if (WARN(!parent_dev, "Cannot find parent device")) return false; - if (batadv_mutual_parents(net_dev, parent_dev)) + if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; ret = batadv_is_on_batman_iface(parent_dev); @@ -625,25 +660,6 @@ out: batadv_hardif_put(primary_if); } -/** - * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif - * @work: work queue item - * - * Free the parts of the hard interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_hardif_remove_interface_finish(struct work_struct *work) -{ - struct batadv_hard_iface *hard_iface; - - hard_iface = container_of(work, struct batadv_hard_iface, - cleanup_work); - - batadv_debugfs_del_hardif(hard_iface); - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); - batadv_hardif_put(hard_iface); -} - static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { @@ -676,8 +692,6 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); - INIT_WORK(&hard_iface->cleanup_work, - batadv_hardif_remove_interface_finish); spin_lock_init(&hard_iface->neigh_list_lock); @@ -719,7 +733,9 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); + batadv_debugfs_del_hardif(hard_iface); + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); + batadv_hardif_put(hard_iface); } void batadv_hardif_remove_interfaces(void) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index fe4c5e2..ef07e5b 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -82,6 +82,12 @@ static void batadv_recv_handler_init(void); static int __init batadv_init(void) { + int ret; + + ret = batadv_tt_cache_init(); + if (ret < 0) + return ret; + INIT_LIST_HEAD(&batadv_hardif_list); batadv_algo_init(); @@ -93,9 +99,8 @@ static int __init batadv_init(void) batadv_tp_meter_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); - if (!batadv_event_workqueue) - return -ENOMEM; + goto err_create_wq; batadv_socket_init(); batadv_debugfs_init(); @@ -108,6 +113,11 @@ static int __init batadv_init(void) BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); return 0; + +err_create_wq: + batadv_tt_cache_destroy(); + + return -ENOMEM; } static void __exit batadv_exit(void) @@ -123,6 +133,8 @@ static void __exit batadv_exit(void) batadv_event_workqueue = NULL; rcu_barrier(); + + batadv_tt_cache_destroy(); } int batadv_mesh_init(struct net_device *soft_iface) @@ -638,3 +650,4 @@ MODULE_AUTHOR(BATADV_DRIVER_AUTHOR); MODULE_DESCRIPTION(BATADV_DRIVER_DESC); MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE); MODULE_VERSION(BATADV_SOURCE_VERSION); +MODULE_ALIAS_RTNL_LINK("batadv"); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 06a8608..09af21e 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2016.3" +#define BATADV_SOURCE_VERSION "2016.4" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index cc91507..894df60 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -528,7 +528,7 @@ update: } return !(mcast_data.flags & - (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6)); + (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6)); } /** diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 231f8ea..18831e7 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -18,6 +18,8 @@ #include "netlink.h" #include "main.h" +#include <linux/atomic.h> +#include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/genetlink.h> @@ -26,24 +28,33 @@ #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/printk.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/skbuff.h> #include <linux/stddef.h> #include <linux/types.h> #include <net/genetlink.h> #include <net/netlink.h> +#include <net/sock.h> #include <uapi/linux/batman_adv.h> +#include "bat_algo.h" +#include "bridge_loop_avoidance.h" +#include "gateway_client.h" #include "hard-interface.h" +#include "originator.h" +#include "packet.h" #include "soft-interface.h" #include "tp_meter.h" +#include "translation-table.h" -struct sk_buff; - -static struct genl_family batadv_netlink_family = { +struct genl_family batadv_netlink_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = BATADV_NL_NAME, .version = 1, .maxattr = BATADV_ATTR_MAX, + .netnsok = true, }; /* multicast groups */ @@ -69,9 +80,44 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, + [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG }, + [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 }, + [BATADV_ATTR_TT_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 }, + [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG }, + [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, + [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TQ] = { .type = NLA_U8 }, + [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 }, + [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG }, + [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 }, }; /** + * batadv_netlink_get_ifindex - Extract an interface index from a message + * @nlh: Message header + * @attrtype: Attribute which holds an interface index + * + * Return: interface index, or 0. + */ +int +batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) +{ + struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); + + return attr ? nla_get_u32(attr) : 0; +} + +/** * batadv_netlink_mesh_info_put - fill in generic information about mesh * interface * @msg: netlink message to be sent back @@ -93,8 +139,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, - soft_iface->dev_addr)) + soft_iface->dev_addr) || + nla_put_u8(msg, BATADV_ATTR_TT_TTVN, + (u8)atomic_read(&bat_priv->tt.vn))) + goto out; + +#ifdef CONFIG_BATMAN_ADV_BLA + if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + ntohs(bat_priv->bla.claim_dest.group))) goto out; +#endif primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { @@ -380,6 +434,106 @@ out: return ret; } +/** + * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hard_iface: Hard interface to dump + * + * Return: error code, or 0 on success + */ +static int +batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *hard_iface) +{ + struct net_device *net_dev = hard_iface->net_dev; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_HARDIFS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + net_dev->ifindex) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + net_dev->name) || + nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, + net_dev->dev_addr)) + goto nla_put_failure; + + if (hard_iface->if_status == BATADV_IF_ACTIVE) { + if (nla_put_flag(msg, BATADV_ATTR_ACTIVE)) + goto nla_put_failure; + } + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_netlink_dump_hardifs - Dump all hard interface into a messages + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: error code, or length of reply message on success + */ +static int +batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hard_iface *hard_iface; + int ifindex; + int portid = NETLINK_CB(cb->skb).portid; + int seq = cb->nlh->nlmsg_seq; + int skip = cb->args[0]; + int i = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface) + return -ENODEV; + + if (!batadv_softif_is_valid(soft_iface)) { + dev_put(soft_iface); + return -ENODEV; + } + + rcu_read_lock(); + + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) + continue; + + if (i++ < skip) + continue; + + if (batadv_netlink_dump_hardif_entry(msg, portid, seq, + hard_iface)) { + i--; + break; + } + } + + rcu_read_unlock(); + + dev_put(soft_iface); + + cb->args[0] = i; + + return msg->len; +} + static struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH_INFO, @@ -399,6 +553,61 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, }, + { + .cmd = BATADV_CMD_GET_ROUTING_ALGOS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_algo_dump, + }, + { + .cmd = BATADV_CMD_GET_HARDIFS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_netlink_dump_hardifs, + }, + { + .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_tt_local_dump, + }, + { + .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_tt_global_dump, + }, + { + .cmd = BATADV_CMD_GET_ORIGINATORS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_orig_dump, + }, + { + .cmd = BATADV_CMD_GET_NEIGHBORS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_hardif_neigh_dump, + }, + { + .cmd = BATADV_CMD_GET_GATEWAYS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_gw_dump, + }, + { + .cmd = BATADV_CMD_GET_BLA_CLAIM, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_bla_claim_dump, + }, + { + .cmd = BATADV_CMD_GET_BLA_BACKBONE, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_bla_backbone_dump, + }, + }; /** diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 945653a..52eb162 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -21,12 +21,18 @@ #include "main.h" #include <linux/types.h> +#include <net/genetlink.h> + +struct nlmsghdr; void batadv_netlink_register(void); void batadv_netlink_unregister(void); +int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype); int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, u8 result, u32 test_time, u64 total_bytes, u32 cookie); +extern struct genl_family batadv_netlink_family; + #endif /* _NET_BATMAN_ADV_NETLINK_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 3940b5d..95c8555 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -28,11 +28,15 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/seq_file.h> +#include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/workqueue.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "distributed-arp-table.h" @@ -42,8 +46,10 @@ #include "hash.h" #include "log.h" #include "multicast.h" +#include "netlink.h" #include "network-coding.h" #include "routing.h" +#include "soft-interface.h" #include "translation-table.h" /* hash class keys */ @@ -721,6 +727,83 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) } /** + * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific + * outgoing interface + * @msg: message to dump into + * @cb: parameters for the dump + * + * Return: 0 or error value + */ +int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct net_device *hard_iface = NULL; + struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + int ret; + int ifindex, hard_ifindex; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_HARD_IFINDEX); + if (hard_ifindex) { + hard_iface = dev_get_by_index(net, hard_ifindex); + if (hard_iface) + hardif = batadv_hardif_get_by_netdev(hard_iface); + + if (!hardif) { + ret = -ENODEV; + goto out; + } + + if (hardif->soft_iface != soft_iface) { + ret = -ENOENT; + goto out; + } + } + + if (!bat_priv->algo_ops->neigh.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif); + + ret = msg->len; + + out: + if (hardif) + batadv_hardif_put(hardif); + if (hard_iface) + dev_put(hard_iface); + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + +/** * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for * free after rcu grace period * @ref: kref pointer of the orig_ifinfo @@ -1330,6 +1413,83 @@ out: return 0; } +/** + * batadv_orig_dump - Dump to netlink the originator infos for a specific + * outgoing interface + * @msg: message to dump into + * @cb: parameters for the dump + * + * Return: 0 or error value + */ +int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct net_device *hard_iface = NULL; + struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + int ret; + int ifindex, hard_ifindex; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_HARD_IFINDEX); + if (hard_ifindex) { + hard_iface = dev_get_by_index(net, hard_ifindex); + if (hard_iface) + hardif = batadv_hardif_get_by_netdev(hard_iface); + + if (!hardif) { + ret = -ENODEV; + goto out; + } + + if (hardif->soft_iface != soft_iface) { + ret = -ENOENT; + goto out; + } + } + + if (!bat_priv->algo_ops->orig.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif); + + ret = msg->len; + + out: + if (hardif) + batadv_hardif_put(hardif); + if (hard_iface) + dev_put(hard_iface); + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num) { diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 566306b..ebc5618 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -31,7 +31,9 @@ #include "hash.h" +struct netlink_callback; struct seq_file; +struct sk_buff; bool batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); @@ -61,6 +63,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo); +int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); struct batadv_orig_ifinfo * @@ -72,6 +75,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo); int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); +int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 6b011ff..6afc0b8 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -129,42 +129,6 @@ enum batadv_tt_data_flags { }; /** - * enum batadv_tt_client_flags - TT client specific flags - * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table - * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new - * update telling its new real location has not been received/sent yet - * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface. - * This information is used by the "AP Isolation" feature - * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This - * information is used by the Extended Isolation feature - * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table - * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has - * not been announced yet - * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept - * in the table for one more originator interval for consistency purposes - * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of - * the network but no nnode has already announced it - * - * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. - * Bits from 8 to 15 are called _local flags_ because they are used for local - * computations only. - * - * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with - * the other nodes in the network. To achieve this goal these flags are included - * in the TT CRC computation. - */ -enum batadv_tt_client_flags { - BATADV_TT_CLIENT_DEL = BIT(0), - BATADV_TT_CLIENT_ROAM = BIT(1), - BATADV_TT_CLIENT_WIFI = BIT(4), - BATADV_TT_CLIENT_ISOLA = BIT(5), - BATADV_TT_CLIENT_NOPURGE = BIT(8), - BATADV_TT_CLIENT_NEW = BIT(9), - BATADV_TT_CLIENT_PENDING = BIT(10), - BATADV_TT_CLIENT_TEMP = BIT(11), -}; - -/** * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 7602c00..610f2c4 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -74,11 +74,23 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, if (!orig_ifinfo) return; - rcu_read_lock(); - curr_router = rcu_dereference(orig_ifinfo->router); - if (curr_router && !kref_get_unless_zero(&curr_router->refcount)) - curr_router = NULL; - rcu_read_unlock(); + spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + + /* increase refcount of new best neighbor */ + if (neigh_node) + kref_get(&neigh_node->refcount); + + rcu_assign_pointer(orig_ifinfo->router, neigh_node); + spin_unlock_bh(&orig_node->neigh_list_lock); + batadv_orig_ifinfo_put(orig_ifinfo); /* route deleted */ if ((curr_router) && (!neigh_node)) { @@ -100,27 +112,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, curr_router->addr); } - if (curr_router) - batadv_neigh_node_put(curr_router); - - spin_lock_bh(&orig_node->neigh_list_lock); - /* curr_router used earlier may not be the current orig_ifinfo->router - * anymore because it was dereferenced outside of the neigh_list_lock - * protected region. After the new best neighbor has replace the current - * best neighbor the reference counter needs to decrease. Consequently, - * the code needs to ensure the curr_router variable contains a pointer - * to the replaced best neighbor. - */ - curr_router = rcu_dereference_protected(orig_ifinfo->router, true); - - /* increase refcount of new best neighbor */ - if (neigh_node) - kref_get(&neigh_node->refcount); - - rcu_assign_pointer(orig_ifinfo->router, neigh_node); - spin_unlock_bh(&orig_node->neigh_list_lock); - batadv_orig_ifinfo_put(orig_ifinfo); - /* decrease refcount of previous best neighbor */ if (curr_router) batadv_neigh_node_put(curr_router); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 6191159..8d4e1f5 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -315,8 +315,7 @@ out: * * Wrap the given skb into a batman-adv unicast or unicast-4addr header * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied - * as packet_type. Then send this frame to the given orig_node and release a - * reference to this orig_node. + * as packet_type. Then send this frame to the given orig_node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ @@ -370,8 +369,6 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, ret = NET_XMIT_SUCCESS; out: - if (orig_node) - batadv_orig_node_put(orig_node); if (ret == NET_XMIT_DROP) kfree_skb(skb); return ret; @@ -403,6 +400,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; u8 *src, *dst; + int ret; src = ethhdr->h_source; dst = ethhdr->h_dest; @@ -414,8 +412,13 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, } orig_node = batadv_transtable_search(bat_priv, src, dst, vid); - return batadv_send_skb_unicast(bat_priv, skb, packet_type, - packet_subtype, orig_node, vid); + ret = batadv_send_skb_unicast(bat_priv, skb, packet_type, + packet_subtype, orig_node, vid); + + if (orig_node) + batadv_orig_node_put(orig_node); + + return ret; } /** @@ -433,12 +436,25 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; + int ret; orig_node = batadv_gw_get_selected_orig(bat_priv); - return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, - BATADV_P_DATA, orig_node, vid); + ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, + BATADV_P_DATA, orig_node, vid); + + if (orig_node) + batadv_orig_node_put(orig_node); + + return ret; } +/** + * batadv_forw_packet_free - free a forwarding packet + * @forw_packet: The packet to free + * + * This frees a forwarding packet and releases any resources it might + * have claimed. + */ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) { kfree_skb(forw_packet->skb); @@ -446,9 +462,73 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) batadv_hardif_put(forw_packet->if_incoming); if (forw_packet->if_outgoing) batadv_hardif_put(forw_packet->if_outgoing); + if (forw_packet->queue_left) + atomic_inc(forw_packet->queue_left); kfree(forw_packet); } +/** + * batadv_forw_packet_alloc - allocate a forwarding packet + * @if_incoming: The (optional) if_incoming to be grabbed + * @if_outgoing: The (optional) if_outgoing to be grabbed + * @queue_left: The (optional) queue counter to decrease + * @bat_priv: The bat_priv for the mesh of this forw_packet + * + * Allocates a forwarding packet and tries to get a reference to the + * (optional) if_incoming, if_outgoing and queue_left. If queue_left + * is NULL then bat_priv is optional, too. + * + * Return: An allocated forwarding packet on success, NULL otherwise. + */ +struct batadv_forw_packet * +batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, + atomic_t *queue_left, + struct batadv_priv *bat_priv) +{ + struct batadv_forw_packet *forw_packet; + const char *qname; + + if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) { + qname = "unknown"; + + if (queue_left == &bat_priv->bcast_queue_left) + qname = "bcast"; + + if (queue_left == &bat_priv->batman_queue_left) + qname = "batman"; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "%s queue is full\n", qname); + + return NULL; + } + + forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); + if (!forw_packet) + goto err; + + if (if_incoming) + kref_get(&if_incoming->refcount); + + if (if_outgoing) + kref_get(&if_outgoing->refcount); + + forw_packet->skb = NULL; + forw_packet->queue_left = queue_left; + forw_packet->if_incoming = if_incoming; + forw_packet->if_outgoing = if_outgoing; + forw_packet->num_packets = 0; + + return forw_packet; + +err: + if (queue_left) + atomic_inc(queue_left); + + return NULL; +} + static void _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, @@ -487,24 +567,20 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_bcast_packet *bcast_packet; struct sk_buff *newskb; - if (!batadv_atomic_dec_not_zero(&bat_priv->bcast_queue_left)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bcast packet queue full\n"); - goto out; - } - primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - goto out_and_inc; - - forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); + goto err; + forw_packet = batadv_forw_packet_alloc(primary_if, NULL, + &bat_priv->bcast_queue_left, + bat_priv); + batadv_hardif_put(primary_if); if (!forw_packet) - goto out_and_inc; + goto err; newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) - goto packet_free; + goto err_packet_free; /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; @@ -513,11 +589,6 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, skb_reset_mac_header(newskb); forw_packet->skb = newskb; - forw_packet->if_incoming = primary_if; - forw_packet->if_outgoing = NULL; - - /* how often did we send the bcast packet ? */ - forw_packet->num_packets = 0; INIT_DELAYED_WORK(&forw_packet->delayed_work, batadv_send_outstanding_bcast_packet); @@ -525,13 +596,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay); return NETDEV_TX_OK; -packet_free: - kfree(forw_packet); -out_and_inc: - atomic_inc(&bat_priv->bcast_queue_left); -out: - if (primary_if) - batadv_hardif_put(primary_if); +err_packet_free: + batadv_forw_packet_free(forw_packet); +err: return NETDEV_TX_BUSY; } @@ -592,7 +659,6 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) out: batadv_forw_packet_free(forw_packet); - atomic_inc(&bat_priv->bcast_queue_left); } void @@ -633,9 +699,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - if (!forw_packet->own) - atomic_inc(&bat_priv->bcast_queue_left); - batadv_forw_packet_free(forw_packet); } } @@ -663,9 +726,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - batadv_forw_packet_free(forw_packet); } } diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 7cecb75..999f786 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -28,6 +28,12 @@ struct sk_buff; void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); +struct batadv_forw_packet * +batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, + atomic_t *queue_left, + struct batadv_priv *bat_priv); + int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7527c06..e508bf5 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -39,6 +39,7 @@ #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> +#include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/socket.h> @@ -46,7 +47,6 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> -#include <linux/workqueue.h> #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -57,6 +57,7 @@ #include "hard-interface.h" #include "multicast.h" #include "network-coding.h" +#include "originator.h" #include "packet.h" #include "send.h" #include "sysfs.h" @@ -377,6 +378,8 @@ dropped: dropped_freed: batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: + if (mcast_single_orig) + batadv_orig_node_put(mcast_single_orig); if (primary_if) batadv_hardif_put(primary_if); return NETDEV_TX_OK; @@ -747,34 +750,6 @@ static void batadv_set_lockdep_class(struct net_device *dev) } /** - * batadv_softif_destroy_finish - cleans up the remains of a softif - * @work: work queue item - * - * Free the parts of the soft interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_softif_destroy_finish(struct work_struct *work) -{ - struct batadv_softif_vlan *vlan; - struct batadv_priv *bat_priv; - struct net_device *soft_iface; - - bat_priv = container_of(work, struct batadv_priv, - cleanup_work); - soft_iface = bat_priv->soft_iface; - - /* destroy the "untagged" VLAN */ - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); - if (vlan) { - batadv_softif_destroy_vlan(bat_priv, vlan); - batadv_softif_vlan_put(vlan); - } - - batadv_sysfs_del_meshif(soft_iface); - unregister_netdev(soft_iface); -} - -/** * batadv_softif_init_late - late stage initialization of soft interface * @dev: registered network device to modify * @@ -791,7 +766,6 @@ static int batadv_softif_init_late(struct net_device *dev) bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; - INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -1028,8 +1002,19 @@ struct net_device *batadv_softif_create(struct net *net, const char *name) void batadv_softif_destroy_sysfs(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_softif_vlan *vlan; + + ASSERT_RTNL(); + + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + } - queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); + batadv_sysfs_del_meshif(soft_iface); + unregister_netdevice(soft_iface); } /** diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index fe9ca94..02d96f2 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -37,6 +37,7 @@ #include <linux/stddef.h> #include <linux/string.h> #include <linux/stringify.h> +#include <linux/workqueue.h> #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -428,6 +429,13 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); int bytes_written; + /* GW mode is not available if the routing algorithm in use does not + * implement the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -ENOENT; + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_CLIENT: bytes_written = sprintf(buff, "%s\n", @@ -455,6 +463,13 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, char *curr_gw_mode_str; int gw_mode_tmp = -1; + /* toggling GW mode is allowed only if the routing algorithm in use + * provides the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -EINVAL; + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; @@ -514,6 +529,50 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, return count; } +static ssize_t batadv_show_gw_sel_class(struct kobject *kobj, + struct attribute *attr, char *buff) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + /* GW selection class is not available if the routing algorithm in use + * does not implement the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -ENOENT; + + if (bat_priv->algo_ops->gw.show_sel_class) + return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff); + + return sprintf(buff, "%i\n", atomic_read(&bat_priv->gw.sel_class)); +} + +static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + /* setting the GW selection class is allowed only if the routing + * algorithm in use implements the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -EINVAL; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (bat_priv->algo_ops->gw.store_sel_class) + return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff, + count); + + return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, + batadv_post_gw_reselect, attr, + &bat_priv->gw.sel_class, + bat_priv->soft_iface); +} + static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff) { @@ -625,8 +684,8 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER, INT_MAX, NULL); BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE, NULL); -BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1, - BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect); +static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class, + batadv_store_gw_sel_class); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_MCAST @@ -712,6 +771,8 @@ rem_attr: for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; out: @@ -726,6 +787,8 @@ void batadv_sysfs_del_meshif(struct net_device *dev) for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; } @@ -781,6 +844,10 @@ rem_attr: for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; out: @@ -800,6 +867,10 @@ void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; } @@ -828,31 +899,31 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj, return length; } -static ssize_t batadv_store_mesh_iface(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) +/** + * batadv_store_mesh_iface_finish - store new hardif mesh_iface state + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @ifname: name of soft-interface to modify + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). + * + * Return: 0 on success, 0 < on failure + */ +static int batadv_store_mesh_iface_finish(struct net_device *net_dev, + char ifname[IFNAMSIZ]) { - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct net *net = dev_net(net_dev); struct batadv_hard_iface *hard_iface; - int status_tmp = -1; - int ret = count; + int status_tmp; + int ret = 0; + + ASSERT_RTNL(); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface) - return count; - - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - if (strlen(buff) >= IFNAMSIZ) { - pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", - buff); - batadv_hardif_put(hard_iface); - return -EINVAL; - } + return 0; - if (strncmp(buff, "none", 4) == 0) + if (strncmp(ifname, "none", 4) == 0) status_tmp = BATADV_IF_NOT_IN_USE; else status_tmp = BATADV_IF_I_WANT_YOU; @@ -861,15 +932,13 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, goto out; if ((hard_iface->soft_iface) && - (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0)) + (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)) goto out; - rtnl_lock(); - if (status_tmp == BATADV_IF_NOT_IN_USE) { batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO); - goto unlock; + goto out; } /* if the interface already is in use */ @@ -877,15 +946,71 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO); - ret = batadv_hardif_enable_interface(hard_iface, net, buff); - -unlock: - rtnl_unlock(); + ret = batadv_hardif_enable_interface(hard_iface, net, ifname); out: batadv_hardif_put(hard_iface); return ret; } +/** + * batadv_store_mesh_iface_work - store new hardif mesh_iface state + * @work: work queue item + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). + */ +static void batadv_store_mesh_iface_work(struct work_struct *work) +{ + struct batadv_store_mesh_work *store_work; + int ret; + + store_work = container_of(work, struct batadv_store_mesh_work, work); + + rtnl_lock(); + ret = batadv_store_mesh_iface_finish(store_work->net_dev, + store_work->soft_iface_name); + rtnl_unlock(); + + if (ret < 0) + pr_err("Failed to store new mesh_iface state %s for %s: %d\n", + store_work->soft_iface_name, store_work->net_dev->name, + ret); + + dev_put(store_work->net_dev); + kfree(store_work); +} + +static ssize_t batadv_store_mesh_iface(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + struct batadv_store_mesh_work *store_work; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (strlen(buff) >= IFNAMSIZ) { + pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", + buff); + return -EINVAL; + } + + store_work = kmalloc(sizeof(*store_work), GFP_KERNEL); + if (!store_work) + return -ENOMEM; + + dev_hold(net_dev); + INIT_WORK(&store_work->work, batadv_store_mesh_iface_work); + store_work->net_dev = net_dev; + strlcpy(store_work->soft_iface_name, buff, + sizeof(store_work->soft_iface_name)); + + queue_work(batadv_event_workqueue, &store_work->work); + + return count; +} + static ssize_t batadv_show_iface_status(struct kobject *kobj, struct attribute *attr, char *buff) { @@ -1048,6 +1173,8 @@ out: void batadv_sysfs_del_hardif(struct kobject **hardif_obj) { + kobject_uevent(*hardif_obj, KOBJ_REMOVE); + kobject_del(*hardif_obj); kobject_put(*hardif_obj); *hardif_obj = NULL; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7e6df7a..2080407 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -22,12 +22,14 @@ #include <linux/bitops.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> +#include <linux/cache.h> #include <linux/compiler.h> #include <linux/crc32c.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/fs.h> #include <linux/if_ether.h> +#include <linux/init.h> #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/kernel.h> @@ -35,25 +37,39 @@ #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/seq_file.h> +#include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> +#include <net/genetlink.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "multicast.h" +#include "netlink.h" #include "originator.h" #include "packet.h" #include "soft-interface.h" #include "tvlv.h" +static struct kmem_cache *batadv_tl_cache __read_mostly; +static struct kmem_cache *batadv_tg_cache __read_mostly; +static struct kmem_cache *batadv_tt_orig_cache __read_mostly; +static struct kmem_cache *batadv_tt_change_cache __read_mostly; +static struct kmem_cache *batadv_tt_req_cache __read_mostly; +static struct kmem_cache *batadv_tt_roam_cache __read_mostly; + /* hash class keys */ static struct lock_class_key batadv_tt_local_hash_lock_class_key; static struct lock_class_key batadv_tt_global_hash_lock_class_key; @@ -205,6 +221,20 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, } /** + * batadv_tt_local_entry_free_rcu - free the tt_local_entry + * @rcu: rcu pointer of the tt_local_entry + */ +static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_local_entry *tt_local_entry; + + tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, + common.rcu); + + kmem_cache_free(batadv_tl_cache, tt_local_entry); +} + +/** * batadv_tt_local_entry_release - release tt_local_entry from lists and queue * for free after rcu grace period * @ref: kref pointer of the nc_node @@ -218,7 +248,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) batadv_softif_vlan_put(tt_local_entry->vlan); - kfree_rcu(tt_local_entry, common.rcu); + call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); } /** @@ -234,6 +264,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) } /** + * batadv_tt_global_entry_free_rcu - free the tt_global_entry + * @rcu: rcu pointer of the tt_global_entry + */ +static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_global_entry *tt_global_entry; + + tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, + common.rcu); + + kmem_cache_free(batadv_tg_cache, tt_global_entry); +} + +/** * batadv_tt_global_entry_release - release tt_global_entry from lists and queue * for free after rcu grace period * @ref: kref pointer of the nc_node @@ -246,7 +290,8 @@ static void batadv_tt_global_entry_release(struct kref *ref) common.refcount); batadv_tt_global_del_orig_list(tt_global_entry); - kfree_rcu(tt_global_entry, common.rcu); + + call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); } /** @@ -384,6 +429,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, } /** + * batadv_tt_orig_list_entry_free_rcu - free the orig_entry + * @rcu: rcu pointer of the orig_entry + */ +static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); + + kmem_cache_free(batadv_tt_orig_cache, orig_entry); +} + +/** * batadv_tt_orig_list_entry_release - release tt orig entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the tt orig entry @@ -396,7 +454,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) refcount); batadv_orig_node_put(orig_entry->orig_node); - kfree_rcu(orig_entry, rcu); + call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } /** @@ -426,7 +484,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, bool event_removed = false; bool del_op_requested, del_op_entry; - tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC); + tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC); if (!tt_change_node) return; @@ -467,8 +525,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, continue; del: list_del(&entry->list); - kfree(entry); - kfree(tt_change_node); + kmem_cache_free(batadv_tt_change_cache, entry); + kmem_cache_free(batadv_tt_change_cache, tt_change_node); event_removed = true; goto unlock; } @@ -646,7 +704,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, goto out; } - tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC); + tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC); if (!tt_local) goto out; @@ -656,7 +714,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, net_ratelimited_function(batadv_info, soft_iface, "adding TT local entry %pM to non-existent VLAN %d\n", addr, BATADV_PRINT_VID(vid)); - kfree(tt_local); + kmem_cache_free(batadv_tl_cache, tt_local); tt_local = NULL; goto out; } @@ -959,7 +1017,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) tt_diff_entries_count++; } list_del(&entry->list); - kfree(entry); + kmem_cache_free(batadv_tt_change_cache, entry); } spin_unlock_bh(&bat_priv->tt.changes_list_lock); @@ -1057,6 +1115,164 @@ out: return 0; } +/** + * batadv_tt_local_dump_entry - Dump one TT local entry into a message + * @msg :Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @common: tt local & tt global common data + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_tt_common_entry *common) +{ + void *hdr; + struct batadv_softif_vlan *vlan; + struct batadv_tt_local_entry *local; + unsigned int last_seen_msecs; + u32 crc; + + local = container_of(common, struct batadv_tt_local_entry, common); + last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen); + + vlan = batadv_softif_vlan_get(bat_priv, common->vid); + if (!vlan) + return 0; + + crc = vlan->tt.crc; + + batadv_softif_vlan_put(vlan); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, + BATADV_CMD_GET_TRANSTABLE_LOCAL); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || + nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || + nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + goto nla_put_failure; + + if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) && + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @head: Pointer to the list containing the local tt entries + * @idx_s: Number of entries to skip + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct hlist_head *head, int *idx_s) +{ + struct batadv_tt_common_entry *common; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(common, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv, + common)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + return 0; +} + +/** + * batadv_tt_local_dump - Dump TT local entries into a message + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: Error code, or 0 on success + */ +int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_hashtable *hash; + struct hlist_head *head; + int ret; + int ifindex; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hash = bat_priv->tt.local_hash; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, head, &idx)) + break; + + bucket++; + } + + ret = msg->len; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + cb->args[0] = bucket; + cb->args[1] = idx; + + return ret; +} + static void batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, @@ -1259,7 +1475,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { list_del(&entry->list); - kfree(entry); + kmem_cache_free(batadv_tt_change_cache, entry); } atomic_set(&bat_priv->tt.local_changes, 0); @@ -1341,7 +1557,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, goto out; } - orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC); + orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC); if (!orig_entry) goto out; @@ -1411,7 +1627,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, goto out; if (!tt_global_entry) { - tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC); + tt_global_entry = kmem_cache_zalloc(batadv_tg_cache, + GFP_ATOMIC); if (!tt_global_entry) goto out; @@ -1704,6 +1921,218 @@ out: } /** + * batadv_tt_global_dump_subentry - Dump all TT local entries into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @common: tt local & tt global common data + * @orig: Originator node announcing a non-mesh client + * @best: Is the best originator for the TT entry + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_tt_common_entry *common, + struct batadv_tt_orig_list_entry *orig, + bool best) +{ + void *hdr; + struct batadv_orig_node_vlan *vlan; + u8 last_ttvn; + u32 crc; + + vlan = batadv_orig_node_vlan_get(orig->orig_node, + common->vid); + if (!vlan) + return 0; + + crc = vlan->tt.crc; + + batadv_orig_node_vlan_put(vlan); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, + BATADV_CMD_GET_TRANSTABLE_GLOBAL); + if (!hdr) + return -ENOBUFS; + + last_ttvn = atomic_read(&orig->orig_node->last_ttvn); + + if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || + nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig->orig_node->orig) || + nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) || + nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) || + nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || + nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_tt_global_dump_entry - Dump one TT global entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @common: tt local & tt global common data + * @sub_s: Number of entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_tt_common_entry *common, int *sub_s) +{ + struct batadv_tt_orig_list_entry *orig_entry, *best_entry; + struct batadv_tt_global_entry *global; + struct hlist_head *head; + int sub = 0; + bool best; + + global = container_of(common, struct batadv_tt_global_entry, common); + best_entry = batadv_transtable_best_orig(bat_priv, global); + head = &global->orig_list; + + hlist_for_each_entry_rcu(orig_entry, head, list) { + if (sub++ < *sub_s) + continue; + + best = (orig_entry == best_entry); + + if (batadv_tt_global_dump_subentry(msg, portid, seq, common, + orig_entry, best)) { + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + *sub_s = 0; + return 0; +} + +/** + * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @head: Pointer to the list containing the global tt entries + * @idx_s: Number of entries to skip + * @sub: Number of entries to skip + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_tt_common_entry *common; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(common, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv, + common, sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_tt_global_dump - Dump TT global entries into a message + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: Error code, or length of message on success + */ +int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_hashtable *hash; + struct hlist_head *head; + int ret; + int ifindex; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hash = bat_priv->tt.global_hash; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_tt_global_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, bat_priv, + head, &idx, &sub)) + break; + + bucket++; + } + + ret = msg->len; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; + + return ret; +} + +/** * _batadv_tt_global_del_orig_entry - remove and free an orig_entry * @tt_global_entry: the global entry to remove the orig_entry from * @orig_entry: the orig entry to remove and free @@ -2280,7 +2709,7 @@ static void batadv_tt_req_node_release(struct kref *ref) tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount); - kfree(tt_req_node); + kmem_cache_free(batadv_tt_req_cache, tt_req_node); } /** @@ -2367,7 +2796,7 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, goto unlock; } - tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC); + tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC); if (!tt_req_node) goto unlock; @@ -3104,7 +3533,7 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { list_del(&node->list); - kfree(node); + kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); @@ -3121,7 +3550,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) continue; list_del(&node->list); - kfree(node); + kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); } @@ -3162,7 +3591,8 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) } if (!ret) { - tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC); + tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache, + GFP_ATOMIC); if (!tt_roam_node) goto unlock; @@ -3865,3 +4295,85 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, return ret; } + +/** + * batadv_tt_cache_init - Initialize tt memory object cache + * + * Return: 0 on success or negative error number in case of failure. + */ +int __init batadv_tt_cache_init(void) +{ + size_t tl_size = sizeof(struct batadv_tt_local_entry); + size_t tg_size = sizeof(struct batadv_tt_global_entry); + size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry); + size_t tt_change_size = sizeof(struct batadv_tt_change_node); + size_t tt_req_size = sizeof(struct batadv_tt_req_node); + size_t tt_roam_size = sizeof(struct batadv_tt_roam_node); + + batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tl_cache) + return -ENOMEM; + + batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tg_cache) + goto err_tt_tl_destroy; + + batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache", + tt_orig_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_orig_cache) + goto err_tt_tg_destroy; + + batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache", + tt_change_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_change_cache) + goto err_tt_orig_destroy; + + batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache", + tt_req_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_req_cache) + goto err_tt_change_destroy; + + batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache", + tt_roam_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_roam_cache) + goto err_tt_req_destroy; + + return 0; + +err_tt_req_destroy: + kmem_cache_destroy(batadv_tt_req_cache); + batadv_tt_req_cache = NULL; +err_tt_change_destroy: + kmem_cache_destroy(batadv_tt_change_cache); + batadv_tt_change_cache = NULL; +err_tt_orig_destroy: + kmem_cache_destroy(batadv_tt_orig_cache); + batadv_tt_orig_cache = NULL; +err_tt_tg_destroy: + kmem_cache_destroy(batadv_tg_cache); + batadv_tg_cache = NULL; +err_tt_tl_destroy: + kmem_cache_destroy(batadv_tl_cache); + batadv_tl_cache = NULL; + + return -ENOMEM; +} + +/** + * batadv_tt_cache_destroy - Destroy tt memory object cache + */ +void batadv_tt_cache_destroy(void) +{ + kmem_cache_destroy(batadv_tl_cache); + kmem_cache_destroy(batadv_tg_cache); + kmem_cache_destroy(batadv_tt_orig_cache); + kmem_cache_destroy(batadv_tt_change_cache); + kmem_cache_destroy(batadv_tt_req_cache); + kmem_cache_destroy(batadv_tt_roam_cache); +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 7c7e2c0..783fdba 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,8 +22,10 @@ #include <linux/types.h> +struct netlink_callback; struct net_device; struct seq_file; +struct sk_buff; int batadv_tt_init(struct batadv_priv *bat_priv); bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, @@ -33,6 +35,8 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const char *message, bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); +int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb); +int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, s32 match_vid, const char *message); @@ -59,4 +63,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid); +int batadv_tt_cache_init(void); +void batadv_tt_cache_destroy(void); + #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a64522c..b5f01a3 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -28,6 +28,7 @@ #include <linux/if_ether.h> #include <linux/kref.h> #include <linux/netdevice.h> +#include <linux/netlink.h> #include <linux/sched.h> /* for linux/wait.h */ #include <linux/spinlock.h> #include <linux/types.h> @@ -132,7 +133,6 @@ struct batadv_hard_iface_bat_v { * @rcu: struct used for freeing in an RCU-safe manner * @bat_iv: per hard-interface B.A.T.M.A.N. IV data * @bat_v: per hard-interface B.A.T.M.A.N. V data - * @cleanup_work: work queue callback item for hard-interface deinit * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs * @neigh_list: list of unique single hop neighbors via this interface * @neigh_list_lock: lock protecting neigh_list @@ -152,7 +152,6 @@ struct batadv_hard_iface { #ifdef CONFIG_BATMAN_ADV_BATMAN_V struct batadv_hard_iface_bat_v bat_v; #endif - struct work_struct cleanup_work; struct dentry *debug_dir; struct hlist_head neigh_list; /* neigh_list_lock protects: neigh_list */ @@ -1015,7 +1014,6 @@ struct batadv_priv_bat_v { * @forw_bcast_list_lock: lock protecting forw_bcast_list * @tp_list_lock: spinlock protecting @tp_list * @orig_work: work queue callback item for orig node purging - * @cleanup_work: work queue callback item for soft-interface deinit * @primary_if: one of the hard-interfaces assigned to this mesh interface * becomes the primary interface * @algo_ops: routing algorithm used by this mesh interface @@ -1074,7 +1072,6 @@ struct batadv_priv { spinlock_t tp_list_lock; /* protects tp_list */ atomic_t tp_num; struct delayed_work orig_work; - struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *algo_ops; struct hlist_head softif_vlan_list; @@ -1379,6 +1376,7 @@ struct batadv_skb_cb { * locally generated packet * @if_outgoing: packet where the packet should be sent to, or NULL if * unspecified + * @queue_left: The queue (counter) this packet was applied to */ struct batadv_forw_packet { struct hlist_node list; @@ -1391,11 +1389,13 @@ struct batadv_forw_packet { struct delayed_work delayed_work; struct batadv_hard_iface *if_incoming; struct batadv_hard_iface *if_outgoing; + atomic_t *queue_left; }; /** * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific) * @activate: start routing mechanisms when hard-interface is brought up + * (optional) * @enable: init routing info when hard-interface is enabled * @disable: de-init routing info when hard-interface is disabled * @update_mac: (re-)init mac addresses of the protocol information @@ -1413,11 +1413,13 @@ struct batadv_algo_iface_ops { /** * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific) * @hardif_init: called on creation of single hop entry + * (optional) * @cmp: compare the metrics of two neighbors for their respective outgoing * interfaces * @is_similar_or_better: check if neigh1 is equally similar or better than * neigh2 for their respective outgoing interface from the metric prospective * @print: print the single hop neighbor list (optional) + * @dump: dump neighbors to a netlink socket (optional) */ struct batadv_algo_neigh_ops { void (*hardif_init)(struct batadv_hardif_neigh_node *neigh); @@ -1430,17 +1432,21 @@ struct batadv_algo_neigh_ops { struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); void (*print)(struct batadv_priv *priv, struct seq_file *seq); + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv, + struct batadv_hard_iface *hard_iface); }; /** * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific) * @free: free the resources allocated by the routing algorithm for an orig_node - * object + * object (optional) * @add_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to a new hard-interface being added into the mesh + * orig_node due to a new hard-interface being added into the mesh (optional) * @del_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to an hard-interface being removed from the mesh + * orig_node due to an hard-interface being removed from the mesh (optional) * @print: print the originator table (optional) + * @dump: dump originators to a netlink socket (optional) */ struct batadv_algo_orig_ops { void (*free)(struct batadv_orig_node *orig_node); @@ -1449,6 +1455,34 @@ struct batadv_algo_orig_ops { int del_if_num); void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv, + struct batadv_hard_iface *hard_iface); +}; + +/** + * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) + * @store_sel_class: parse and stores a new GW selection class (optional) + * @show_sel_class: prints the current GW selection class (optional) + * @get_best_gw_node: select the best GW from the list of available nodes + * (optional) + * @is_eligible: check if a newly discovered GW is a potential candidate for + * the election as best GW (optional) + * @print: print the gateway table (optional) + * @dump: dump gateways to a netlink socket (optional) + */ +struct batadv_algo_gw_ops { + ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, + size_t count); + ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); + struct batadv_gw_node *(*get_best_gw_node) + (struct batadv_priv *bat_priv); + bool (*is_eligible)(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node); + void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv); }; /** @@ -1458,6 +1492,7 @@ struct batadv_algo_orig_ops { * @iface: callbacks related to interface handling * @neigh: callbacks related to neighbors handling * @orig: callbacks related to originators handling + * @gw: callbacks related to GW mode */ struct batadv_algo_ops { struct hlist_node list; @@ -1465,6 +1500,7 @@ struct batadv_algo_ops { struct batadv_algo_iface_ops iface; struct batadv_algo_neigh_ops neigh; struct batadv_algo_orig_ops orig; + struct batadv_algo_gw_ops gw; }; /** @@ -1564,4 +1600,17 @@ enum batadv_tvlv_handler_flags { BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), }; +/** + * struct batadv_store_mesh_work - Work queue item to detach add/del interface + * from sysfs locks + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @soft_iface_name: name of soft-interface to modify + * @work: work queue item + */ +struct batadv_store_mesh_work { + struct net_device *net_dev; + char soft_iface_name[IFNAMSIZ]; + struct work_struct work; +}; + #endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/core/dev.c b/net/core/dev.c index dd6ce59..a75df86 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7625,6 +7625,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); +#ifdef CONFIG_NET_SCHED + hash_init(dev->qdisc_hash); +#endif dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 61ad43f..91028ae 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -6,6 +6,8 @@ #include <linux/if_vlan.h> #include <net/ip.h> #include <net/ipv6.h> +#include <net/gre.h> +#include <net/pptp.h> #include <linux/igmp.h> #include <linux/icmp.h> #include <linux/sctp.h> @@ -338,32 +340,42 @@ mpls: ip_proto_again: switch (ip_proto) { case IPPROTO_GRE: { - struct gre_hdr { - __be16 flags; - __be16 proto; - } *hdr, _hdr; + struct gre_base_hdr *hdr, _hdr; + u16 gre_ver; + int offset = 0; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) goto out_bad; - /* - * Only look inside GRE if version zero and no - * routing - */ - if (hdr->flags & (GRE_VERSION | GRE_ROUTING)) + + /* Only look inside GRE without routing */ + if (hdr->flags & GRE_ROUTING) break; - proto = hdr->proto; - nhoff += 4; + /* Only look inside GRE for version 0 and 1 */ + gre_ver = ntohs(hdr->flags & GRE_VERSION); + if (gre_ver > 1) + break; + + proto = hdr->protocol; + if (gre_ver) { + /* Version1 must be PPTP, and check the flags */ + if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) + break; + } + + offset += sizeof(struct gre_base_hdr); + if (hdr->flags & GRE_CSUM) - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->csum) + + sizeof(((struct gre_full_hdr *)0)->reserved1); + if (hdr->flags & GRE_KEY) { const __be32 *keyid; __be32 _keyid; - keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid), + keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid), data, hlen, &_keyid); - if (!keyid) goto out_bad; @@ -372,32 +384,65 @@ ip_proto_again: key_keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_GRE_KEYID, target_container); - key_keyid->keyid = *keyid; + if (gre_ver == 0) + key_keyid->keyid = *keyid; + else + key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; } - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->key); } + if (hdr->flags & GRE_SEQ) - nhoff += 4; - if (proto == htons(ETH_P_TEB)) { - const struct ethhdr *eth; - struct ethhdr _eth; - - eth = __skb_header_pointer(skb, nhoff, - sizeof(_eth), - data, hlen, &_eth); - if (!eth) + offset += sizeof(((struct pptp_gre_header *)0)->seq); + + if (gre_ver == 0) { + if (proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = __skb_header_pointer(skb, nhoff + offset, + sizeof(_eth), + data, hlen, &_eth); + if (!eth) + goto out_bad; + proto = eth->h_proto; + offset += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = (nhoff + offset); + } + } else { /* version 1, must be PPTP */ + u8 _ppp_hdr[PPP_HDRLEN]; + u8 *ppp_hdr; + + if (hdr->flags & GRE_ACK) + offset += sizeof(((struct pptp_gre_header *)0)->ack); + + ppp_hdr = skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), _ppp_hdr); + if (!ppp_hdr) goto out_bad; - proto = eth->h_proto; - nhoff += sizeof(*eth); - - /* Cap headers that we access via pointers at the - * end of the Ethernet header as our maximum alignment - * at that point is only 2 bytes. - */ - if (NET_IP_ALIGN) - hlen = nhoff; + + switch (PPP_PROTOCOL(ppp_hdr)) { + case PPP_IP: + proto = htons(ETH_P_IP); + break; + case PPP_IPV6: + proto = htons(ETH_P_IPV6); + break; + default: + /* Could probably catch some more like MPLS */ + break; + } + + offset += PPP_HDRLEN; } + nhoff += offset; key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) goto out_good; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index cf26e04c4..2ae929f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } else goto out; } else { - if (lladdr == neigh->ha && new == NUD_STALE) + if (lladdr == neigh->ha && new == NUD_STALE && + !(flags & NEIGH_UPDATE_F_ADMIN)) new = old; } } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c2eb1b..1fe5816 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -37,6 +37,8 @@ struct net init_net = { }; EXPORT_SYMBOL(init_net); +static bool init_net_initialized; + #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; @@ -750,6 +752,8 @@ static int __init net_ns_init(void) if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); + init_net_initialized = true; + rtnl_lock(); list_add_tail_rcu(&init_net.list, &net_namespace_list); rtnl_unlock(); @@ -811,15 +815,24 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { + if (!init_net_initialized) { + list_add_tail(&ops->list, list); + return 0; + } + return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) { - LIST_HEAD(net_exit_list); - list_add(&init_net.exit_list, &net_exit_list); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + if (!init_net_initialized) { + list_del(&ops->list); + } else { + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); + } } #endif /* CONFIG_NET_NS */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ef2ebeb..317c319 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -93,9 +93,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return NULL; switch (id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, tb); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, tb); break; @@ -137,9 +134,6 @@ static void fib_replace_table(struct net *net, struct fib_table *old, { #ifdef CONFIG_IP_MULTIPLE_TABLES switch (new->tb_id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, new); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, new); break; @@ -1249,7 +1243,6 @@ static void ip_fib_net_exit(struct net *net) rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES - RCU_INIT_POINTER(net->ipv4.fib_local, NULL); RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9b4ca87..606cc3e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -472,6 +472,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, continue; } + /* Based on RFC3376 5.1. Should not send source-list change + * records when there is a filter mode change. + */ + if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) || + (!gdeleted && pmc->crcount)) && + (type == IGMPV3_ALLOW_NEW_SOURCES || + type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) + goto decrease_sf_crcount; + /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; @@ -499,6 +508,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) { +decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 1d71c40..ba9cbea 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -85,7 +85,6 @@ /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ #define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ #define CONF_SEND_RETRIES 6 /* Send six requests per open */ -#define CONF_INTER_TIMEOUT (HZ) /* Inter-device timeout: 1 second */ #define CONF_BASE_TIMEOUT (HZ*2) /* Initial timeout: 2 seconds */ #define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */ #define CONF_TIMEOUT_MULT *7/4 /* Rate of timeout growth */ @@ -188,7 +187,7 @@ struct ic_device { }; static struct ic_device *ic_first_dev __initdata; /* List of open device */ -static struct net_device *ic_dev __initdata; /* Selected device */ +static struct ic_device *ic_dev __initdata; /* Selected device */ static bool __init ic_is_init_dev(struct net_device *dev) { @@ -307,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev && !netdev_uses_dsa(dev)) { + if (dev != ic_dev->dev && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } @@ -372,7 +371,7 @@ static int __init ic_setup_if(void) int err; memset(&ir, 0, sizeof(ir)); - strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); + strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); set_sockaddr(sin, ic_myaddr, 0); if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface address (%d)\n", @@ -396,7 +395,7 @@ static int __init ic_setup_if(void) * out, we'll try to muddle along. */ if (ic_dev_mtu != 0) { - strcpy(ir.ifr_name, ic_dev->name); + strcpy(ir.ifr_name, ic_dev->dev->name); ir.ifr_mtu = ic_dev_mtu; if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", @@ -568,7 +567,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt goto drop_unlock; /* We have a winner! */ - ic_dev = dev; + ic_dev = d; if (ic_myaddr == NONE) ic_myaddr = tip; ic_servaddr = sip; @@ -655,8 +654,6 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, }; -static __be32 ic_dev_xid; /* Device under configuration */ - /* * Initialize DHCP/BOOTP extension fields in the request. */ @@ -666,14 +663,14 @@ static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 }; #ifdef IPCONFIG_DHCP static void __init -ic_dhcp_init_options(u8 *options) +ic_dhcp_init_options(u8 *options, struct ic_device *d) { u8 mt = ((ic_servaddr == NONE) ? DHCPDISCOVER : DHCPREQUEST); u8 *e = options; int len; - pr_debug("DHCP: Sending message type %d\n", mt); + pr_debug("DHCP: Sending message type %d (%s)\n", mt, d->dev->name); memcpy(e, ic_bootp_cookie, 4); /* RFC1048 Magic Cookie */ e += 4; @@ -857,7 +854,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d /* add DHCP options or BOOTP extensions */ #ifdef IPCONFIG_DHCP if (ic_proto_enabled & IC_USE_DHCP) - ic_dhcp_init_options(b->exten); + ic_dhcp_init_options(b->exten, d); else #endif ic_bootp_init_ext(b->exten); @@ -1033,14 +1030,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str /* Is it a reply to our BOOTP request? */ if (b->op != BOOTP_REPLY || b->xid != d->xid) { - net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", - b->op, b->xid); - goto drop_unlock; - } - - /* Is it a reply for the device we are configuring? */ - if (b->xid != ic_dev_xid) { - net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n"); + net_err_ratelimited("DHCP/BOOTP: Reply not for us on %s, op[%x] xid[%x]\n", + d->dev->name, b->op, b->xid); goto drop_unlock; } @@ -1075,7 +1066,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str } } - pr_debug("DHCP: Got message type %d\n", mt); + pr_debug("DHCP: Got message type %d (%s)\n", mt, d->dev->name); switch (mt) { case DHCPOFFER: @@ -1130,7 +1121,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str } /* We have a winner! */ - ic_dev = dev; + ic_dev = d; ic_myaddr = b->your_ip; ic_servaddr = b->server_ip; ic_addrservaddr = b->iph.saddr; @@ -1225,9 +1216,6 @@ static int __init ic_dynamic(void) timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { #ifdef IPCONFIG_BOOTP - /* Track the device we are configuring */ - ic_dev_xid = d->xid; - if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); #endif @@ -1236,15 +1224,19 @@ static int __init ic_dynamic(void) ic_rarp_send_if(d); #endif - jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout); - while (time_before(jiffies, jiff) && !ic_got_reply) - schedule_timeout_uninterruptible(1); + if (!d->next) { + jiff = jiffies + timeout; + while (time_before(jiffies, jiff) && !ic_got_reply) + schedule_timeout_uninterruptible(1); + } #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. */ if ((ic_got_reply & IC_BOOTP) && (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; + /* continue on device that got the reply */ + d = ic_dev; pr_cont(","); continue; } @@ -1487,7 +1479,7 @@ static int __init ip_auto_config(void) #endif /* IPCONFIG_DYNAMIC */ } else { /* Device selected manually or only one device -> use it */ - ic_dev = ic_first_dev->dev; + ic_dev = ic_first_dev; } addr = root_nfs_parse_addr(root_server_path); @@ -1501,14 +1493,6 @@ static int __init ip_auto_config(void) return -1; /* - * Close all network devices except the device we've - * autoconfigured and set up routes. - */ - ic_close_devs(); - if (ic_setup_if() < 0 || ic_setup_routes() < 0) - return -1; - - /* * Record which protocol was actually used. */ #ifdef IPCONFIG_DYNAMIC @@ -1522,7 +1506,7 @@ static int __init ip_auto_config(void) pr_info("IP-Config: Complete:\n"); pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n", - ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr, + ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr, &ic_myaddr, &ic_netmask, &ic_gateway); pr_info(" host=%s, domain=%s, nis-domain=%s\n", utsname()->nodename, ic_domain, utsname()->domainname); @@ -1542,7 +1526,18 @@ static int __init ip_auto_config(void) pr_cont("\n"); #endif /* !SILENT */ - return 0; + /* + * Close all network devices except the device we've + * autoconfigured and set up routes. + */ + if (ic_setup_if() < 0 || ic_setup_routes() < 0) + err = -1; + else + err = 0; + + ic_close_devs(); + + return err; } late_initcall(ip_auto_config); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index ec9efbc..aba0998 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -172,6 +172,5 @@ static void __exit ila_fini(void) module_init(ila_init); module_exit(ila_fini); -MODULE_ALIAS_RTNL_LWT(ILA); MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 704274c..397e1ed 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -61,12 +61,12 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_GRE_HASH_SIZE_SHIFT 5 +#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) static int ip6gre_net_id __read_mostly; struct ip6gre_net { - struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; + struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; struct net_device *fb_tunnel_dev; }; @@ -96,12 +96,12 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); will match fallback tunnel. */ -#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1)) +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1)) static u32 HASH_ADDR(const struct in6_addr *addr) { u32 hash = ipv6_addr_hash(addr); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT); } #define tunnels_r_l tunnels[3] @@ -1087,7 +1087,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) for (prio = 0; prio < 4; prio++) { int h; - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_GRE_HASH_SIZE; h++) { struct ip6_tnl *t; t = rtnl_dereference(ign->tunnels[prio][h]); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 7b0481e..2050217 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -64,8 +64,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_TUNNEL_HASH_SIZE_SHIFT 5 +#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); @@ -75,7 +75,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT); } static int ip6_tnl_dev_init(struct net_device *dev); @@ -87,7 +87,7 @@ struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; }; @@ -2031,7 +2031,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) if (dev->rtnl_link_ops == &ip6_link_ops) unregister_netdevice_queue(dev, &list); - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { /* If dev is in the same netns, it has already diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d90a11f..cc7e058 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -50,14 +50,14 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_VTI_HASH_SIZE_SHIFT 5 +#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT) static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT); } static int vti6_dev_init(struct net_device *dev); @@ -69,7 +69,7 @@ struct vti6_net { /* the vti6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; }; @@ -1040,7 +1040,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) struct ip6_tnl *t; LIST_HEAD(list); - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { unregister_netdevice_queue(t->dev, &list); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d64ee7e..75c1fc5 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1739,6 +1739,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, continue; } + /* Based on RFC3810 6.1. Should not send source-list change + * records when there is a filter mode change. + */ + if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) || + (!gdeleted && pmc->mca_crcount)) && + (type == MLD2_ALLOW_NEW_SOURCES || + type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) + goto decrease_sf_crcount; + /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; @@ -1766,6 +1775,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, scount++; stotal++; if ((type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) { +decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 182b6a9..b1cdf80 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -62,7 +62,7 @@ For comments look at net/ipv4/ip_gre.c --ANK */ -#define HASH_SIZE 16 +#define IP6_SIT_HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static bool log_ecn_error = true; @@ -78,9 +78,9 @@ static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; struct sit_net { - struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_wc[1]; struct ip_tunnel __rcu **tunnels[4]; @@ -1126,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, } #endif -bool ipip6_valid_ip_proto(u8 ipproto) +static bool ipip6_valid_ip_proto(u8 ipproto) { return ipproto == IPPROTO_IPV6 || ipproto == IPPROTO_IPIP || @@ -1783,7 +1783,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net, for (prio = 1; prio < 4; prio++) { int h; - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_SIT_HASH_SIZE; h++) { struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig index 5db94d9..87fca36 100644 --- a/net/kcm/Kconfig +++ b/net/kcm/Kconfig @@ -3,6 +3,7 @@ config AF_KCM tristate "KCM sockets" depends on INET select BPF_SYSCALL + select STREAM_PARSER ---help--- KCM (Kernel Connection Multiplexor) sockets provide a method for multiplexing messages of a message based application diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 16c2e03..47e4453 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, - psock->stats.rx_msgs, - psock->stats.rx_bytes, + psock->strp.stats.rx_msgs, + psock->strp.stats.rx_bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, @@ -170,9 +170,12 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, if (psock->tx_stopped) seq_puts(seq, "TxStop "); - if (psock->rx_stopped) + if (psock->strp.rx_stopped) seq_puts(seq, "RxStop "); + if (psock->strp.rx_paused) + seq_puts(seq, "RxPause "); + if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); @@ -275,6 +278,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) { struct kcm_psock_stats psock_stats; struct kcm_mux_stats mux_stats; + struct strp_aggr_stats strp_stats; struct kcm_mux *mux; struct kcm_psock *psock; struct net *net = seq->private; @@ -282,20 +286,28 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) memset(&mux_stats, 0, sizeof(mux_stats)); memset(&psock_stats, 0, sizeof(psock_stats)); + memset(&strp_stats, 0, sizeof(strp_stats)); mutex_lock(&knet->mutex); aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats); aggregate_psock_stats(&knet->aggregate_psock_stats, &psock_stats); + aggregate_strp_stats(&knet->aggregate_strp_stats, + &strp_stats); list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &psock_stats); - list_for_each_entry(psock, &mux->psocks, psock_list) + aggregate_strp_stats(&mux->aggregate_strp_stats, + &strp_stats); + list_for_each_entry(psock, &mux->psocks, psock_list) { aggregate_psock_stats(&psock->stats, &psock_stats); + save_strp_stats(&psock->strp, &strp_stats); + } + spin_unlock_bh(&mux->lock); } @@ -328,7 +340,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) mux_stats.rx_ready_drops); seq_printf(seq, - "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", + "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", "Psock", "RX-Msgs", "RX-Bytes", @@ -337,6 +349,8 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "Reserved", "Unreserved", "RX-Aborts", + "RX-Intr", + "RX-Unrecov", "RX-MemFail", "RX-NeedMor", "RX-BadLen", @@ -345,20 +359,22 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "TX-Aborts"); seq_printf(seq, - "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", + "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", - psock_stats.rx_msgs, - psock_stats.rx_bytes, + strp_stats.rx_msgs, + strp_stats.rx_bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, - psock_stats.rx_aborts, - psock_stats.rx_mem_fail, - psock_stats.rx_need_more_hdr, - psock_stats.rx_bad_hdr_len, - psock_stats.rx_msg_too_big, - psock_stats.rx_msg_timeouts, + strp_stats.rx_aborts, + strp_stats.rx_interrupted, + strp_stats.rx_unrecov_intr, + strp_stats.rx_mem_fail, + strp_stats.rx_need_more_hdr, + strp_stats.rx_bad_hdr_len, + strp_stats.rx_msg_too_big, + strp_stats.rx_msg_timeouts, psock_stats.tx_aborts); return 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index cb39e05..eedbe40 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1,3 +1,13 @@ +/* + * Kernel Connection Multiplexor + * + * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + #include <linux/bpf.h> #include <linux/errno.h> #include <linux/errqueue.h> @@ -35,38 +45,12 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) return (struct kcm_tx_msg *)skb->cb; } -static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) -{ - return (struct kcm_rx_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); -} - static void report_csk_error(struct sock *csk, int err) { csk->sk_err = EPIPE; csk->sk_error_report(csk); } -/* Callback lock held */ -static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, - struct sk_buff *skb) -{ - struct sock *csk = psock->sk; - - /* Unrecoverable error in receive */ - - del_timer(&psock->rx_msg_timer); - - if (psock->rx_stopped) - return; - - psock->rx_stopped = 1; - KCM_STATS_INCR(psock->stats.rx_aborts); - - /* Report an error on the lower socket */ - report_csk_error(csk, err); -} - static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, bool wakeup_kcm) { @@ -109,12 +93,13 @@ static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, static void kcm_update_rx_mux_stats(struct kcm_mux *mux, struct kcm_psock *psock) { - KCM_STATS_ADD(mux->stats.rx_bytes, - psock->stats.rx_bytes - psock->saved_rx_bytes); + STRP_STATS_ADD(mux->stats.rx_bytes, + psock->strp.stats.rx_bytes - + psock->saved_rx_bytes); mux->stats.rx_msgs += - psock->stats.rx_msgs - psock->saved_rx_msgs; - psock->saved_rx_msgs = psock->stats.rx_msgs; - psock->saved_rx_bytes = psock->stats.rx_bytes; + psock->strp.stats.rx_msgs - psock->saved_rx_msgs; + psock->saved_rx_msgs = psock->strp.stats.rx_msgs; + psock->saved_rx_bytes = psock->strp.stats.rx_bytes; } static void kcm_update_tx_mux_stats(struct kcm_mux *mux, @@ -167,11 +152,11 @@ static void kcm_rcv_ready(struct kcm_sock *kcm) */ list_del(&psock->psock_ready_list); psock->ready_rx_msg = NULL; - /* Commit clearing of ready_rx_msg for queuing work */ smp_mb(); - queue_work(kcm_wq, &psock->rx_work); + strp_unpause(&psock->strp); + strp_check_rcv(&psock->strp); } /* Buffer limit is okay now, add to ready list */ @@ -285,6 +270,7 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, if (list_empty(&mux->kcm_rx_waiters)) { psock->ready_rx_msg = head; + strp_pause(&psock->strp); list_add_tail(&psock->psock_ready_list, &mux->psocks_ready); spin_unlock_bh(&mux->rx_lock); @@ -353,276 +339,6 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, spin_unlock_bh(&mux->rx_lock); } -static void kcm_start_rx_timer(struct kcm_psock *psock) -{ - if (psock->sk->sk_rcvtimeo) - mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo); -} - -/* Macro to invoke filter function. */ -#define KCM_RUN_FILTER(prog, ctx) \ - (*prog->bpf_func)(ctx, prog->insnsi) - -/* Lower socket lock held */ -static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) -{ - struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; - struct kcm_rx_msg *rxm; - struct kcm_sock *kcm; - struct sk_buff *head, *skb; - size_t eaten = 0, cand_len; - ssize_t extra; - int err; - bool cloned_orig = false; - - if (psock->ready_rx_msg) - return 0; - - head = psock->rx_skb_head; - if (head) { - /* Message already in progress */ - - rxm = kcm_rx_msg(head); - if (unlikely(rxm->early_eaten)) { - /* Already some number of bytes on the receive sock - * data saved in rx_skb_head, just indicate they - * are consumed. - */ - eaten = orig_len <= rxm->early_eaten ? - orig_len : rxm->early_eaten; - rxm->early_eaten -= eaten; - - return eaten; - } - - if (unlikely(orig_offset)) { - /* Getting data with a non-zero offset when a message is - * in progress is not expected. If it does happen, we - * need to clone and pull since we can't deal with - * offsets in the skbs for a message expect in the head. - */ - orig_skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!orig_skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - if (!pskb_pull(orig_skb, orig_offset)) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - kfree_skb(orig_skb); - desc->error = -ENOMEM; - return 0; - } - cloned_orig = true; - orig_offset = 0; - } - - if (!psock->rx_skb_nextp) { - /* We are going to append to the frags_list of head. - * Need to unshare the frag_list. - */ - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - return 0; - } - - if (unlikely(skb_shinfo(head)->frag_list)) { - /* We can't append to an sk_buff that already - * has a frag_list. We create a new head, point - * the frag_list of that to the old head, and - * then are able to use the old head->next for - * appending to the message. - */ - if (WARN_ON(head->next)) { - desc->error = -EINVAL; - return 0; - } - - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - skb->len = head->len; - skb->data_len = head->len; - skb->truesize = head->truesize; - *kcm_rx_msg(skb) = *kcm_rx_msg(head); - psock->rx_skb_nextp = &head->next; - skb_shinfo(skb)->frag_list = head; - psock->rx_skb_head = skb; - head = skb; - } else { - psock->rx_skb_nextp = - &skb_shinfo(head)->frag_list; - } - } - } - - while (eaten < orig_len) { - /* Always clone since we will consume something */ - skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - break; - } - - cand_len = orig_len - eaten; - - head = psock->rx_skb_head; - if (!head) { - head = skb; - psock->rx_skb_head = head; - /* Will set rx_skb_nextp on next packet if needed */ - psock->rx_skb_nextp = NULL; - rxm = kcm_rx_msg(head); - memset(rxm, 0, sizeof(*rxm)); - rxm->offset = orig_offset + eaten; - } else { - /* Unclone since we may be appending to an skb that we - * already share a frag_list with. - */ - err = skb_unclone(skb, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - break; - } - - rxm = kcm_rx_msg(head); - *psock->rx_skb_nextp = skb; - psock->rx_skb_nextp = &skb->next; - head->data_len += skb->len; - head->len += skb->len; - head->truesize += skb->truesize; - } - - if (!rxm->full_len) { - ssize_t len; - - len = KCM_RUN_FILTER(psock->bpf_prog, head); - - if (!len) { - /* Need more header to determine length */ - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - rxm->accum_len += cand_len; - eaten += cand_len; - KCM_STATS_INCR(psock->stats.rx_need_more_hdr); - WARN_ON(eaten != orig_len); - break; - } else if (len > psock->sk->sk_rcvbuf) { - /* Message length exceeds maximum allowed */ - KCM_STATS_INCR(psock->stats.rx_msg_too_big); - desc->error = -EMSGSIZE; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EMSGSIZE, head); - break; - } else if (len <= (ssize_t)head->len - - skb->len - rxm->offset) { - /* Length must be into new skb (and also - * greater than zero) - */ - KCM_STATS_INCR(psock->stats.rx_bad_hdr_len); - desc->error = -EPROTO; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EPROTO, head); - break; - } - - rxm->full_len = len; - } - - extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; - - if (extra < 0) { - /* Message not complete yet. */ - if (rxm->full_len - rxm->accum_len > - tcp_inq(psock->sk)) { - /* Don't have the whole messages in the socket - * buffer. Set psock->rx_need_bytes to wait for - * the rest of the message. Also, set "early - * eaten" since we've already buffered the skb - * but don't consume yet per tcp_read_sock. - */ - - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - - psock->rx_need_bytes = rxm->full_len - - rxm->accum_len; - rxm->accum_len += cand_len; - rxm->early_eaten = cand_len; - KCM_STATS_ADD(psock->stats.rx_bytes, cand_len); - desc->count = 0; /* Stop reading socket */ - break; - } - rxm->accum_len += cand_len; - eaten += cand_len; - WARN_ON(eaten != orig_len); - break; - } - - /* Positive extra indicates ore bytes than needed for the - * message - */ - - WARN_ON(extra > cand_len); - - eaten += (cand_len - extra); - - /* Hurray, we have a new message! */ - del_timer(&psock->rx_msg_timer); - psock->rx_skb_head = NULL; - KCM_STATS_INCR(psock->stats.rx_msgs); - -try_queue: - kcm = reserve_rx_kcm(psock, head); - if (!kcm) { - /* Unable to reserve a KCM, message is held in psock. */ - break; - } - - if (kcm_queue_rcv_skb(&kcm->sk, head)) { - /* Should mean socket buffer full */ - unreserve_rx_kcm(psock, false); - goto try_queue; - } - } - - if (cloned_orig) - kfree_skb(orig_skb); - - KCM_STATS_ADD(psock->stats.rx_bytes, eaten); - - return eaten; -} - -/* Called with lock held on lower socket */ -static int psock_tcp_read_sock(struct kcm_psock *psock) -{ - read_descriptor_t desc; - - desc.arg.data = psock; - desc.error = 0; - desc.count = 1; /* give more than one skb per call */ - - /* sk should be locked here, so okay to do tcp_read_sock */ - tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); - - unreserve_rx_kcm(psock, true); - - return desc.error; -} - /* Lower sock lock held */ static void psock_tcp_data_ready(struct sock *sk) { @@ -631,65 +347,49 @@ static void psock_tcp_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); psock = (struct kcm_psock *)sk->sk_user_data; - if (unlikely(!psock || psock->rx_stopped)) - goto out; - - if (psock->ready_rx_msg) - goto out; - - if (psock->rx_need_bytes) { - if (tcp_inq(sk) >= psock->rx_need_bytes) - psock->rx_need_bytes = 0; - else - goto out; - } - - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); + if (likely(psock)) + strp_tcp_data_ready(&psock->strp); -out: read_unlock_bh(&sk->sk_callback_lock); } -static void do_psock_rx_work(struct kcm_psock *psock) +/* Called with lower sock held */ +static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb) { - read_descriptor_t rd_desc; - struct sock *csk = psock->sk; - - /* We need the read lock to synchronize with psock_tcp_data_ready. We - * need the socket lock for calling tcp_read_sock. - */ - lock_sock(csk); - read_lock_bh(&csk->sk_callback_lock); - - if (unlikely(csk->sk_user_data != psock)) - goto out; - - if (unlikely(psock->rx_stopped)) - goto out; - - if (psock->ready_rx_msg) - goto out; - - rd_desc.arg.data = psock; + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct kcm_sock *kcm; - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); +try_queue: + kcm = reserve_rx_kcm(psock, skb); + if (!kcm) { + /* Unable to reserve a KCM, message is held in psock and strp + * is paused. + */ + return; + } -out: - read_unlock_bh(&csk->sk_callback_lock); - release_sock(csk); + if (kcm_queue_rcv_skb(&kcm->sk, skb)) { + /* Should mean socket buffer full */ + unreserve_rx_kcm(psock, false); + goto try_queue; + } } -static void psock_rx_work(struct work_struct *w) +static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) { - do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct bpf_prog *prog = psock->bpf_prog; + + return (*prog->bpf_func)(skb, prog->insnsi); } -static void psock_rx_delayed_work(struct work_struct *w) +static int kcm_read_sock_done(struct strparser *strp, int err) { - do_psock_rx_work(container_of(w, struct kcm_psock, - rx_delayed_work.work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + + unreserve_rx_kcm(psock, true); + + return err; } static void psock_tcp_state_change(struct sock *sk) @@ -713,14 +413,13 @@ static void psock_tcp_write_space(struct sock *sk) psock = (struct kcm_psock *)sk->sk_user_data; if (unlikely(!psock)) goto out; - mux = psock->mux; spin_lock_bh(&mux->lock); /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm) + if (kcm && !unlikely(kcm->tx_stopped)) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1411,7 +1110,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, struct kcm_sock *kcm = kcm_sk(sk); int err = 0; long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int copied = 0; struct sk_buff *skb; @@ -1425,7 +1124,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1481,7 +1180,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int err = 0; ssize_t copied; struct sk_buff *skb; @@ -1498,7 +1197,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1674,15 +1373,6 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->rx_lock); } -static void kcm_rx_msg_timeout(unsigned long arg) -{ - struct kcm_psock *psock = (struct kcm_psock *)arg; - - /* Message assembly timed out */ - KCM_STATS_INCR(psock->stats.rx_msg_timeouts); - kcm_abort_rx_psock(psock, ETIMEDOUT, NULL); -} - static int kcm_attach(struct socket *sock, struct socket *csock, struct bpf_prog *prog) { @@ -1692,6 +1382,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, struct kcm_psock *psock = NULL, *tpsock; struct list_head *head; int index = 0; + struct strp_callbacks cb; if (csock->ops->family != PF_INET && csock->ops->family != PF_INET6) @@ -1713,11 +1404,12 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; - setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout, - (unsigned long)psock); + cb.rcv_msg = kcm_rcv_strparser; + cb.abort_parser = NULL; + cb.parse_msg = kcm_parse_func_strparser; + cb.read_sock_done = kcm_read_sock_done; - INIT_WORK(&psock->rx_work, psock_rx_work); - INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); + strp_init(&psock->strp, csk, &cb); sock_hold(csk); @@ -1750,7 +1442,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, spin_unlock_bh(&mux->lock); /* Schedule RX work in case there are already bytes queued */ - queue_work(kcm_wq, &psock->rx_work); + strp_check_rcv(&psock->strp); return 0; } @@ -1785,6 +1477,7 @@ out: return err; } +/* Lower socket lock held */ static void kcm_unattach(struct kcm_psock *psock) { struct sock *csk = psock->sk; @@ -1798,7 +1491,7 @@ static void kcm_unattach(struct kcm_psock *psock) csk->sk_data_ready = psock->save_data_ready; csk->sk_write_space = psock->save_write_space; csk->sk_state_change = psock->save_state_change; - psock->rx_stopped = 1; + strp_stop(&psock->strp); if (WARN_ON(psock->rx_kcm)) { write_unlock_bh(&csk->sk_callback_lock); @@ -1821,18 +1514,14 @@ static void kcm_unattach(struct kcm_psock *psock) write_unlock_bh(&csk->sk_callback_lock); - del_timer_sync(&psock->rx_msg_timer); - cancel_work_sync(&psock->rx_work); - cancel_delayed_work_sync(&psock->rx_delayed_work); + strp_done(&psock->strp); bpf_prog_put(psock->bpf_prog); - kfree_skb(psock->rx_skb_head); - psock->rx_skb_head = NULL; - spin_lock_bh(&mux->lock); aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats); + save_strp_stats(&psock->strp, &mux->aggregate_strp_stats); KCM_STATS_INCR(mux->stats.psock_unattach); @@ -1915,6 +1604,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) spin_unlock_bh(&mux->lock); + /* Lower socket lock should already be held */ kcm_unattach(psock); err = 0; @@ -2059,8 +1749,11 @@ static void release_mux(struct kcm_mux *mux) /* Release psocks */ list_for_each_entry_safe(psock, tmp_psock, &mux->psocks, psock_list) { - if (!WARN_ON(psock->unattaching)) + if (!WARN_ON(psock->unattaching)) { + lock_sock(psock->strp.sk); kcm_unattach(psock); + release_sock(psock->strp.sk); + } } if (WARN_ON(mux->psocks_cnt)) @@ -2072,6 +1765,8 @@ static void release_mux(struct kcm_mux *mux) aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &knet->aggregate_psock_stats); + aggregate_strp_stats(&mux->aggregate_strp_stats, + &knet->aggregate_strp_stats); list_del_rcu(&mux->kcm_mux_list); knet->count--; mutex_unlock(&knet->mutex); @@ -2151,6 +1846,13 @@ static int kcm_release(struct socket *sock) * it will just return. */ __skb_queue_purge(&sk->sk_write_queue); + + /* Set tx_stopped. This is checked when psock is bound to a kcm and we + * get a writespace callback. This prevents further work being queued + * from the callback (unbinding the psock occurs after canceling work. + */ + kcm->tx_stopped = 1; + release_sock(sk); spin_lock_bh(&mux->lock); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ba5fc1f..42a41ae 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1088,13 +1088,13 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, } static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, - struct ieee80211_sta *sta) + struct sta_info *sta) { u32 ret = 0; - trace_drv_get_expected_throughput(sta); - if (local->ops->get_expected_throughput) - ret = local->ops->get_expected_throughput(&local->hw, sta); + trace_drv_get_expected_throughput(&sta->sta); + if (local->ops->get_expected_throughput && sta->uploaded) + ret = local->ops->get_expected_throughput(&local->hw, &sta->sta); trace_drv_return_u32(local, ret); return ret; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 8f9c3bd..fa7d37c 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -326,22 +326,33 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, u32 tx_time, estimated_retx; u64 result; - if (sta->mesh->fail_avg >= 100) - return MAX_METRIC; + /* Try to get rate based on HW/SW RC algorithm. + * Rate is returned in units of Kbps, correct this + * to comply with airtime calculation units + * Round up in case we get rate < 100Kbps + */ + rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100); - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); - rate = cfg80211_calculate_bitrate(&rinfo); - if (WARN_ON(!rate)) - return MAX_METRIC; + if (rate) { + err = 0; + } else { + if (sta->mesh->fail_avg >= 100) + return MAX_METRIC; - err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); + rate = cfg80211_calculate_bitrate(&rinfo); + if (WARN_ON(!rate)) + return MAX_METRIC; + + err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + } /* bitrate is in units of 100 Kbps, while we need rate in units of * 1Mbps. This will be corrected on tx_time computation. */ tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); - result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ; + result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT); return (u32)result; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 76b737d..19f14c9 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -2279,11 +2279,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); - /* check if the driver has a SW RC implementation */ - if (ref && ref->ops->get_expected_throughput) - thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); - else - thr = drv_get_expected_throughput(local, &sta->sta); + thr = sta_get_expected_throughput(sta); if (thr != 0) { sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); @@ -2291,6 +2287,25 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } +u32 sta_get_expected_throughput(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct rate_control_ref *ref = NULL; + u32 thr = 0; + + if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) + ref = local->rate_ctrl; + + /* check if the driver has a SW RC implementation */ + if (ref && ref->ops->get_expected_throughput) + thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); + else + thr = drv_get_expected_throughput(local, sta); + + return thr; +} + unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 78b0ef3..0556be3 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -712,6 +712,8 @@ void sta_set_rate_info_tx(struct sta_info *sta, struct rate_info *rinfo); void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo); +u32 sta_get_expected_throughput(struct sta_info *sta); + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time); u8 sta_info_tx_streams(struct sta_info *sta); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 5023966..1d0746d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2334,7 +2334,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; - int nh_pos, h_pos; bool wme_sta = false, authorized = false; bool tdls_peer; bool multicast; @@ -2640,13 +2639,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, encaps_len = 0; } - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb); /* @@ -2672,18 +2665,12 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } } - if (encaps_data) { + if (encaps_data) memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } #ifdef CONFIG_MAC80211_MESH - if (meshhdrlen > 0) { + if (meshhdrlen > 0) memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen); - nh_pos += meshhdrlen; - h_pos += meshhdrlen; - } #endif if (ieee80211_is_data_qos(fc)) { @@ -2699,15 +2686,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } else memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - nh_pos += hdrlen; - h_pos += hdrlen; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. */ skb_reset_mac_header(skb); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); @@ -4390,9 +4369,6 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ac = ieee802_1d_to_ac[tid & 7]; skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb_set_queue_mapping(skb, ac); skb->priority = tid; diff --git a/net/rds/ib.h b/net/rds/ib.h index 046f750..45ac8e8 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -333,6 +333,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); void rds_ib_state_change(struct sock *sk); int rds_ib_listen_init(void); void rds_ib_listen_stop(void); +__printf(2, 3) void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); diff --git a/net/rds/rds.h b/net/rds/rds.h index b2d17f0..fd0bccb 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -688,6 +688,7 @@ void __rds_conn_error(struct rds_connection *conn, const char *, ...); #define rds_conn_error(conn, fmt...) \ __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) +__printf(2, 3) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); #define rds_conn_path_error(cp, fmt...) \ __rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 12ebde8..25aada7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -29,6 +29,7 @@ #include <linux/hrtimer.h> #include <linux/lockdep.h> #include <linux/slab.h> +#include <linux/hashtable.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -263,33 +264,33 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) root->handle == handle) return root; - list_for_each_entry_rcu(q, &root->list, list) { + hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) { if (q->handle == handle) return q; } return NULL; } -void qdisc_list_add(struct Qdisc *q) +void qdisc_hash_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); - list_add_tail_rcu(&q->list, &root->list); + hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); } } -EXPORT_SYMBOL(qdisc_list_add); +EXPORT_SYMBOL(qdisc_hash_add); -void qdisc_list_del(struct Qdisc *q) +void qdisc_hash_del(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); - list_del_rcu(&q->list); + hash_del_rcu(&q->hash); } } -EXPORT_SYMBOL(qdisc_list_del); +EXPORT_SYMBOL(qdisc_hash_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { @@ -998,7 +999,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out4; } - qdisc_list_add(sch); + qdisc_hash_add(sch); return sch; } @@ -1435,6 +1436,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; + int b; if (!root) return 0; @@ -1449,7 +1451,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, goto done; q_idx++; } - list_for_each_entry(q, &root->list, list) { + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (q_idx < s_q_idx) { q_idx++; continue; @@ -1765,6 +1767,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, int *t_p, int s_t) { struct Qdisc *q; + int b; if (!root) return 0; @@ -1772,7 +1775,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; - list_for_each_entry(q, &root->list, list) { + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e95b67c..18faecc 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -423,7 +423,6 @@ struct Qdisc noop_qdisc = { .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, - .list = LIST_HEAD_INIT(noop_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, .running = SEQCNT_ZERO(noop_qdisc.running), @@ -613,7 +612,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - INIT_LIST_HEAD(&sch->list); skb_queue_head_init(&sch->q); spin_lock_init(&sch->busylock); @@ -700,7 +698,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; #ifdef CONFIG_NET_SCHED - qdisc_list_del(qdisc); + qdisc_hash_del(qdisc); qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif @@ -788,6 +786,10 @@ static void attach_default_qdiscs(struct net_device *dev) qdisc->ops->attach(qdisc); } } +#ifdef CONFIG_NET_SCHED + if (dev->qdisc) + qdisc_hash_add(dev->qdisc); +#endif } static void transition_one_qdisc(struct net_device *dev, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ddc7bd..000f1d3 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -142,8 +142,6 @@ struct hfsc_class { link-sharing, max(myf, cfmin) */ u64 cl_myf; /* my fit-time (calculated from this class's own upperlimit curve) */ - u64 cl_myfadj; /* my fit-time adjustment (to cancel - history dependence) */ u64 cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u64 cl_cvtmin; /* minimal virtual time among the @@ -151,11 +149,8 @@ struct hfsc_class { (monotonic within a period) */ u64 cl_vtadj; /* intra-period cumulative vt adjustment */ - u64 cl_vtoff; /* inter-period cumulative vt offset */ - u64 cl_cvtmax; /* max child's vt in the last period */ - u64 cl_cvtoff; /* cumulative cvtmax of all periods */ - u64 cl_pcvtoff; /* parent's cvtoff at initialization - time */ + u64 cl_cvtoff; /* largest virtual time seen among + the children */ struct internal_sc cl_rsc; /* internal real-time service curve */ struct internal_sc cl_fsc; /* internal fair service curve */ @@ -701,28 +696,16 @@ init_vf(struct hfsc_class *cl, unsigned int len) } else { /* * first child for a new parent backlog period. - * add parent's cvtmax to cvtoff to make a new - * vt (vtoff + vt) larger than the vt in the - * last period for all children. + * initialize cl_vt to the highest value seen + * among the siblings. this is analogous to + * what cur_time would provide in realtime case. */ - vt = cl->cl_parent->cl_cvtmax; - cl->cl_parent->cl_cvtoff += vt; - cl->cl_parent->cl_cvtmax = 0; + cl->cl_vt = cl->cl_parent->cl_cvtoff; cl->cl_parent->cl_cvtmin = 0; - cl->cl_vt = 0; } - cl->cl_vtoff = cl->cl_parent->cl_cvtoff - - cl->cl_pcvtoff; - /* update the virtual curve */ - vt = cl->cl_vt + cl->cl_vtoff; - rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt, - cl->cl_total); - if (cl->cl_virtual.x == vt) { - cl->cl_virtual.x -= cl->cl_vtoff; - cl->cl_vtoff = 0; - } + rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ @@ -745,7 +728,6 @@ init_vf(struct hfsc_class *cl, unsigned int len) /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); - cl->cl_myfadj = 0; } } @@ -779,8 +761,7 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) go_passive = 0; /* update vt */ - cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - - cl->cl_vtoff + cl->cl_vtadj; + cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, @@ -795,9 +776,9 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) if (go_passive) { /* no more active child, going passive */ - /* update cvtmax of the parent class */ - if (cl->cl_vt > cl->cl_parent->cl_cvtmax) - cl->cl_parent->cl_cvtmax = cl->cl_vt; + /* update cvtoff of the parent class */ + if (cl->cl_vt > cl->cl_parent->cl_cvtoff) + cl->cl_parent->cl_cvtoff = cl->cl_vt; /* remove this class from the vt tree */ vttree_remove(cl); @@ -813,9 +794,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) /* update f */ if (cl->cl_flags & HFSC_USC) { + cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); +#if 0 cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); -#if 0 /* * This code causes classes to stay way under their * limit when multiple classes are used at gigabit @@ -940,7 +922,7 @@ static void hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) { sc2isc(fsc, &cl->cl_fsc); - rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total); + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_flags |= HFSC_FSC; } @@ -1094,7 +1076,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (parent->level == 0) hfsc_purge_queue(sch, parent); hfsc_adjust_levels(parent); - cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); qdisc_class_hash_grow(sch, &q->clhash); @@ -1482,16 +1463,12 @@ hfsc_reset_class(struct hfsc_class *cl) cl->cl_e = 0; cl->cl_vt = 0; cl->cl_vtadj = 0; - cl->cl_vtoff = 0; cl->cl_cvtmin = 0; - cl->cl_cvtmax = 0; cl->cl_cvtoff = 0; - cl->cl_pcvtoff = 0; cl->cl_vtperiod = 0; cl->cl_parentperiod = 0; cl->cl_f = 0; cl->cl_myf = 0; - cl->cl_myfadj = 0; cl->cl_cfmin = 0; cl->cl_nactive = 0; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index b943982..2bc8d7f 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -88,7 +88,7 @@ static void mq_attach(struct Qdisc *sch) qdisc_destroy(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); #endif } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 549c663..b5c502c 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -182,7 +182,7 @@ static void mqprio_attach(struct Qdisc *sch) if (old) qdisc_destroy(old); if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); } kfree(priv->qdiscs); priv->qdiscs = NULL; diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig new file mode 100644 index 0000000..6cff3f6 --- /dev/null +++ b/net/strparser/Kconfig @@ -0,0 +1,4 @@ + +config STREAM_PARSER + tristate + default n diff --git a/net/strparser/Makefile b/net/strparser/Makefile new file mode 100644 index 0000000..858a126 --- /dev/null +++ b/net/strparser/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_STREAM_PARSER) += strparser.o diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c new file mode 100644 index 0000000..fd688c0 --- /dev/null +++ b/net/strparser/strparser.c @@ -0,0 +1,492 @@ +/* + * Stream Parser + * + * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#include <linux/bpf.h> +#include <linux/errno.h> +#include <linux/errqueue.h> +#include <linux/file.h> +#include <linux/in.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/poll.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/socket.h> +#include <linux/uaccess.h> +#include <linux/workqueue.h> +#include <net/strparser.h> +#include <net/netns/generic.h> +#include <net/sock.h> +#include <net/tcp.h> + +static struct workqueue_struct *strp_wq; + +struct _strp_rx_msg { + /* Internal cb structure. struct strp_rx_msg must be first for passing + * to upper layer. + */ + struct strp_rx_msg strp; + int accum_len; + int early_eaten; +}; + +static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb) +{ + return (struct _strp_rx_msg *)((void *)skb->cb + + offsetof(struct qdisc_skb_cb, data)); +} + +/* Lower lock held */ +static void strp_abort_rx_strp(struct strparser *strp, int err) +{ + struct sock *csk = strp->sk; + + /* Unrecoverable error in receive */ + + del_timer(&strp->rx_msg_timer); + + if (strp->rx_stopped) + return; + + strp->rx_stopped = 1; + + /* Report an error on the lower socket */ + csk->sk_err = err; + csk->sk_error_report(csk); +} + +static void strp_start_rx_timer(struct strparser *strp) +{ + if (strp->sk->sk_rcvtimeo) + mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo); +} + +/* Lower lock held */ +static void strp_parser_err(struct strparser *strp, int err, + read_descriptor_t *desc) +{ + desc->error = err; + kfree_skb(strp->rx_skb_head); + strp->rx_skb_head = NULL; + strp->cb.abort_parser(strp, err); +} + +/* Lower socket lock held */ +static int strp_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) +{ + struct strparser *strp = (struct strparser *)desc->arg.data; + struct _strp_rx_msg *rxm; + struct sk_buff *head, *skb; + size_t eaten = 0, cand_len; + ssize_t extra; + int err; + bool cloned_orig = false; + + if (strp->rx_paused) + return 0; + + head = strp->rx_skb_head; + if (head) { + /* Message already in progress */ + + rxm = _strp_rx_msg(head); + if (unlikely(rxm->early_eaten)) { + /* Already some number of bytes on the receive sock + * data saved in rx_skb_head, just indicate they + * are consumed. + */ + eaten = orig_len <= rxm->early_eaten ? + orig_len : rxm->early_eaten; + rxm->early_eaten -= eaten; + + return eaten; + } + + if (unlikely(orig_offset)) { + /* Getting data with a non-zero offset when a message is + * in progress is not expected. If it does happen, we + * need to clone and pull since we can't deal with + * offsets in the skbs for a message expect in the head. + */ + orig_skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!orig_skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + return 0; + } + if (!pskb_pull(orig_skb, orig_offset)) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + kfree_skb(orig_skb); + desc->error = -ENOMEM; + return 0; + } + cloned_orig = true; + orig_offset = 0; + } + + if (!strp->rx_skb_nextp) { + /* We are going to append to the frags_list of head. + * Need to unshare the frag_list. + */ + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = err; + return 0; + } + + if (unlikely(skb_shinfo(head)->frag_list)) { + /* We can't append to an sk_buff that already + * has a frag_list. We create a new head, point + * the frag_list of that to the old head, and + * then are able to use the old head->next for + * appending to the message. + */ + if (WARN_ON(head->next)) { + desc->error = -EINVAL; + return 0; + } + + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + return 0; + } + skb->len = head->len; + skb->data_len = head->len; + skb->truesize = head->truesize; + *_strp_rx_msg(skb) = *_strp_rx_msg(head); + strp->rx_skb_nextp = &head->next; + skb_shinfo(skb)->frag_list = head; + strp->rx_skb_head = skb; + head = skb; + } else { + strp->rx_skb_nextp = + &skb_shinfo(head)->frag_list; + } + } + } + + while (eaten < orig_len) { + /* Always clone since we will consume something */ + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + break; + } + + cand_len = orig_len - eaten; + + head = strp->rx_skb_head; + if (!head) { + head = skb; + strp->rx_skb_head = head; + /* Will set rx_skb_nextp on next packet if needed */ + strp->rx_skb_nextp = NULL; + rxm = _strp_rx_msg(head); + memset(rxm, 0, sizeof(*rxm)); + rxm->strp.offset = orig_offset + eaten; + } else { + /* Unclone since we may be appending to an skb that we + * already share a frag_list with. + */ + err = skb_unclone(skb, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = err; + break; + } + + rxm = _strp_rx_msg(head); + *strp->rx_skb_nextp = skb; + strp->rx_skb_nextp = &skb->next; + head->data_len += skb->len; + head->len += skb->len; + head->truesize += skb->truesize; + } + + if (!rxm->strp.full_len) { + ssize_t len; + + len = (*strp->cb.parse_msg)(strp, head); + + if (!len) { + /* Need more header to determine length */ + if (!rxm->accum_len) { + /* Start RX timer for new message */ + strp_start_rx_timer(strp); + } + rxm->accum_len += cand_len; + eaten += cand_len; + STRP_STATS_INCR(strp->stats.rx_need_more_hdr); + WARN_ON(eaten != orig_len); + break; + } else if (len < 0) { + if (len == -ESTRPIPE && rxm->accum_len) { + len = -ENODATA; + strp->rx_unrecov_intr = 1; + } else { + strp->rx_interrupted = 1; + } + strp_parser_err(strp, err, desc); + break; + } else if (len > strp->sk->sk_rcvbuf) { + /* Message length exceeds maximum allowed */ + STRP_STATS_INCR(strp->stats.rx_msg_too_big); + strp_parser_err(strp, -EMSGSIZE, desc); + break; + } else if (len <= (ssize_t)head->len - + skb->len - rxm->strp.offset) { + /* Length must be into new skb (and also + * greater than zero) + */ + STRP_STATS_INCR(strp->stats.rx_bad_hdr_len); + strp_parser_err(strp, -EPROTO, desc); + break; + } + + rxm->strp.full_len = len; + } + + extra = (ssize_t)(rxm->accum_len + cand_len) - + rxm->strp.full_len; + + if (extra < 0) { + /* Message not complete yet. */ + if (rxm->strp.full_len - rxm->accum_len > + tcp_inq(strp->sk)) { + /* Don't have the whole messages in the socket + * buffer. Set strp->rx_need_bytes to wait for + * the rest of the message. Also, set "early + * eaten" since we've already buffered the skb + * but don't consume yet per tcp_read_sock. + */ + + if (!rxm->accum_len) { + /* Start RX timer for new message */ + strp_start_rx_timer(strp); + } + + strp->rx_need_bytes = rxm->strp.full_len - + rxm->accum_len; + rxm->accum_len += cand_len; + rxm->early_eaten = cand_len; + STRP_STATS_ADD(strp->stats.rx_bytes, cand_len); + desc->count = 0; /* Stop reading socket */ + break; + } + rxm->accum_len += cand_len; + eaten += cand_len; + WARN_ON(eaten != orig_len); + break; + } + + /* Positive extra indicates ore bytes than needed for the + * message + */ + + WARN_ON(extra > cand_len); + + eaten += (cand_len - extra); + + /* Hurray, we have a new message! */ + del_timer(&strp->rx_msg_timer); + strp->rx_skb_head = NULL; + STRP_STATS_INCR(strp->stats.rx_msgs); + + /* Give skb to upper layer */ + strp->cb.rcv_msg(strp, head); + + if (unlikely(strp->rx_paused)) { + /* Upper layer paused strp */ + break; + } + } + + if (cloned_orig) + kfree_skb(orig_skb); + + STRP_STATS_ADD(strp->stats.rx_bytes, eaten); + + return eaten; +} + +static int default_read_sock_done(struct strparser *strp, int err) +{ + return err; +} + +/* Called with lock held on lower socket */ +static int strp_tcp_read_sock(struct strparser *strp) +{ + read_descriptor_t desc; + + desc.arg.data = strp; + desc.error = 0; + desc.count = 1; /* give more than one skb per call */ + + /* sk should be locked here, so okay to do tcp_read_sock */ + tcp_read_sock(strp->sk, &desc, strp_tcp_recv); + + desc.error = strp->cb.read_sock_done(strp, desc.error); + + return desc.error; +} + +/* Lower sock lock held */ +void strp_tcp_data_ready(struct strparser *strp) +{ + struct sock *csk = strp->sk; + + if (unlikely(strp->rx_stopped)) + return; + + /* This check is needed to synchronize with do_strp_rx_work. + * do_strp_rx_work acquires a process lock (lock_sock) whereas + * the lock held here is bh_lock_sock. The two locks can be + * held by different threads at the same time, but bh_lock_sock + * allows a thread in BH context to safely check if the process + * lock is held. In this case, if the lock is held, queue work. + */ + if (sock_owned_by_user(csk)) { + queue_work(strp_wq, &strp->rx_work); + return; + } + + if (strp->rx_paused) + return; + + if (strp->rx_need_bytes) { + if (tcp_inq(csk) >= strp->rx_need_bytes) + strp->rx_need_bytes = 0; + else + return; + } + + if (strp_tcp_read_sock(strp) == -ENOMEM) + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_tcp_data_ready); + +static void do_strp_rx_work(struct strparser *strp) +{ + read_descriptor_t rd_desc; + struct sock *csk = strp->sk; + + /* We need the read lock to synchronize with strp_tcp_data_ready. We + * need the socket lock for calling tcp_read_sock. + */ + lock_sock(csk); + + if (unlikely(csk->sk_user_data != strp)) + goto out; + + if (unlikely(strp->rx_stopped)) + goto out; + + if (strp->rx_paused) + goto out; + + rd_desc.arg.data = strp; + + if (strp_tcp_read_sock(strp) == -ENOMEM) + queue_work(strp_wq, &strp->rx_work); + +out: + release_sock(csk); +} + +static void strp_rx_work(struct work_struct *w) +{ + do_strp_rx_work(container_of(w, struct strparser, rx_work)); +} + +static void strp_rx_msg_timeout(unsigned long arg) +{ + struct strparser *strp = (struct strparser *)arg; + + /* Message assembly timed out */ + STRP_STATS_INCR(strp->stats.rx_msg_timeouts); + lock_sock(strp->sk); + strp->cb.abort_parser(strp, ETIMEDOUT); + release_sock(strp->sk); +} + +int strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb) +{ + if (!cb || !cb->rcv_msg || !cb->parse_msg) + return -EINVAL; + + memset(strp, 0, sizeof(*strp)); + + strp->sk = csk; + + setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout, + (unsigned long)strp); + + INIT_WORK(&strp->rx_work, strp_rx_work); + + strp->cb.rcv_msg = cb->rcv_msg; + strp->cb.parse_msg = cb->parse_msg; + strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; + strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp; + + return 0; +} +EXPORT_SYMBOL_GPL(strp_init); + +/* strp must already be stopped so that strp_tcp_recv will no longer be called. + * Note that strp_done is not called with the lower socket held. + */ +void strp_done(struct strparser *strp) +{ + WARN_ON(!strp->rx_stopped); + + del_timer_sync(&strp->rx_msg_timer); + cancel_work_sync(&strp->rx_work); + + if (strp->rx_skb_head) { + kfree_skb(strp->rx_skb_head); + strp->rx_skb_head = NULL; + } +} +EXPORT_SYMBOL_GPL(strp_done); + +void strp_stop(struct strparser *strp) +{ + strp->rx_stopped = 1; +} +EXPORT_SYMBOL_GPL(strp_stop); + +void strp_check_rcv(struct strparser *strp) +{ + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_check_rcv); + +static int __init strp_mod_init(void) +{ + strp_wq = create_singlethread_workqueue("kstrp"); + + return 0; +} + +static void __exit strp_mod_exit(void) +{ +} +module_init(strp_mod_init); +module_exit(strp_mod_exit); +MODULE_LICENSE("GPL"); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a5fc9dd..9e90129 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1306,6 +1306,7 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } +EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); static u32 switchdev_port_fwd_mark_get(struct net_device *dev, struct net_device *group_dev) @@ -1323,7 +1324,6 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev, return dev->ifindex; } -EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, u32 old_mark, u32 *reset_mark) diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 46a71c7..5bc1a3d 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -42,26 +42,37 @@ static int net_ctl_permissions(struct ctl_table_header *head, struct ctl_table *table) { struct net *net = container_of(head->set, struct net, sysctls); - kuid_t root_uid = make_kuid(net->user_ns, 0); - kgid_t root_gid = make_kgid(net->user_ns, 0); /* Allow network administrator to have same access as root. */ - if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_euid())) { + if (ns_capable(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } - /* Allow netns root group to have the same access as the root group */ - if (in_egroup_p(root_gid)) { - int mode = (table->mode >> 3) & 7; - return (mode << 3) | mode; - } + return table->mode; } +static void net_ctl_set_ownership(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid) +{ + struct net *net = container_of(head->set, struct net, sysctls); + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ns_root_uid = make_kuid(net->user_ns, 0); + if (uid_valid(ns_root_uid)) + *uid = ns_root_uid; + + ns_root_gid = make_kgid(net->user_ns, 0); + if (gid_valid(ns_root_gid)) + *gid = ns_root_gid; +} + static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, .permissions = net_ctl_permissions, + .set_ownership = net_ctl_set_ownership, }; static int __net_init sysctl_net_init(struct net *net) diff --git a/net/wireless/core.c b/net/wireless/core.c index 7645e97..2029b49 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -906,6 +906,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) if (WARN_ON(wdev->netdev)) return; + nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); + list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -1079,6 +1081,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) dev->priv_flags |= IFF_DONT_BRIDGE; + + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); break; case NETDEV_GOING_DOWN: cfg80211_leave(rdev, wdev); @@ -1157,6 +1161,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * remove and clean it up. */ if (!list_empty(&wdev->list)) { + nl80211_notify_iface(rdev, wdev, + NL80211_CMD_DEL_INTERFACE); sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f02653a..4997857 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2751,7 +2751,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; struct wireless_dev *wdev; - struct sk_buff *msg, *event; + struct sk_buff *msg; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; @@ -2855,20 +2855,15 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return -ENOBUFS; } - event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (event) { - if (nl80211_send_iface(event, 0, 0, 0, - rdev, wdev, false) < 0) { - nlmsg_free(event); - goto out; - } - - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), - event, 0, NL80211_MCGRP_CONFIG, - GFP_KERNEL); - } + /* + * For wdevs which have no associated netdev object (e.g. of type + * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here. + * For all other types, the event will be generated from the + * netdev notifier + */ + if (!wdev->netdev) + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); -out: return genlmsg_reply(msg, info); } @@ -2876,18 +2871,10 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; - struct sk_buff *msg; - int status; if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) { - nlmsg_free(msg); - msg = NULL; - } - /* * If we remove a wireless device without a netdev then clear * user_ptr[1] so that nl80211_post_doit won't dereference it @@ -2898,15 +2885,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) if (!wdev->netdev) info->user_ptr[1] = NULL; - status = rdev_del_virtual_intf(rdev, wdev); - if (status >= 0 && msg) - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), - msg, 0, NL80211_MCGRP_CONFIG, - GFP_KERNEL); - else - nlmsg_free(msg); - - return status; + return rdev_del_virtual_intf(rdev, wdev); } static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) @@ -5374,6 +5353,18 @@ static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *ou return 0; } +static int nl80211_check_power_mode(const struct nlattr *nla, + enum nl80211_mesh_power_mode min, + enum nl80211_mesh_power_mode max, + enum nl80211_mesh_power_mode *out) +{ + u32 val = nla_get_u32(nla); + if (val < min || val > max) + return -EINVAL; + *out = val; + return 0; +} + static int nl80211_parse_mesh_config(struct genl_info *info, struct mesh_config *cfg, u32 *mask_out) @@ -5518,7 +5509,7 @@ do { \ NL80211_MESH_POWER_ACTIVE, NL80211_MESH_POWER_MAX, mask, NL80211_MESHCONF_POWER_MODE, - nl80211_check_u32); + nl80211_check_power_mode); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16); @@ -7773,12 +7764,13 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.beacon_interval = 100; - if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) ibss.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000) - return -EINVAL; - } + + err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval); + if (err) + return err; if (!rdev->ops->join_ibss) return -EOPNOTSUPP; @@ -9252,9 +9244,10 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { setup.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - if (setup.beacon_interval < 10 || - setup.beacon_interval > 10000) - return -EINVAL; + + err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval); + if (err) + return err; } if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) { @@ -11847,6 +11840,29 @@ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, NL80211_MCGRP_CONFIG, GFP_KERNEL); } +void nl80211_notify_iface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_commands cmd) +{ + struct sk_buff *msg; + + WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && + cmd != NL80211_CMD_DEL_INTERFACE); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, + cmd == NL80211_CMD_DEL_INTERFACE) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_CONFIG, GFP_KERNEL); +} + static int nl80211_add_scan_req(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index a63f402..7e3821d 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -7,6 +7,9 @@ int nl80211_init(void); void nl80211_exit(void); void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, enum nl80211_commands cmd); +void nl80211_notify_iface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_commands cmd); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/util.c b/net/wireless/util.c index b7d1592..0675f51 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1559,7 +1559,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; int res = 0; - if (!beacon_int) + if (beacon_int < 10 || beacon_int > 10000) return -EINVAL; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { |